author    Guilhem Bichot <guilhem@mysql.com>  2009-08-07 12:16:00 +0200
committer Guilhem Bichot <guilhem@mysql.com>  2009-08-07 12:16:00 +0200
commit    29a46b4cf93afb06d7de8d97de5c376226f61377 (patch)
tree      65b42f5cb11f29ea5b4414ff075ccafd48569ad6 /storage/innobase/lock
parent    13a87951e96bc7c187be127865aa1c6de4f4ba46 (diff)
Renamed storage/innodb_plugin to storage/innobase, so that 1) it's the same
layout as we always had in trees containing only the builtin, and 2)
win\configure.js WITH_INNOBASE_STORAGE_ENGINE still works.
Diffstat (limited to 'storage/innobase/lock')
-rw-r--r--  storage/innobase/lock/lock0iter.c   114
-rw-r--r--  storage/innobase/lock/lock0lock.c  5592
2 files changed, 5706 insertions, 0 deletions
diff --git a/storage/innobase/lock/lock0iter.c b/storage/innobase/lock/lock0iter.c
new file mode 100644
index 00000000000..51d1802ccde
--- /dev/null
+++ b/storage/innobase/lock/lock0iter.c
@@ -0,0 +1,114 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0iter.c
+Lock queue iterator. Can iterate over table and record
+lock queues.
+
+Created July 16, 2007 Vasil Dimov
+*******************************************************/
+
+#define LOCK_MODULE_IMPLEMENTATION
+
+#include "univ.i"
+#include "lock0iter.h"
+#include "lock0lock.h"
+#include "lock0priv.h"
+#include "ut0dbg.h"
+#include "ut0lst.h"
+#ifdef UNIV_DEBUG
+# include "srv0srv.h" /* kernel_mutex */
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+Initialize lock queue iterator so that it starts to iterate from
+"lock". bit_no specifies the record number within the heap where the
+record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
+1. If the lock is a table lock, i.e., we have a table lock queue;
+2. If the lock is a record lock and it is a wait lock. In this case
+ bit_no is calculated in this function by using
+ lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
+ of a wait lock. */
+UNIV_INTERN
+void
+lock_queue_iterator_reset(
+/*======================*/
+ lock_queue_iterator_t* iter, /*!< out: iterator */
+ const lock_t* lock, /*!< in: lock to start from */
+ ulint bit_no) /*!< in: record number in the
+ heap */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ iter->current_lock = lock;
+
+ if (bit_no != ULINT_UNDEFINED) {
+
+ iter->bit_no = bit_no;
+ } else {
+
+ switch (lock_get_type_low(lock)) {
+ case LOCK_TABLE:
+ iter->bit_no = ULINT_UNDEFINED;
+ break;
+ case LOCK_REC:
+ iter->bit_no = lock_rec_find_set_bit(lock);
+ ut_a(iter->bit_no != ULINT_UNDEFINED);
+ break;
+ default:
+ ut_error;
+ }
+ }
+}
+
+/*******************************************************************//**
+Gets the previous lock in the lock queue; returns NULL if there are no
+more locks (i.e., the current lock is the first one). The iterator is
+moved one step back if a non-NULL lock is returned.
+@return previous lock or NULL */
+UNIV_INTERN
+const lock_t*
+lock_queue_iterator_get_prev(
+/*=========================*/
+ lock_queue_iterator_t* iter) /*!< in/out: iterator */
+{
+ const lock_t* prev_lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ switch (lock_get_type_low(iter->current_lock)) {
+ case LOCK_REC:
+ prev_lock = lock_rec_get_prev(
+ iter->current_lock, iter->bit_no);
+ break;
+ case LOCK_TABLE:
+ prev_lock = UT_LIST_GET_PREV(
+ un_member.tab_lock.locks, iter->current_lock);
+ break;
+ default:
+ ut_error;
+ }
+
+ if (prev_lock != NULL) {
+
+ iter->current_lock = prev_lock;
+ }
+
+ return(prev_lock);
+}
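
A minimal usage sketch of the two functions above (assuming the caller
already holds kernel_mutex, as both assertions require): starting from a
known lock, walk its queue towards the front.

	lock_queue_iterator_t	iter;
	const lock_t*		prev;

	/* passing ULINT_UNDEFINED lets the iterator derive bit_no from
	the lock itself: a table lock, or a wait lock whose bitmap has
	exactly one bit set */
	lock_queue_iterator_reset(&iter, lock, ULINT_UNDEFINED);

	while ((prev = lock_queue_iterator_get_prev(&iter)) != NULL) {
		/* inspect prev, e.g. prev->trx or lock_get_mode(prev) */
	}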
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
new file mode 100644
index 00000000000..fcd8d268331
--- /dev/null
+++ b/storage/innobase/lock/lock0lock.c
@@ -0,0 +1,5592 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0lock.c
+The transaction lock system
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#define LOCK_MODULE_IMPLEMENTATION
+
+#include "lock0lock.h"
+#include "lock0priv.h"
+
+#ifdef UNIV_NONINL
+#include "lock0lock.ic"
+#include "lock0priv.ic"
+#endif
+
+#include "ha_prototypes.h"
+#include "usr0sess.h"
+#include "trx0purge.h"
+#include "dict0mem.h"
+#include "trx0sys.h"
+
+/* Restricts the length of search we will do in the waits-for
+graph of transactions */
+#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
+
+/* Restricts the recursion depth of the search we will do in the waits-for
+graph of transactions */
+#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
+
+/* When releasing transaction locks, this specifies how often we release
+the kernel mutex for a moment to give other threads access to it as well */
+
+#define LOCK_RELEASE_KERNEL_INTERVAL 1000
+
+/* Safety margin when creating a new record lock: this many extra records
+can be inserted into the page without the need to create a lock with a
+bigger bitmap */
+
+#define LOCK_PAGE_BITMAP_MARGIN 64
+
+/* An explicit record lock affects both the record and the gap before it.
+An implicit x-lock does not affect the gap, it only locks the index
+record from read or update.
+
+If a transaction has modified or inserted an index record, then
+it owns an implicit x-lock on the record. On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition has a consequence that can be explained to the
+user: there is nondeterminism in the access path when a query is
+answered; we may, or may not, access the clustered index record and
+thus may, or may not, bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
+It only locks the record it is placed on, not the gap before the record.
+This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
+level.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed into a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not, with the exception
+that a gap type request set by another transaction to wait for
+its turn to do an insert is ignored. On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit lock or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it at any time
+without needing to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again and will retrieve the same result set, provided it has not modified
+the result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or newly inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+a lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
+
+/* If an index record should be changed or a new one inserted, we must check
+the lock on the record or on the next record. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In next-key locking, an x-lock set on a record also
+prevents inserts just before that record.
+ There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated, but the lock prevents the insert of a user record at the end of
+the page.
+ Next key locks will prevent the phantom problem where new rows
+could appear to SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+ What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because even the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock to the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should also see the newly inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the newly inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+	We could have the convention that granted explicit record locks
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+ How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the newly inserted record needs no record lock in
+the lock table; the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary? A secondary index record
+is modified only by setting or resetting its deleted flag. A secondary index
+record contains fields to uniquely determine the corresponding clustered
+index record. A secondary index record is therefore only modified if we
+also modify the clustered index record, and the trx id checking is done
+on the clustered index record, before we come to modify the secondary index
+record. So, in the case of delete marking or unmarking a secondary index
+record, we do not have to care about trx ids, only the locks in the lock
+table must be checked. In the case of a select from a secondary index, the
+trx id is relevant, and in this case we may have to search the clustered
+index record.
+
+PROBLEM: How to update record locks when page is split or merged, or
+--------------------------------------------------------------------
+a record is deleted or updated?
+If the size of fields in a record changes, we perform the update by
+a delete followed by an insert. How can we retain the locks set or
+waiting on the record? Because a record lock is indexed in the bitmap
+by the heap number of the record, when we remove the record from the
+record list, it is still possible to keep the lock bits. If the page
+is reorganized, we could make a table of old and new heap numbers,
+and permute the bitmaps in the locks accordingly. We can add to the
+table a row telling where the updated record ended. If the update does
+not require a reorganization of the page, we can simply move the lock
+bits for the updated record to the position determined by its new heap
+number (we may have to allocate a new lock, if we run out of the bitmap
+in the old one).
+ A more complicated case is the one where the reinsertion of the
+updated record is done pessimistically, because the structure of the
+tree may change.
+
+PROBLEM: If a supremum record is removed in a page merge, or a record
+---------------------------------------------------------------------
+removed in a purge, what to do with the waiting lock requests? In a split to
+the right, we just move the lock requests to the new supremum. If a record
+is removed, we could move the waiting lock request to its inheritor, the
+next record in the index. But, the next record may already have lock
+requests on its own queue. A new deadlock check should be made then. Maybe
+it is easier just to release the waiting transactions. They can then enqueue
+new lock requests on appropriate records.
+
+PROBLEM: When a record is inserted, what locks should it inherit from the
+-------------------------------------------------------------------------
+upper neighbor? An insert of a new supremum record in a page split is
+always possible, but an insert of a new user record requires that the upper
+neighbor does not have any lock requests by other transactions, granted or
+waiting, in its lock queue. Solution: We can copy the locks as gap type
+locks, so that even the waiting locks are transformed into granted gap type
+locks on the inserted record. */
+
+/* LOCK COMPATIBILITY MATRIX
+ * IS IX S X AI
+ * IS + + + - +
+ * IX + + - - +
+ * S + - + - -
+ * X - - - - -
+ * AI + + - - -
+ *
+ * Note that for rows, InnoDB only acquires S or X locks.
+ * For tables, InnoDB normally acquires IS or IX locks.
+ * S or X table locks are only acquired for LOCK TABLES.
+ * Auto-increment (AI) locks are needed because of
+ * statement-level MySQL binlog.
+ * See also lock_mode_compatible().
+ */
+#define LK(a,b) (1 << ((a) * LOCK_NUM + (b)))
+#define LKS(a,b) LK(a,b) | LK(b,a)
+
+/* Define the lock compatibility matrix in a ulint. The first line below
+defines the diagonal entries. The following lines define the compatibility
+for LOCK_IX, LOCK_S, and LOCK_AUTO_INC using LKS(), since the matrix
+is symmetric. */
+#define LOCK_MODE_COMPATIBILITY 0 \
+ | LK(LOCK_IS, LOCK_IS) | LK(LOCK_IX, LOCK_IX) | LK(LOCK_S, LOCK_S) \
+ | LKS(LOCK_IX, LOCK_IS) | LKS(LOCK_IS, LOCK_AUTO_INC) \
+ | LKS(LOCK_S, LOCK_IS) \
+ | LKS(LOCK_AUTO_INC, LOCK_IS) | LKS(LOCK_AUTO_INC, LOCK_IX)
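
To make the bit packing concrete, a sketch of how LK() and LKS() expand;
LOCK_NUM is assumed here to be the number of distinct lock modes, declared
alongside the lock_mode enum:

	/* LK(a, b) maps cell (row a, column b) of a matrix to one bit
	of the ulint: */
	LK(LOCK_IX, LOCK_IS)	/* == 1 << (LOCK_IX * LOCK_NUM + LOCK_IS) */

	/* LKS(a, b) fills both symmetric cells at once: */
	LKS(LOCK_IX, LOCK_IS)	/* == LK(LOCK_IX, LOCK_IS)
				      | LK(LOCK_IS, LOCK_IX) */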
+
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ * IS IX S X AI
+ * IS + - - - -
+ * IX + + - - -
+ * S + - + - -
+ * X + + + + +
+ * AI - - - - +
+ * See lock_mode_stronger_or_eq().
+ */
+
+/* Define the stronger-or-equal lock relation in a ulint. This relation
+contains all pairs LK(mode1, mode2) where mode1 is stronger than or
+equal to mode2. */
+#define LOCK_MODE_STRONGER_OR_EQ 0 \
+ | LK(LOCK_IS, LOCK_IS) \
+ | LK(LOCK_IX, LOCK_IS) | LK(LOCK_IX, LOCK_IX) \
+ | LK(LOCK_S, LOCK_IS) | LK(LOCK_S, LOCK_S) \
+ | LK(LOCK_AUTO_INC, LOCK_AUTO_INC) \
+ | LK(LOCK_X, LOCK_IS) | LK(LOCK_X, LOCK_IX) | LK(LOCK_X, LOCK_S) \
+ | LK(LOCK_X, LOCK_AUTO_INC) | LK(LOCK_X, LOCK_X)
+
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool lock_print_waits = FALSE;
+
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
+ibool
+lock_validate(void);
+/*===============*/
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+ ulint space, /*!< in: space id */
+ ulint page_no);/*!< in: page number */
+
+/* Define the following in order to enable lock_rec_validate_page() checks. */
+# undef UNIV_DEBUG_LOCK_VALIDATE
+#endif /* UNIV_DEBUG */
+
+/* The lock system */
+UNIV_INTERN lock_sys_t* lock_sys = NULL;
+
+/* We store info on the latest deadlock error in this buffer. InnoDB
+Monitor will then fetch it and print it */
+UNIV_INTERN ibool lock_deadlock_found = FALSE;
+UNIV_INTERN FILE* lock_latest_err_file;
+
+/* Flags for recursive deadlock search */
+#define LOCK_VICTIM_IS_START 1
+#define LOCK_VICTIM_IS_OTHER 2
+
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
+static
+ibool
+lock_deadlock_occurs(
+/*=================*/
+ lock_t* lock, /*!< in: lock the transaction is requesting */
+ trx_t* trx); /*!< in: transaction */
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
+static
+ulint
+lock_deadlock_recursive(
+/*====================*/
+ trx_t* start, /*!< in: recursion starting point */
+ trx_t* trx, /*!< in: a transaction waiting for a lock */
+ lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ ulint* cost, /*!< in/out: number of calculation steps thus
+ far: if this exceeds LOCK_MAX_N_STEPS_...
+ we return LOCK_VICTIM_IS_START */
+ ulint depth); /*!< in: recursion depth: if this exceeds
+ LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
+ return LOCK_VICTIM_IS_START */
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if bit set */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+ const lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
+{
+ ulint byte_index;
+ ulint bit_index;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ if (i >= lock->un_member.rec_lock.n_bits) {
+
+ return(FALSE);
+ }
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ return(1 & ((const byte*) &lock[1])[byte_index] >> bit_index);
+}
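
A worked example of the bit addressing above: the bitmap is stored
immediately after the lock_t struct, so for heap number i == 13 the
function tests bit 5 of bitmap byte 1.

	byte_index = 13 / 8;	/* == 1 */
	bit_index  = 13 % 8;	/* == 5 */
	/* value returned: (((const byte*) &lock[1])[1] >> 5) & 1 */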
+
+/*************************************************************************/
+
+#define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex)
+#define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex)
+
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+lock_check_trx_id_sanity(
+/*=====================*/
+ trx_id_t trx_id, /*!< in: trx id */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ ibool has_kernel_mutex)/*!< in: TRUE if the caller owns the
+ kernel mutex */
+{
+ ibool is_ok = TRUE;
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ if (!has_kernel_mutex) {
+ mutex_enter(&kernel_mutex);
+ }
+
+ /* A sanity check: the trx_id in rec must be smaller than the global
+ trx id counter */
+
+ if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: transaction id associated"
+ " with record\n",
+ stderr);
+ rec_print_new(stderr, rec, offsets);
+ fputs("InnoDB: in ", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ fprintf(stderr, "\n"
+ "InnoDB: is " TRX_ID_FMT " which is higher than the"
+ " global trx id counter " TRX_ID_FMT "!\n"
+ "InnoDB: The table is corrupt. You have to do"
+ " dump + drop + reimport.\n",
+ TRX_ID_PREP_PRINTF(trx_id),
+ TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
+
+ is_ok = FALSE;
+ }
+
+ if (!has_kernel_mutex) {
+ mutex_exit(&kernel_mutex);
+ }
+
+ return(is_ok);
+}
+
+/*********************************************************************//**
+Checks whether a record is seen in a consistent read.
+@return TRUE if the record is seen by the view, or FALSE if an earlier
+version of the record should be retrieved */
+UNIV_INTERN
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+ const rec_t* rec, /*!< in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ read_view_t* view) /*!< in: consistent read view */
+{
+ trx_id_t trx_id;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ /* NOTE that we call this function while holding the search
+ system latch. To obey the latching order we must NOT reserve the
+ kernel mutex here! */
+
+ trx_id = row_get_rec_trx_id(rec, index, offsets);
+
+ return(read_view_sees_trx_id(view, trx_id));
+}
+
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
+
+NOTE that a non-clustered index page contains so little information on
+its modifications that even when FALSE is returned, the present version of
+rec may still be the right one, but we must verify this from the clustered
+index record.
+
+@return TRUE if certainly sees, or FALSE if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+ const rec_t* rec, /*!< in: user record which
+ should be read or passed over
+ by a read cursor */
+ const read_view_t* view) /*!< in: consistent read view */
+{
+ trx_id_t max_trx_id;
+
+ ut_ad(page_rec_is_user_rec(rec));
+
+ /* NOTE that we might call this function while holding the search
+ system latch. To obey the latching order we must NOT reserve the
+ kernel mutex here! */
+
+ if (recv_recovery_is_on()) {
+
+ return(FALSE);
+ }
+
+ max_trx_id = page_get_max_trx_id(page_align(rec));
+ ut_ad(!ut_dulint_is_zero(max_trx_id));
+
+ return(ut_dulint_cmp(max_trx_id, view->up_limit_id) < 0);
+}
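
A numeric illustration of the final comparison: up_limit_id is the read
view's low-water mark, below which every transaction id is certainly
visible to the view.

	/* suppose the newest change on the page was made by trx id 90
	and view->up_limit_id == 100: since 90 < 100, every committed
	change on the page is visible to the view, the function returns
	TRUE, and no clustered index lookup is needed */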
+
+/*********************************************************************//**
+Creates the lock system at database start. */
+UNIV_INTERN
+void
+lock_sys_create(
+/*============*/
+ ulint n_cells) /*!< in: number of slots in lock hash table */
+{
+ lock_sys = mem_alloc(sizeof(lock_sys_t));
+
+ lock_sys->rec_hash = hash_create(n_cells);
+
+ /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
+
+ lock_latest_err_file = os_file_create_tmpfile();
+ ut_a(lock_latest_err_file);
+}
+
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return size in bytes */
+UNIV_INTERN
+ulint
+lock_get_size(void)
+/*===============*/
+{
+ return((ulint)sizeof(lock_t));
+}
+
+/*********************************************************************//**
+Gets the mode of a lock.
+@return mode */
+UNIV_INLINE
+enum lock_mode
+lock_get_mode(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_ad(lock);
+
+ return(lock->type_mode & LOCK_MODE_MASK);
+}
+
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return TRUE if waiting */
+UNIV_INLINE
+ibool
+lock_get_wait(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_ad(lock);
+
+ if (UNIV_UNLIKELY(lock->type_mode & LOCK_WAIT)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Gets the source table of an ALTER TABLE transaction. The table must be
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
+dict_table_t*
+lock_get_src_table(
+/*===============*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* dest, /*!< in: destination of ALTER TABLE */
+ enum lock_mode* mode) /*!< out: lock mode of the source table */
+{
+ dict_table_t* src;
+ lock_t* lock;
+
+ src = NULL;
+ *mode = LOCK_NONE;
+
+ for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
+ lock;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+ lock_table_t* tab_lock;
+ enum lock_mode lock_mode;
+ if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
+ /* We are only interested in table locks. */
+ continue;
+ }
+ tab_lock = &lock->un_member.tab_lock;
+ if (dest == tab_lock->table) {
+ /* We are not interested in the destination table. */
+ continue;
+ } else if (!src) {
+ /* This presumably is the source table. */
+ src = tab_lock->table;
+ if (UT_LIST_GET_LEN(src->locks) != 1
+ || UT_LIST_GET_FIRST(src->locks) != lock) {
+ /* We only support the case when
+ there is only one lock on this table. */
+ return(NULL);
+ }
+ } else if (src != tab_lock->table) {
+ /* The transaction is locking more than
+ two tables (src and dest): abort */
+ return(NULL);
+ }
+
+ /* Check that the source table is locked by
+ LOCK_IX or LOCK_IS. */
+ lock_mode = lock_get_mode(lock);
+ if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
+ if (*mode != LOCK_NONE && *mode != lock_mode) {
+ /* There are multiple locks on src. */
+ return(NULL);
+ }
+ *mode = lock_mode;
+ }
+ }
+
+ if (!src) {
+ /* No source table lock found: flag the situation to caller */
+ src = dest;
+ }
+
+ return(src);
+}
+
+/*********************************************************************//**
+Determine if the given table is exclusively "owned" by the given
+transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
+ibool
+lock_is_table_exclusive(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx) /*!< in: transaction */
+{
+ const lock_t* lock;
+ ibool ok = FALSE;
+
+ ut_ad(table);
+ ut_ad(trx);
+
+ lock_mutex_enter_kernel();
+
+ for (lock = UT_LIST_GET_FIRST(table->locks);
+ lock;
+ lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
+ if (lock->trx != trx) {
+ /* A lock on the table is held
+ by some other transaction. */
+ goto not_ok;
+ }
+
+ if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
+ /* We are interested in table locks only. */
+ continue;
+ }
+
+ switch (lock_get_mode(lock)) {
+ case LOCK_IX:
+ ok = TRUE;
+ break;
+ case LOCK_AUTO_INC:
+ /* It is allowed for trx to hold an
+ auto_increment lock. */
+ break;
+ default:
+not_ok:
+ /* Other table locks than LOCK_IX are not allowed. */
+ ok = FALSE;
+ goto func_exit;
+ }
+ }
+
+func_exit:
+ lock_mutex_exit_kernel();
+
+ return(ok);
+}
+
+/*********************************************************************//**
+Sets the wait flag of a lock and the back pointer in trx to lock. */
+UNIV_INLINE
+void
+lock_set_lock_and_trx_wait(
+/*=======================*/
+ lock_t* lock, /*!< in: lock */
+ trx_t* trx) /*!< in: trx */
+{
+ ut_ad(lock);
+ ut_ad(trx->wait_lock == NULL);
+
+ trx->wait_lock = lock;
+ lock->type_mode |= LOCK_WAIT;
+}
+
+/**********************************************************************//**
+The back pointer to a waiting lock request in the transaction is set to NULL
+and the wait bit in lock type_mode is reset. */
+UNIV_INLINE
+void
+lock_reset_lock_and_trx_wait(
+/*=========================*/
+ lock_t* lock) /*!< in: record lock */
+{
+ ut_ad((lock->trx)->wait_lock == lock);
+ ut_ad(lock_get_wait(lock));
+
+ /* Reset the back pointer in trx to this waiting lock request */
+
+ (lock->trx)->wait_lock = NULL;
+ lock->type_mode &= ~LOCK_WAIT;
+}
+
+/*********************************************************************//**
+Gets the gap flag of a record lock.
+@return TRUE if gap flag set */
+UNIV_INLINE
+ibool
+lock_rec_get_gap(
+/*=============*/
+ const lock_t* lock) /*!< in: record lock */
+{
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ if (lock->type_mode & LOCK_GAP) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Gets the LOCK_REC_NOT_GAP flag of a record lock.
+@return TRUE if LOCK_REC_NOT_GAP flag set */
+UNIV_INLINE
+ibool
+lock_rec_get_rec_not_gap(
+/*=====================*/
+ const lock_t* lock) /*!< in: record lock */
+{
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ if (lock->type_mode & LOCK_REC_NOT_GAP) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Gets the waiting insert (insert intention) flag of a record lock.
+@return TRUE if LOCK_INSERT_INTENTION flag set */
+UNIV_INLINE
+ibool
+lock_rec_get_insert_intention(
+/*==========================*/
+ const lock_t* lock) /*!< in: record lock */
+{
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ if (lock->type_mode & LOCK_INSERT_INTENTION) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return nonzero if mode1 stronger or equal to mode2 */
+UNIV_INLINE
+ulint
+lock_mode_stronger_or_eq(
+/*=====================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
+{
+ ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
+ || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
+ ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
+ || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
+
+ return((LOCK_MODE_STRONGER_OR_EQ) & LK(mode1, mode2));
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
+{
+ ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
+ || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
+ ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
+ || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
+
+ return((LOCK_MODE_COMPATIBILITY) & LK(mode1, mode2));
+}
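
A few spot checks that follow directly from the two matrices above (a
sketch; ut_a() is InnoDB's always-on assertion):

	ut_a(lock_mode_compatible(LOCK_IS, LOCK_IX));	/* IS row, IX column: + */
	ut_a(!lock_mode_compatible(LOCK_S, LOCK_IX));	/* S is not compatible with IX */
	ut_a(!lock_mode_compatible(LOCK_X, LOCK_X));	/* the X row is all '-' */

	ut_a(lock_mode_stronger_or_eq(LOCK_X, LOCK_S));	/* X covers S */
	ut_a(!lock_mode_stronger_or_eq(LOCK_S, LOCK_X));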
+
+/*********************************************************************//**
+Checks if a lock request for a new lock has to wait for request lock2.
+@return TRUE if new lock has to wait for lock2 to be removed */
+UNIV_INLINE
+ibool
+lock_rec_has_to_wait(
+/*=================*/
+ const trx_t* trx, /*!< in: trx of new lock */
+ ulint type_mode,/*!< in: precise mode of the new lock
+ to set: LOCK_S or LOCK_X, possibly
+ ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
+ LOCK_INSERT_INTENTION */
+ const lock_t* lock2, /*!< in: another record lock; NOTE that
+ it is assumed that this has a lock bit
+ set on the same record as in the new
+ lock we are setting */
+ ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the
+ lock on the 'supremum' record of an
+ index page: we know then that the lock
+ request is really for a 'gap' type lock */
+{
+ ut_ad(trx && lock2);
+ ut_ad(lock_get_type_low(lock2) == LOCK_REC);
+
+ if (trx != lock2->trx
+ && !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
+ lock_get_mode(lock2))) {
+
+ /* We have somewhat complex rules when gap type record locks
+ cause waits */
+
+ if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
+ && !(type_mode & LOCK_INSERT_INTENTION)) {
+
+ /* Gap type locks without LOCK_INSERT_INTENTION flag
+ do not need to wait for anything. This is because
+ different users can have conflicting lock types
+ on gaps. */
+
+ return(FALSE);
+ }
+
+ if (!(type_mode & LOCK_INSERT_INTENTION)
+ && lock_rec_get_gap(lock2)) {
+
+			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
+			does not need to wait for a gap type lock */
+
+ return(FALSE);
+ }
+
+ if ((type_mode & LOCK_GAP)
+ && lock_rec_get_rec_not_gap(lock2)) {
+
+ /* Lock on gap does not need to wait for
+ a LOCK_REC_NOT_GAP type lock */
+
+ return(FALSE);
+ }
+
+ if (lock_rec_get_insert_intention(lock2)) {
+
+ /* No lock request needs to wait for an insert
+ intention lock to be removed. This is ok since our
+ rules allow conflicting locks on gaps. This eliminates
+ a spurious deadlock caused by a next-key lock waiting
+ for an insert intention lock; when the insert
+ intention lock was granted, the insert deadlocked on
+ the waiting next-key lock.
+
+ Also, insert intention locks do not disturb each
+ other. */
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
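
For orientation, a hedged sketch of how an insert might consult this
function (the type_mode shown is an assumption based on the
insert-intention discussion above): an insert requests a gap x-lock with
the insert-intention flag, the one gap-type request that must wait behind
conflicting gap locks.

	/* does the insert into the gap before heap_no have to wait
	behind the existing request lock2? */
	if (lock_rec_has_to_wait(trx,
				 LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
				 lock2, FALSE)) {
		/* enqueue a waiting insert intention lock */
	}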
+
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
+ibool
+lock_has_to_wait(
+/*=============*/
+ const lock_t* lock1, /*!< in: waiting lock */
+ const lock_t* lock2) /*!< in: another lock; NOTE that it is
+ assumed that this has a lock bit set
+ on the same record as in lock1 if the
+ locks are record locks */
+{
+ ut_ad(lock1 && lock2);
+
+ if (lock1->trx != lock2->trx
+ && !lock_mode_compatible(lock_get_mode(lock1),
+ lock_get_mode(lock2))) {
+ if (lock_get_type_low(lock1) == LOCK_REC) {
+ ut_ad(lock_get_type_low(lock2) == LOCK_REC);
+
+ /* If this lock request is for a supremum record
+ then the second bit on the lock bitmap is set */
+
+ return(lock_rec_has_to_wait(lock1->trx,
+ lock1->type_mode, lock2,
+ lock_rec_get_nth_bit(
+ lock1, 1)));
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
+
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return number of bits */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+ const lock_t* lock) /*!< in: record lock */
+{
+ return(lock->un_member.rec_lock.n_bits);
+}
+
+/**********************************************************************//**
+Sets the nth bit of a record lock to TRUE. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*=================*/
+ lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
+{
+ ulint byte_index;
+ ulint bit_index;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
+}
+
+/**********************************************************************//**
+Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED
+if none is found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
+ulint
+lock_rec_find_set_bit(
+/*==================*/
+ const lock_t* lock) /*!< in: record lock with at least one bit set */
+{
+ ulint i;
+
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+
+ if (lock_rec_get_nth_bit(lock, i)) {
+
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**********************************************************************//**
+Resets the nth bit of a record lock. */
+UNIV_INLINE
+void
+lock_rec_reset_nth_bit(
+/*===================*/
+ lock_t* lock, /*!< in: record lock */
+	ulint	i)	/*!< in: index of the bit to reset; the bit is
+			assumed to be set when this function is called */
+{
+ ulint byte_index;
+ ulint bit_index;
+
+ ut_ad(lock);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+ byte_index = i / 8;
+ bit_index = i % 8;
+
+ ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
+}
+
+/*********************************************************************//**
+Gets the next record lock on the same page as the given lock.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+ lock_t* lock) /*!< in: a record lock */
+{
+ ulint space;
+ ulint page_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ space = lock->un_member.rec_lock.space;
+ page_no = lock->un_member.rec_lock.page_no;
+
+ for (;;) {
+ lock = HASH_GET_NEXT(hash, lock);
+
+ if (!lock) {
+
+ break;
+ }
+
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+ }
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by its
+file address.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = HASH_GET_FIRST(lock_sys->rec_hash,
+ lock_rec_hash(space, page_no));
+ while (lock) {
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Returns TRUE if there are explicit record locks on a page.
+@return TRUE if there are explicit record locks on the page */
+UNIV_INTERN
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
+{
+ ibool ret;
+
+ mutex_enter(&kernel_mutex);
+
+ if (lock_rec_get_first_on_page_addr(space, page_no)) {
+ ret = TRUE;
+ } else {
+ ret = FALSE;
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ return(ret);
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by a
+pointer to it.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+ const buf_block_t* block) /*!< in: buffer block */
+{
+ ulint hash;
+ lock_t* lock;
+ ulint space = buf_block_get_space(block);
+ ulint page_no = buf_block_get_page_no(block);
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ hash = buf_block_get_lock_hash_val(block);
+
+ lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
+
+ while (lock) {
+ if ((lock->un_member.rec_lock.space == space)
+ && (lock->un_member.rec_lock.page_no == page_no)) {
+
+ break;
+ }
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+ ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock) /*!< in: lock */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ do {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ lock = lock_rec_get_next_on_page(lock);
+ } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+ const buf_block_t* block, /*!< in: block containing the record */
+ ulint heap_no)/*!< in: heap number of the record */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+ break;
+ }
+ }
+
+ return(lock);
+}
+
+/*********************************************************************//**
+Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
+pointer in the transaction! This function is used in lock object creation
+and resetting. */
+static
+void
+lock_rec_bitmap_reset(
+/*==================*/
+ lock_t* lock) /*!< in: record lock */
+{
+ ulint n_bytes;
+
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ /* Reset to zero the bitmap which resides immediately after the lock
+ struct */
+
+ n_bytes = lock_rec_get_n_bits(lock) / 8;
+
+ ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
+
+ memset(&lock[1], 0, n_bytes);
+}
+
+/*********************************************************************//**
+Copies a record lock to heap.
+@return copy of lock */
+static
+lock_t*
+lock_rec_copy(
+/*==========*/
+ const lock_t* lock, /*!< in: record lock */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ulint size;
+
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
+
+ return(mem_heap_dup(heap, lock, size));
+}
+
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
+lock_rec_get_prev(
+/*==============*/
+ const lock_t* in_lock,/*!< in: record lock */
+ ulint heap_no)/*!< in: heap number of the record */
+{
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+ lock_t* found_lock = NULL;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
+
+ space = in_lock->un_member.rec_lock.space;
+ page_no = in_lock->un_member.rec_lock.page_no;
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ for (;;) {
+ ut_ad(lock);
+
+ if (lock == in_lock) {
+
+ return(found_lock);
+ }
+
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+
+ found_lock = lock;
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+}
+
+/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
+
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_table_has(
+/*===========*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: table */
+ enum lock_mode mode) /*!< in: lock mode */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* Look for stronger locks the same trx already has on the table */
+
+ lock = UT_LIST_GET_LAST(table->locks);
+
+ while (lock != NULL) {
+
+ if (lock->trx == trx
+ && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
+
+ /* The same trx already has locked the table in
+ a mode stronger or equal to the mode given */
+
+ ut_ad(!lock_get_wait(lock));
+
+ return(lock);
+ }
+
+ lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+ }
+
+ return(NULL);
+}
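
A sketch of the intended use: before allocating a new table lock struct,
a caller can ask whether the transaction is already covered.

	if (lock_table_has(trx, table, LOCK_IX)) {
		/* trx already holds IX, or something stronger such as X,
		on the table: no new lock request is needed */
	}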
+
+/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
+
+/*********************************************************************//**
+Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
+to precise_mode.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_has_expl(
+/*==============*/
+ ulint precise_mode,/*!< in: LOCK_S or LOCK_X
+ possibly ORed to LOCK_GAP or
+ LOCK_REC_NOT_GAP, for a
+	supremum record we regard this
+	always as a gap type request */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
+ || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
+ ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock) {
+ if (lock->trx == trx
+ && lock_mode_stronger_or_eq(lock_get_mode(lock),
+ precise_mode & LOCK_MODE_MASK)
+ && !lock_get_wait(lock)
+ && (!lock_rec_get_rec_not_gap(lock)
+ || (precise_mode & LOCK_REC_NOT_GAP)
+ || heap_no == PAGE_HEAP_NO_SUPREMUM)
+ && (!lock_rec_get_gap(lock)
+ || (precise_mode & LOCK_GAP)
+ || heap_no == PAGE_HEAP_NO_SUPREMUM)
+ && (!lock_rec_get_insert_intention(lock))) {
+
+ return(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ return(NULL);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks if some other transaction has a lock request in the queue.
+@return lock or NULL */
+static
+lock_t*
+lock_rec_other_has_expl_req(
+/*========================*/
+ enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */
+ ulint gap, /*!< in: LOCK_GAP if also gap
+ locks are taken into account,
+ or 0 if not */
+ ulint wait, /*!< in: LOCK_WAIT if also
+ waiting locks are taken into
+ account, or 0 if not */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ const trx_t* trx) /*!< in: transaction, or NULL if
+ requests by all transactions
+ are taken into account */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
+ ut_ad(gap == 0 || gap == LOCK_GAP);
+ ut_ad(wait == 0 || wait == LOCK_WAIT);
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock) {
+ if (lock->trx != trx
+ && (gap
+ || !(lock_rec_get_gap(lock)
+ || heap_no == PAGE_HEAP_NO_SUPREMUM))
+ && (wait || !lock_get_wait(lock))
+ && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
+
+ return(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ return(NULL);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Checks if some other transaction has a conflicting explicit lock request
+in the queue, so that we have to wait.
+@return lock or NULL */
+static
+lock_t*
+lock_rec_other_has_conflicting(
+/*===========================*/
+ enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
+ possibly ORed to LOCK_GAP or
+	LOCK_REC_NOT_GAP,
+ LOCK_INSERT_INTENTION */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ trx_t* trx) /*!< in: our transaction */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ if (UNIV_LIKELY_NULL(lock)) {
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+
+ do {
+ if (lock_rec_has_to_wait(trx, mode, lock,
+ TRUE)) {
+ return(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ } while (lock);
+ } else {
+
+ do {
+ if (lock_rec_has_to_wait(trx, mode, lock,
+ FALSE)) {
+ return(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ } while (lock);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Looks for a record lock struct of a suitable type by the same trx on the same
+page. This can be used to save space when a new record lock should be set on a
+page: no new struct is needed if a suitable old one is found.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+ ulint type_mode, /*!< in: lock type_mode field */
+ ulint heap_no, /*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
+ const trx_t* trx) /*!< in: transaction */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ while (lock != NULL) {
+ if (lock->trx == trx
+ && lock->type_mode == type_mode
+ && lock_rec_get_n_bits(lock) > heap_no) {
+
+ return(lock);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a secondary
+index.
+@return transaction which has the x-lock, or NULL */
+static
+trx_t*
+lock_sec_rec_some_has_impl_off_kernel(
+/*==================================*/
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: secondary index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+ const page_t* page = page_align(rec);
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ /* Some transaction may have an implicit x-lock on the record only
+ if the max trx id for the page >= min trx id for the trx list, or
+ database recovery is running. We do not write the changes of a page
+ max trx id to the log, and therefore during recovery, this value
+ for a page may be incorrect. */
+
+ if (!(ut_dulint_cmp(page_get_max_trx_id(page),
+ trx_list_get_min_trx_id()) >= 0)
+ && !recv_recovery_is_on()) {
+
+ return(NULL);
+ }
+
+ /* Ok, in this case it is possible that some transaction has an
+ implicit x-lock. We have to look in the clustered index. */
+
+ if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
+ rec, index, offsets, TRUE)) {
+ buf_page_print(page, 0);
+
+ /* The page is corrupt: try to avoid a crash by returning
+ NULL */
+ return(NULL);
+ }
+
+ return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
+}
+
+/*********************************************************************//**
+Returns the approximate number of record locks (bits set in the bitmap) for
+this transaction. Since delete-marked records may be removed, the
+record count will not be precise. */
+UNIV_INTERN
+ulint
+lock_number_of_rows_locked(
+/*=======================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+ ulint n_records = 0;
+ ulint n_bits;
+ ulint n_bit;
+
+ lock = UT_LIST_GET_FIRST(trx->trx_locks);
+
+ while (lock) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ n_bits = lock_rec_get_n_bits(lock);
+
+ for (n_bit = 0; n_bit < n_bits; n_bit++) {
+ if (lock_rec_get_nth_bit(lock, n_bit)) {
+ n_records++;
+ }
+ }
+ }
+
+ lock = UT_LIST_GET_NEXT(trx_locks, lock);
+ }
+
+ return (n_records);
+}
+
+/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
+
+/*********************************************************************//**
+Creates a new record lock and inserts it into the lock queue. Does NOT check
+for deadlocks or lock compatibility!
+@return created lock */
+static
+lock_t*
+lock_rec_create(
+/*============*/
+ ulint type_mode,/*!< in: lock mode and wait
+ flag, type is ignored and
+ replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+ ulint page_no;
+ ulint space;
+ ulint n_bits;
+ ulint n_bytes;
+ const page_t* page;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ page = block->frame;
+
+ ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+ /* If rec is the supremum record, then we reset the gap and
+ LOCK_REC_NOT_GAP bits, as all locks on the supremum are
+ automatically of the gap type */
+
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+ ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
+
+ type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
+ }
+
+ /* Make lock bitmap bigger by a safety margin */
+ n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
+ n_bytes = 1 + n_bits / 8;
+
+ lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);
+
+ UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
+
+ lock->trx = trx;
+
+ lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
+ lock->index = index;
+
+ lock->un_member.rec_lock.space = space;
+ lock->un_member.rec_lock.page_no = page_no;
+ lock->un_member.rec_lock.n_bits = n_bytes * 8;
+
+ /* Reset to zero the bitmap which resides immediately after the
+ lock struct */
+
+ lock_rec_bitmap_reset(lock);
+
+ /* Set the bit corresponding to rec */
+ lock_rec_set_nth_bit(lock, heap_no);
+
+ HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
+
+ return(lock);
+}
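+
+/* Worked example, not from the original source: assuming
+LOCK_PAGE_BITMAP_MARGIN is 64 (its value in lock0priv.h), a page with
+page_dir_get_n_heap(page) == 70 gives n_bits = 134 and
+n_bytes = 1 + 134 / 8 = 17, so the bitmap stored after the struct
+covers 17 * 8 = 136 heap numbers; the margin lets records inserted
+after the lock was created share the same bitmap. */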
+
+/*********************************************************************//**
+Enqueues a waiting request for a lock which cannot be granted immediately.
+Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
+static
+ulint
+lock_rec_enqueue_waiting(
+/*=====================*/
+ ulint type_mode,/*!< in: lock mode this
+ transaction is requesting:
+ LOCK_S or LOCK_X, possibly
+ ORed with LOCK_GAP or
+ LOCK_REC_NOT_GAP, ORed with
+ LOCK_INSERT_INTENTION if this
+ waiting lock request is set
+ when performing an insert of
+ an index record */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ lock_t* lock;
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* Test if there already is some other reason to suspend thread:
+ we do not enqueue a lock request if the query thread should be
+ stopped anyway */
+
+ if (UNIV_UNLIKELY(que_thr_stop(thr))) {
+
+ ut_error;
+
+ return(DB_QUE_THR_SUSPENDED);
+ }
+
+ trx = thr_get_trx(thr);
+
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: a record lock wait happens"
+ " in a dictionary operation!\n"
+ "InnoDB: ", stderr);
+ dict_index_name_print(stderr, trx, index);
+ fputs(".\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n",
+ stderr);
+ }
+
+ /* Enqueue the lock request that will wait to be granted */
+ lock = lock_rec_create(type_mode | LOCK_WAIT,
+ block, heap_no, index, trx);
+
+ /* Check if a deadlock occurs: if yes, remove the lock request and
+ return an error code */
+
+ if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
+
+ lock_reset_lock_and_trx_wait(lock);
+ lock_rec_reset_nth_bit(lock, heap_no);
+
+ return(DB_DEADLOCK);
+ }
+
+ /* If there was a deadlock but we chose another transaction as a
+ victim, it is possible that we already have the lock now granted! */
+
+ if (trx->wait_lock == NULL) {
+
+ return(DB_SUCCESS);
+ }
+
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
+
+ ut_a(que_thr_stop(thr));
+
+#ifdef UNIV_DEBUG
+ if (lock_print_waits) {
+ fprintf(stderr, "Lock wait for trx %lu in index ",
+ (ulong) ut_dulint_get_low(trx->id));
+ ut_print_name(stderr, trx, FALSE, index->name);
+ }
+#endif /* UNIV_DEBUG */
+
+ return(DB_LOCK_WAIT);
+}
+
+/*********************************************************************//**
+Adds a record lock request in the record queue. The request is normally
+added as the last in the queue, but if there are no waiting lock requests
+on the record, and the request to be added is not a waiting request, we
+can reuse a suitable record lock object already existing on the same page,
+just setting the appropriate bit in its bitmap. This is a low-level function
+which does NOT check for deadlocks or lock compatibility!
+@return lock where the bit was set */
+static
+lock_t*
+lock_rec_add_to_queue(
+/*==================*/
+ ulint type_mode,/*!< in: lock mode, wait, gap
+ etc. flags; type is ignored
+ and replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx) /*!< in: transaction */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+#ifdef UNIV_DEBUG
+ switch (type_mode & LOCK_MODE_MASK) {
+ case LOCK_X:
+ case LOCK_S:
+ break;
+ default:
+ ut_error;
+ }
+
+ if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
+ enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
+ ? LOCK_X
+ : LOCK_S;
+ lock_t* other_lock
+ = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
+ block, heap_no, trx);
+ ut_a(!other_lock);
+ }
+#endif /* UNIV_DEBUG */
+
+ type_mode |= LOCK_REC;
+
+ /* If rec is the supremum record, then we can reset the gap bit, as
+ all locks on the supremum are automatically of the gap type, and we
+ try to avoid unnecessary memory consumption of a new record lock
+ struct for a gap type lock */
+
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+ ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
+
+ /* There should never be LOCK_REC_NOT_GAP on a supremum
+ record, but let us play safe */
+
+ type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
+ }
+
+ /* Look for a waiting lock request on the same record or on a gap */
+
+ lock = lock_rec_get_first_on_page(block);
+
+ while (lock != NULL) {
+ if (lock_get_wait(lock)
+ && (lock_rec_get_nth_bit(lock, heap_no))) {
+
+ goto somebody_waits;
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
+
+ /* Look for a similar record lock on the same page:
+ if one is found and there are no waiting lock requests,
+ we can just set the bit */
+
+ lock = lock_rec_find_similar_on_page(
+ type_mode, heap_no,
+ lock_rec_get_first_on_page(block), trx);
+
+ if (lock) {
+
+ lock_rec_set_nth_bit(lock, heap_no);
+
+ return(lock);
+ }
+ }
+
+somebody_waits:
+ return(lock_rec_create(type_mode, block, heap_no, index, trx));
+}
+
+/*********************************************************************//**
+This is a fast routine for locking a record in the most common cases:
+there are no explicit locks on the page, or there is just one lock, owned
+by this transaction, and of the right type_mode. This is a low-level function
+which does NOT look at implicit locks! Checks lock compatibility within
+explicit locks. This function sets a normal next-key lock, or in the case of
+a page supremum record, a gap type lock.
+@return TRUE if locking succeeded */
+UNIV_INLINE
+ibool
+lock_rec_lock_fast(
+/*===============*/
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ lock_t* lock;
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
+ || (LOCK_MODE_MASK & mode) == LOCK_X);
+ ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) == 0
+ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+
+ lock = lock_rec_get_first_on_page(block);
+
+ trx = thr_get_trx(thr);
+
+ if (lock == NULL) {
+ if (!impl) {
+ lock_rec_create(mode, block, heap_no, index, trx);
+ }
+
+ return(TRUE);
+ }
+
+ if (lock_rec_get_next_on_page(lock)) {
+
+ return(FALSE);
+ }
+
+ if (lock->trx != trx
+ || lock->type_mode != (mode | LOCK_REC)
+ || lock_rec_get_n_bits(lock) <= heap_no) {
+
+ return(FALSE);
+ }
+
+ if (!impl) {
+		/* If the nth bit of the record lock is already set then we
+		do not set a new lock bit, otherwise we set it */
+
+ if (!lock_rec_get_nth_bit(lock, heap_no)) {
+ lock_rec_set_nth_bit(lock, heap_no);
+ }
+ }
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+This is the general, and slower, routine for locking a record. This is a
+low-level function which does NOT look at implicit locks! Checks lock
+compatibility within explicit locks. This function sets a normal next-key
+lock, or in the case of a page supremum record, a gap type lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
+static
+ulint
+lock_rec_lock_slow(
+/*===============*/
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ trx_t* trx;
+ ulint err;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
+ || (LOCK_MODE_MASK & mode) == LOCK_X);
+ ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) == 0
+ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+
+ trx = thr_get_trx(thr);
+
+ if (lock_rec_has_expl(mode, block, heap_no, trx)) {
+ /* The trx already has a strong enough lock on rec: do
+ nothing */
+
+ err = DB_SUCCESS;
+ } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
+
+		/* Another transaction has a non-gap conflicting request in
+		the queue, and this transaction does not already have a
+		granted lock strong enough on the record: we have to wait. */
+
+ err = lock_rec_enqueue_waiting(mode, block, heap_no,
+ index, thr);
+ } else {
+ if (!impl) {
+ /* Set the requested lock on the record */
+
+ lock_rec_add_to_queue(LOCK_REC | mode, block,
+ heap_no, index, trx);
+ }
+
+ err = DB_SUCCESS;
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Tries to lock the specified record in the mode requested. If not immediately
+possible, enqueues a waiting lock request. This is a low-level function
+which does NOT look at implicit locks! Checks lock compatibility within
+explicit locks. This function sets a normal next-key lock, or in the case
+of a page supremum record, a gap type lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
+static
+ulint
+lock_rec_lock(
+/*==========*/
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ulint err;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
+ || (LOCK_MODE_MASK & mode) == LOCK_X);
+ ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
+ || mode - (LOCK_MODE_MASK & mode) == 0);
+
+ if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
+
+ /* We try a simplified and faster subroutine for the most
+ common cases */
+
+ err = DB_SUCCESS;
+ } else {
+ err = lock_rec_lock_slow(impl, mode, block,
+ heap_no, index, thr);
+ }
+
+ return(err);
+}
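+
+/* Illustrative sketch, not part of the original file: a typical call
+shape from the higher-level locking functions in this module, which
+request a next-key S-lock while holding the kernel mutex (LOCK_ORDINARY
+is assumed to be 0, i.e. neither gap flag set). Guarded out. */
+#if 0
+	ulint	err;
+
+	lock_mutex_enter_kernel();
+
+	err = lock_rec_lock(FALSE, LOCK_S | LOCK_ORDINARY,
+			    block, heap_no, index, thr);
+
+	lock_mutex_exit_kernel();
+
+	/* err is now DB_SUCCESS, DB_LOCK_WAIT, or an error code */
+#endif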
+
+/*********************************************************************//**
+Checks if a waiting record lock request still has to wait in a queue.
+@return TRUE if still has to wait */
+static
+ibool
+lock_rec_has_to_wait_in_queue(
+/*==========================*/
+ lock_t* wait_lock) /*!< in: waiting record lock */
+{
+ lock_t* lock;
+ ulint space;
+ ulint page_no;
+ ulint heap_no;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_wait(wait_lock));
+ ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
+
+ space = wait_lock->un_member.rec_lock.space;
+ page_no = wait_lock->un_member.rec_lock.page_no;
+ heap_no = lock_rec_find_set_bit(wait_lock);
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ while (lock != wait_lock) {
+
+ if (lock_rec_get_nth_bit(lock, heap_no)
+ && lock_has_to_wait(wait_lock, lock)) {
+
+ return(TRUE);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ return(FALSE);
+}
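+
+/* Worked example, not from the original source: suppose the queue for
+a heap number holds, in order, a granted S-lock of trx T1, a waiting
+X-lock of T2, and a waiting S-lock of T3. If T1's lock is removed, T2
+no longer has any conflicting lock ahead of it and can be granted,
+while T3 must keep waiting: T2's X-lock is still ahead of it in the
+queue and is incompatible with S. */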
+
+/*************************************************************//**
+Grants a lock to a waiting lock request and releases the waiting
+transaction. */
+static
+void
+lock_grant(
+/*=======*/
+ lock_t* lock) /*!< in/out: waiting lock request */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock_reset_lock_and_trx_wait(lock);
+
+ if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+ trx_t* trx = lock->trx;
+ dict_table_t* table = lock->un_member.tab_lock.table;
+
+ if (table->autoinc_trx == trx) {
+ fprintf(stderr,
+ "InnoDB: Error: trx already had"
+ " an AUTO-INC lock!\n");
+ } else {
+ table->autoinc_trx = trx;
+
+ ib_vector_push(trx->autoinc_locks, lock);
+ }
+ }
+
+#ifdef UNIV_DEBUG
+ if (lock_print_waits) {
+ fprintf(stderr, "Lock wait for trx %lu ends\n",
+ (ulong) ut_dulint_get_low(lock->trx->id));
+ }
+#endif /* UNIV_DEBUG */
+
+ /* If we are resolving a deadlock by choosing another transaction
+ as a victim, then our original transaction may not be in the
+ TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
+ for it */
+
+ if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ trx_end_lock_wait(lock->trx);
+ }
+}
+
+/*************************************************************//**
+Cancels a waiting record lock request and releases the waiting transaction
+that requested it. NOTE: does NOT check if waiting lock requests behind this
+one can now be granted! */
+static
+void
+lock_rec_cancel(
+/*============*/
+ lock_t* lock) /*!< in: waiting record lock request */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+ /* Reset the bit (there can be only one set bit) in the lock bitmap */
+ lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
+
+ /* Reset the wait flag and the back pointer to lock in trx */
+
+ lock_reset_lock_and_trx_wait(lock);
+
+ /* The following function releases the trx from lock wait */
+
+ trx_end_lock_wait(lock->trx);
+}
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue and
+grants locks to other transactions in the queue if they now are entitled
+to a lock. NOTE: all record locks contained in in_lock are removed. */
+static
+void
+lock_rec_dequeue_from_page(
+/*=======================*/
+ lock_t* in_lock)/*!< in: record lock object: all record locks which
+ are contained in this lock object are removed;
+ transactions waiting behind will get their lock
+ requests granted, if they are now qualified to it */
+{
+ ulint space;
+ ulint page_no;
+ lock_t* lock;
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
+
+ trx = in_lock->trx;
+
+ space = in_lock->un_member.rec_lock.space;
+ page_no = in_lock->un_member.rec_lock.page_no;
+
+ HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), in_lock);
+
+ UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+
+ /* Check if waiting locks in the queue can now be granted: grant
+ locks if there are no conflicting locks ahead. */
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ while (lock != NULL) {
+ if (lock_get_wait(lock)
+ && !lock_rec_has_to_wait_in_queue(lock)) {
+
+ /* Grant the lock */
+ lock_grant(lock);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+}
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue. */
+static
+void
+lock_rec_discard(
+/*=============*/
+ lock_t* in_lock)/*!< in: record lock object: all record locks which
+ are contained in this lock object are removed */
+{
+ ulint space;
+ ulint page_no;
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
+
+ trx = in_lock->trx;
+
+ space = in_lock->un_member.rec_lock.space;
+ page_no = in_lock->un_member.rec_lock.page_no;
+
+ HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), in_lock);
+
+ UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+}
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function does not move locks or check for waiting locks; therefore the
+lock bitmaps must already be reset when this function is called. */
+static
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+ const buf_block_t* block) /*!< in: page to be discarded */
+{
+ ulint space;
+ ulint page_no;
+ lock_t* lock;
+ lock_t* next_lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ while (lock != NULL) {
+ ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
+ ut_ad(!lock_get_wait(lock));
+
+ next_lock = lock_rec_get_next_on_page(lock);
+
+ lock_rec_discard(lock);
+
+ lock = next_lock;
+ }
+}
+
+/*============= RECORD LOCK MOVING AND INHERITING ===================*/
+
+/*************************************************************//**
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here. */
+static
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no)/*!< in: heap number of record */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock != NULL) {
+ if (lock_get_wait(lock)) {
+ lock_rec_cancel(lock);
+ } else {
+ lock_rec_reset_nth_bit(lock, heap_no);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+}
+
+/*************************************************************//**
+Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of
+the other record. Also waiting lock requests on rec are inherited as
+GRANTED gap locks. */
+static
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+ const buf_block_t* heir_block, /*!< in: block containing the
+ record which inherits */
+ const buf_block_t* block, /*!< in: block containing the
+ record from which inherited;
+ does NOT reset the locks on
+ this record */
+ ulint heir_heap_no, /*!< in: heap_no of the
+ inheriting record */
+ ulint heap_no) /*!< in: heap_no of the
+ donating record */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(block, heap_no);
+
+	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
+	the READ COMMITTED isolation level, we do not want locks set
+	by an UPDATE or a DELETE to be inherited as gap type locks. But we
+	DO want S-locks set by a consistency constraint to be inherited
+	even in that case. */
+
+ while (lock != NULL) {
+ if (!lock_rec_get_insert_intention(lock)
+ && !((srv_locks_unsafe_for_binlog
+ || lock->trx->isolation_level
+ == TRX_ISO_READ_COMMITTED)
+ && lock_get_mode(lock) == LOCK_X)) {
+
+ lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+ | lock_get_mode(lock),
+ heir_block, heir_heap_no,
+ lock->index, lock->trx);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+}
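+
+/* Worked example, not from the original source: if trx T holds a
+next-key X-lock on record b in the sequence (a, b, c) and b is removed,
+lock_update_delete() below makes c inherit the lock as a GRANTED gap
+lock (LOCK_X | LOCK_GAP on c) under the default isolation level, so
+the gap left by b stays protected; an insert-intention lock on b would
+not be inherited. */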
+
+/*************************************************************//**
+Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of the
+other record. Also waiting lock requests are inherited as GRANTED gap locks. */
+static
+void
+lock_rec_inherit_to_gap_if_gap_lock(
+/*================================*/
+ const buf_block_t* block, /*!< in: buffer block */
+ ulint heir_heap_no, /*!< in: heap_no of
+ record which inherits */
+ ulint heap_no) /*!< in: heap_no of record
+ from which inherited;
+ does NOT reset the locks
+ on this record */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock != NULL) {
+ if (!lock_rec_get_insert_intention(lock)
+ && (heap_no == PAGE_HEAP_NO_SUPREMUM
+ || !lock_rec_get_rec_not_gap(lock))) {
+
+ lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+ | lock_get_mode(lock),
+ block, heir_heap_no,
+ lock->index, lock->trx);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+}
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+static
+void
+lock_rec_move(
+/*==========*/
+ const buf_block_t* receiver, /*!< in: buffer block containing
+ the receiving record */
+ const buf_block_t* donator, /*!< in: buffer block containing
+ the donating record */
+ ulint receiver_heap_no,/*!< in: heap_no of the record
+ which gets the locks; there
+ must be no lock requests
+ on it! */
+ ulint donator_heap_no)/*!< in: heap_no of the record
+ which gives the locks */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = lock_rec_get_first(donator, donator_heap_no);
+
+ ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
+
+ while (lock != NULL) {
+ const ulint type_mode = lock->type_mode;
+
+ lock_rec_reset_nth_bit(lock, donator_heap_no);
+
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ /* Note that we FIRST reset the bit, and then set the lock:
+ the function works also if donator == receiver */
+
+ lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
+ lock->index, lock->trx);
+ lock = lock_rec_get_next(donator_heap_no, lock);
+ }
+
+ ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
+}
+
+/*************************************************************//**
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+UNIV_INTERN
+void
+lock_move_reorganize_page(
+/*======================*/
+ const buf_block_t* block, /*!< in: old index page, now
+ reorganized */
+ const buf_block_t* oblock) /*!< in: copy of the old, not
+ reorganized page */
+{
+ lock_t* lock;
+ UT_LIST_BASE_NODE_T(lock_t) old_locks;
+ mem_heap_t* heap = NULL;
+ ulint comp;
+
+ lock_mutex_enter_kernel();
+
+ lock = lock_rec_get_first_on_page(block);
+
+ if (lock == NULL) {
+ lock_mutex_exit_kernel();
+
+ return;
+ }
+
+ heap = mem_heap_create(256);
+
+ /* Copy first all the locks on the page to heap and reset the
+ bitmaps in the original locks; chain the copies of the locks
+ using the trx_locks field in them. */
+
+ UT_LIST_INIT(old_locks);
+
+ do {
+ /* Make a copy of the lock */
+ lock_t* old_lock = lock_rec_copy(lock, heap);
+
+ UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
+
+ /* Reset bitmap of lock */
+ lock_rec_bitmap_reset(lock);
+
+ if (lock_get_wait(lock)) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ lock = lock_rec_get_next_on_page(lock);
+ } while (lock != NULL);
+
+ comp = page_is_comp(block->frame);
+ ut_ad(comp == page_is_comp(oblock->frame));
+
+ for (lock = UT_LIST_GET_FIRST(old_locks); lock;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+ /* NOTE: we copy also the locks set on the infimum and
+ supremum of the page; the infimum may carry locks if an
+ update of a record is occurring on the page, and its locks
+ were temporarily stored on the infimum */
+ page_cur_t cur1;
+ page_cur_t cur2;
+
+ page_cur_set_before_first(block, &cur1);
+ page_cur_set_before_first(oblock, &cur2);
+
+ /* Set locks according to old locks */
+ for (;;) {
+ ulint old_heap_no;
+ ulint new_heap_no;
+
+ ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
+ page_cur_get_rec(&cur2),
+ rec_get_data_size_old(
+ page_cur_get_rec(
+ &cur2))));
+ if (UNIV_LIKELY(comp)) {
+ old_heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ new_heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ old_heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ new_heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ }
+
+ if (lock_rec_get_nth_bit(lock, old_heap_no)) {
+
+ /* Clear the bit in old_lock. */
+ ut_d(lock_rec_reset_nth_bit(lock,
+ old_heap_no));
+
+ /* NOTE that the old lock bitmap could be too
+ small for the new heap number! */
+
+ lock_rec_add_to_queue(lock->type_mode, block,
+ new_heap_no,
+ lock->index, lock->trx);
+
+ /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
+ && lock_get_wait(lock)) {
+ fprintf(stderr,
+ "---\n--\n!!!Lock reorg: supr type %lu\n",
+ lock->type_mode);
+ } */
+ }
+
+ if (UNIV_UNLIKELY
+ (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+
+ ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
+ break;
+ }
+
+ page_cur_move_to_next(&cur1);
+ page_cur_move_to_next(&cur2);
+ }
+
+#ifdef UNIV_DEBUG
+ {
+ ulint i = lock_rec_find_set_bit(lock);
+
+ /* Check that all locks were moved. */
+ if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
+ fprintf(stderr,
+ "lock_move_reorganize_page():"
+ " %lu not moved in %p\n",
+ (ulong) i, (void*) lock);
+ ut_error;
+ }
+ }
+#endif /* UNIV_DEBUG */
+ }
+
+ lock_mutex_exit_kernel();
+
+ mem_heap_free(heap);
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_page_no(block)));
+#endif
+}
+
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_end(
+/*===================*/
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec) /*!< in: record on page: this
+ is the first record moved */
+{
+ lock_t* lock;
+ const ulint comp = page_rec_is_comp(rec);
+
+ lock_mutex_enter_kernel();
+
+	/* Note: when we move locks from record to record, waiting locks
+	and possible granted gap type locks behind them are enqueued in
+	the original order, because new elements are inserted at the end
+	of the hash chain, and lock_rec_add_to_queue does not reuse locks
+	if there are waiters in the queue. */
+
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ page_cur_t cur1;
+ page_cur_t cur2;
+ const ulint type_mode = lock->type_mode;
+
+ page_cur_position(rec, block, &cur1);
+
+ if (page_cur_is_before_first(&cur1)) {
+ page_cur_move_to_next(&cur1);
+ }
+
+ page_cur_set_before_first(new_block, &cur2);
+ page_cur_move_to_next(&cur2);
+
+ /* Copy lock requests on user records to new page and
+ reset the lock bits on the old */
+
+ while (!page_cur_is_after_last(&cur1)) {
+ ulint heap_no;
+
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ ut_ad(!memcmp(page_cur_get_rec(&cur1),
+ page_cur_get_rec(&cur2),
+ rec_get_data_size_old(
+ page_cur_get_rec(&cur2))));
+ }
+
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+ lock_rec_reset_nth_bit(lock, heap_no);
+
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ }
+
+ lock_rec_add_to_queue(type_mode,
+ new_block, heap_no,
+ lock->index, lock->trx);
+ }
+
+ page_cur_move_to_next(&cur1);
+ page_cur_move_to_next(&cur2);
+ }
+ }
+
+ lock_mutex_exit_kernel();
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_page_no(block)));
+ ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
+ buf_block_get_page_no(new_block)));
+#endif
+}
+
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_start(
+/*=====================*/
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec, /*!< in: record on page:
+ this is the first
+ record NOT copied */
+ const rec_t* old_end) /*!< in: old
+ previous-to-last
+ record on new_page
+ before the records
+ were copied */
+{
+ lock_t* lock;
+ const ulint comp = page_rec_is_comp(rec);
+
+ ut_ad(block->frame == page_align(rec));
+ ut_ad(new_block->frame == page_align(old_end));
+
+ lock_mutex_enter_kernel();
+
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ page_cur_t cur1;
+ page_cur_t cur2;
+ const ulint type_mode = lock->type_mode;
+
+ page_cur_set_before_first(block, &cur1);
+ page_cur_move_to_next(&cur1);
+
+ page_cur_position(old_end, new_block, &cur2);
+ page_cur_move_to_next(&cur2);
+
+ /* Copy lock requests on user records to new page and
+ reset the lock bits on the old */
+
+ while (page_cur_get_rec(&cur1) != rec) {
+ ulint heap_no;
+
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ ut_ad(!memcmp(page_cur_get_rec(&cur1),
+ page_cur_get_rec(&cur2),
+ rec_get_data_size_old(
+ page_cur_get_rec(
+ &cur2))));
+ }
+
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+ lock_rec_reset_nth_bit(lock, heap_no);
+
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ }
+
+ lock_rec_add_to_queue(type_mode,
+ new_block, heap_no,
+ lock->index, lock->trx);
+ }
+
+ page_cur_move_to_next(&cur1);
+ page_cur_move_to_next(&cur2);
+ }
+
+#ifdef UNIV_DEBUG
+ if (page_rec_is_supremum(rec)) {
+ ulint i;
+
+ for (i = PAGE_HEAP_NO_USER_LOW;
+ i < lock_rec_get_n_bits(lock); i++) {
+ if (UNIV_UNLIKELY
+ (lock_rec_get_nth_bit(lock, i))) {
+
+ fprintf(stderr,
+ "lock_move_rec_list_start():"
+ " %lu not moved in %p\n",
+ (ulong) i, (void*) lock);
+ ut_error;
+ }
+ }
+ }
+#endif /* UNIV_DEBUG */
+ }
+
+ lock_mutex_exit_kernel();
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_page_no(block)));
+#endif
+}
+
+/*************************************************************//**
+Updates the lock table when a page is split to the right. */
+UNIV_INTERN
+void
+lock_update_split_right(
+/*====================*/
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block) /*!< in: left page */
+{
+ ulint heap_no = lock_get_min_heap_no(right_block);
+
+ lock_mutex_enter_kernel();
+
+ /* Move the locks on the supremum of the left page to the supremum
+ of the right page */
+
+ lock_rec_move(right_block, left_block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+
+ /* Inherit the locks to the supremum of left page from the successor
+ of the infimum on right page */
+
+ lock_rec_inherit_to_gap(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, heap_no);
+
+ lock_mutex_exit_kernel();
+}
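+
+/* Worked example, not from the original source: if a page holding
+(a, b, c, d) is split so that (c, d) move to a new right page, locks
+on the old supremum protected the gap after d, which now lies under
+the right page's supremum, so they are moved there; the left page's
+new supremum gap (the gap before c) then inherits gap type locks from
+c, the successor of the right page's infimum. */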
+
+/*************************************************************//**
+Updates the lock table when a page is merged to the right. */
+UNIV_INTERN
+void
+lock_update_merge_right(
+/*====================*/
+ const buf_block_t* right_block, /*!< in: right page to
+ which merged */
+ const rec_t* orig_succ, /*!< in: original
+ successor of infimum
+ on the right page
+ before merge */
+ const buf_block_t* left_block) /*!< in: merged index
+ page which will be
+ discarded */
+{
+ lock_mutex_enter_kernel();
+
+ /* Inherit the locks from the supremum of the left page to the
+ original successor of infimum on the right page, to which the left
+ page was merged */
+
+ lock_rec_inherit_to_gap(right_block, left_block,
+ page_rec_get_heap_no(orig_succ),
+ PAGE_HEAP_NO_SUPREMUM);
+
+ /* Reset the locks on the supremum of the left page, releasing
+ waiting transactions */
+
+ lock_rec_reset_and_release_wait(left_block,
+ PAGE_HEAP_NO_SUPREMUM);
+
+ lock_rec_free_all_from_discard_page(left_block);
+
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on pages other than
+leaf pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+UNIV_INTERN
+void
+lock_update_root_raise(
+/*===================*/
+ const buf_block_t* block, /*!< in: index page to which copied */
+ const buf_block_t* root) /*!< in: root page */
+{
+ lock_mutex_enter_kernel();
+
+ /* Move the locks on the supremum of the root to the supremum
+ of block */
+
+ lock_rec_move(block, root,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+UNIV_INTERN
+void
+lock_update_copy_and_discard(
+/*=========================*/
+ const buf_block_t* new_block, /*!< in: index page to
+ which copied */
+ const buf_block_t* block) /*!< in: index page;
+ NOT the root! */
+{
+ lock_mutex_enter_kernel();
+
+ /* Move the locks on the supremum of the old page to the supremum
+ of new_page */
+
+ lock_rec_move(new_block, block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+ lock_rec_free_all_from_discard_page(block);
+
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when a page is split to the left. */
+UNIV_INTERN
+void
+lock_update_split_left(
+/*===================*/
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block) /*!< in: left page */
+{
+ ulint heap_no = lock_get_min_heap_no(right_block);
+
+ lock_mutex_enter_kernel();
+
+ /* Inherit the locks to the supremum of the left page from the
+ successor of the infimum on the right page */
+
+ lock_rec_inherit_to_gap(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, heap_no);
+
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when a page is merged to the left. */
+UNIV_INTERN
+void
+lock_update_merge_left(
+/*===================*/
+ const buf_block_t* left_block, /*!< in: left page to
+ which merged */
+ const rec_t* orig_pred, /*!< in: original predecessor
+ of supremum on the left page
+ before merge */
+ const buf_block_t* right_block) /*!< in: merged index page
+ which will be discarded */
+{
+ const rec_t* left_next_rec;
+
+ ut_ad(left_block->frame == page_align(orig_pred));
+
+ lock_mutex_enter_kernel();
+
+ left_next_rec = page_rec_get_next_const(orig_pred);
+
+ if (!page_rec_is_supremum(left_next_rec)) {
+
+ /* Inherit the locks on the supremum of the left page to the
+ first record which was moved from the right page */
+
+ lock_rec_inherit_to_gap(left_block, left_block,
+ page_rec_get_heap_no(left_next_rec),
+ PAGE_HEAP_NO_SUPREMUM);
+
+ /* Reset the locks on the supremum of the left page,
+ releasing waiting transactions */
+
+ lock_rec_reset_and_release_wait(left_block,
+ PAGE_HEAP_NO_SUPREMUM);
+ }
+
+ /* Move the locks from the supremum of right page to the supremum
+ of the left page */
+
+ lock_rec_move(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+
+ lock_rec_free_all_from_discard_page(right_block);
+
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+UNIV_INTERN
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+ const buf_block_t* heir_block, /*!< in: block containing the
+ record which inherits */
+ const buf_block_t* block, /*!< in: block containing the
+ record from which inherited;
+ does NOT reset the locks on
+ this record */
+ ulint heir_heap_no, /*!< in: heap_no of the
+ inheriting record */
+ ulint heap_no) /*!< in: heap_no of the
+ donating record */
+{
+ mutex_enter(&kernel_mutex);
+
+ lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
+
+ lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
+
+ mutex_exit(&kernel_mutex);
+}
+
+/*************************************************************//**
+Updates the lock table when a page is discarded. */
+UNIV_INTERN
+void
+lock_update_discard(
+/*================*/
+ const buf_block_t* heir_block, /*!< in: index page
+ which will inherit the locks */
+ ulint heir_heap_no, /*!< in: heap_no of the record
+ which will inherit the locks */
+ const buf_block_t* block) /*!< in: index page
+ which will be discarded */
+{
+ const page_t* page = block->frame;
+ const rec_t* rec;
+ ulint heap_no;
+
+ lock_mutex_enter_kernel();
+
+ if (!lock_rec_get_first_on_page(block)) {
+ /* No locks exist on page, nothing to do */
+
+ lock_mutex_exit_kernel();
+
+ return;
+ }
+
+ /* Inherit all the locks on the page to the record and reset all
+ the locks on the page */
+
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
+
+ do {
+ heap_no = rec_get_heap_no_new(rec);
+
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
+
+ lock_rec_reset_and_release_wait(block, heap_no);
+
+ rec = page + rec_get_next_offs(rec, TRUE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
+
+ do {
+ heap_no = rec_get_heap_no_old(rec);
+
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
+
+ lock_rec_reset_and_release_wait(block, heap_no);
+
+ rec = page + rec_get_next_offs(rec, FALSE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ }
+
+ lock_rec_free_all_from_discard_page(block);
+
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when a new user record is inserted. */
+UNIV_INTERN
+void
+lock_update_insert(
+/*===============*/
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: the inserted record */
+{
+ ulint receiver_heap_no;
+ ulint donator_heap_no;
+
+ ut_ad(block->frame == page_align(rec));
+
+ /* Inherit the gap-locking locks for rec, in gap mode, from the next
+ record */
+
+ if (page_rec_is_comp(rec)) {
+ receiver_heap_no = rec_get_heap_no_new(rec);
+ donator_heap_no = rec_get_heap_no_new(
+ page_rec_get_next_low(rec, TRUE));
+ } else {
+ receiver_heap_no = rec_get_heap_no_old(rec);
+ donator_heap_no = rec_get_heap_no_old(
+ page_rec_get_next_low(rec, FALSE));
+ }
+
+ lock_mutex_enter_kernel();
+ lock_rec_inherit_to_gap_if_gap_lock(block,
+ receiver_heap_no, donator_heap_no);
+ lock_mutex_exit_kernel();
+}
+
+/*************************************************************//**
+Updates the lock table when a record is removed. */
+UNIV_INTERN
+void
+lock_update_delete(
+/*===============*/
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: the record to be removed */
+{
+ const page_t* page = block->frame;
+ ulint heap_no;
+ ulint next_heap_no;
+
+ ut_ad(page == page_align(rec));
+
+ if (page_is_comp(page)) {
+ heap_no = rec_get_heap_no_new(rec);
+ next_heap_no = rec_get_heap_no_new(page
+ + rec_get_next_offs(rec,
+ TRUE));
+ } else {
+ heap_no = rec_get_heap_no_old(rec);
+ next_heap_no = rec_get_heap_no_old(page
+ + rec_get_next_offs(rec,
+ FALSE));
+ }
+
+ lock_mutex_enter_kernel();
+
+ /* Let the next record inherit the locks from rec, in gap mode */
+
+ lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
+
+ /* Reset the lock bits on rec and release waiting transactions */
+
+ lock_rec_reset_and_release_wait(block, heap_no);
+
+ lock_mutex_exit_kernel();
+}
+
+/*********************************************************************//**
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is moved in such an update, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+UNIV_INTERN
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: record whose lock state
+ is stored on the infimum
+ record of the same page; lock
+ bits are reset on the
+ record */
+{
+ ulint heap_no = page_rec_get_heap_no(rec);
+
+ ut_ad(block->frame == page_align(rec));
+
+ lock_mutex_enter_kernel();
+
+ lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
+
+ lock_mutex_exit_kernel();
+}
+
+/*********************************************************************//**
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+UNIV_INTERN
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record whose lock state
+ is restored */
+ const buf_block_t* donator)/*!< in: page (rec is not
+ necessarily on this page)
+ whose infimum stored the lock
+ state; lock bits are reset on
+ the infimum */
+{
+ ulint heap_no = page_rec_get_heap_no(rec);
+
+ lock_mutex_enter_kernel();
+
+ lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
+
+ lock_mutex_exit_kernel();
+}
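+
+/* Illustrative sketch, not part of the original file: the intended
+pairing of the two functions above, roughly as in a pessimistic update
+(the caller context and the new_block/new_rec names are assumptions).
+Guarded out. */
+#if 0
+	/* Park the explicit locks of rec on the page infimum */
+	lock_rec_store_on_page_infimum(block, rec);
+
+	/* ... delete rec and re-insert it, possibly on another page,
+	obtaining new_block and new_rec ... */
+
+	/* Move the parked locks onto the reinserted record */
+	lock_rec_restore_from_page_infimum(new_block, new_rec, block);
+#endif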
+
+/*=========== DEADLOCK CHECKING ======================================*/
+
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
+static
+ibool
+lock_deadlock_occurs(
+/*=================*/
+ lock_t* lock, /*!< in: lock the transaction is requesting */
+ trx_t* trx) /*!< in: transaction */
+{
+ dict_table_t* table;
+ dict_index_t* index;
+ trx_t* mark_trx;
+ ulint ret;
+ ulint cost = 0;
+
+ ut_ad(trx);
+ ut_ad(lock);
+ ut_ad(mutex_own(&kernel_mutex));
+retry:
+ /* We check that adding this trx to the waits-for graph
+ does not produce a cycle. First mark all active transactions
+ with 0: */
+
+ mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ while (mark_trx) {
+ mark_trx->deadlock_mark = 0;
+ mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx);
+ }
+
+ ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
+
+ if (ret == LOCK_VICTIM_IS_OTHER) {
+ /* We chose some other trx as a victim: retry if there still
+ is a deadlock */
+
+ goto retry;
+ }
+
+ if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
+ table = lock->un_member.tab_lock.table;
+ index = NULL;
+ } else {
+ index = lock->index;
+ table = index->table;
+ }
+
+ lock_deadlock_found = TRUE;
+
+ fputs("*** WE ROLL BACK TRANSACTION (2)\n",
+ lock_latest_err_file);
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
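+
+/* Worked example, not from the original source: T1 holds an X-lock on
+r1 and waits for an X-lock on r2; T2 holds the X-lock on r2 and now
+requests an X-lock on r1. The recursive search started from T2 follows
+the edges T2 -> T1 -> T2 and finds the cycle; the transaction with the
+smaller trx_weight_cmp() weight is preferred as the victim, and if
+that victim is T2 itself, this function returns TRUE. */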
+
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
+static
+ulint
+lock_deadlock_recursive(
+/*====================*/
+ trx_t* start, /*!< in: recursion starting point */
+ trx_t* trx, /*!< in: a transaction waiting for a lock */
+ lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ ulint* cost, /*!< in/out: number of calculation steps thus
+ far: if this exceeds LOCK_MAX_N_STEPS_...
+ we return LOCK_VICTIM_IS_START */
+ ulint depth) /*!< in: recursion depth: if this exceeds
+ LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
+ return LOCK_VICTIM_IS_START */
+{
+ lock_t* lock;
+ ulint bit_no = ULINT_UNDEFINED;
+ trx_t* lock_trx;
+ ulint ret;
+
+ ut_a(trx);
+ ut_a(start);
+ ut_a(wait_lock);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (trx->deadlock_mark == 1) {
+ /* We have already exhaustively searched the subtree starting
+ from this trx */
+
+ return(0);
+ }
+
+ *cost = *cost + 1;
+
+ lock = wait_lock;
+
+ if (lock_get_type_low(wait_lock) == LOCK_REC) {
+
+ bit_no = lock_rec_find_set_bit(wait_lock);
+
+ ut_a(bit_no != ULINT_UNDEFINED);
+ }
+
+ /* Look at the locks ahead of wait_lock in the lock queue */
+
+ for (;;) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
+
+ lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
+ lock);
+ } else {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_a(bit_no != ULINT_UNDEFINED);
+
+ lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
+ }
+
+ if (lock == NULL) {
+ /* We can mark this subtree as searched */
+ trx->deadlock_mark = 1;
+
+			return(0);
+ }
+
+ if (lock_has_to_wait(wait_lock, lock)) {
+
+ ibool too_far
+ = depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
+ || *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK;
+
+ lock_trx = lock->trx;
+
+ if (lock_trx == start || too_far) {
+
+				/* We came back to the recursion starting
+				point: a deadlock was detected, or we have
+				searched the waits-for graph too long */
+
+ FILE* ef = lock_latest_err_file;
+
+ rewind(ef);
+ ut_print_timestamp(ef);
+
+ fputs("\n*** (1) TRANSACTION:\n", ef);
+
+ trx_print(ef, wait_lock->trx, 3000);
+
+ fputs("*** (1) WAITING FOR THIS LOCK"
+ " TO BE GRANTED:\n", ef);
+
+ if (lock_get_type_low(wait_lock) == LOCK_REC) {
+ lock_rec_print(ef, wait_lock);
+ } else {
+ lock_table_print(ef, wait_lock);
+ }
+
+ fputs("*** (2) TRANSACTION:\n", ef);
+
+ trx_print(ef, lock->trx, 3000);
+
+ fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ lock_rec_print(ef, lock);
+ } else {
+ lock_table_print(ef, lock);
+ }
+
+ fputs("*** (2) WAITING FOR THIS LOCK"
+ " TO BE GRANTED:\n", ef);
+
+ if (lock_get_type_low(start->wait_lock)
+ == LOCK_REC) {
+ lock_rec_print(ef, start->wait_lock);
+ } else {
+ lock_table_print(ef, start->wait_lock);
+ }
+#ifdef UNIV_DEBUG
+ if (lock_print_waits) {
+ fputs("Deadlock detected"
+ " or too long search\n",
+ stderr);
+ }
+#endif /* UNIV_DEBUG */
+ if (too_far) {
+
+ fputs("TOO DEEP OR LONG SEARCH"
+ " IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH\n", ef);
+
+ return(LOCK_VICTIM_IS_START);
+ }
+
+ if (trx_weight_cmp(wait_lock->trx,
+ start) >= 0) {
+				/* Our recursion starting point
+				transaction is 'smaller', let us
+				choose 'start' as the victim and roll
+				it back */
+
+ return(LOCK_VICTIM_IS_START);
+ }
+
+ lock_deadlock_found = TRUE;
+
+ /* Let us choose the transaction of wait_lock
+ as a victim to try to avoid deadlocking our
+ recursion starting point transaction */
+
+ fputs("*** WE ROLL BACK TRANSACTION (1)\n",
+ ef);
+
+ wait_lock->trx->was_chosen_as_deadlock_victim
+ = TRUE;
+
+ lock_cancel_waiting_and_release(wait_lock);
+
+			/* Since trx and wait_lock are no longer
+			in the waits-for graph, we return
+			LOCK_VICTIM_IS_OTHER so that the search is
+			retried; note that our selective algorithm
+			can choose several transactions as victims,
+			and we may still end up rolling back the
+			recursion starting point transaction too! */
+
+ return(LOCK_VICTIM_IS_OTHER);
+ }
+
+ if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+ /* Another trx ahead has requested lock in an
+ incompatible mode, and is itself waiting for
+ a lock */
+
+ ret = lock_deadlock_recursive(
+ start, lock_trx,
+ lock_trx->wait_lock, cost, depth + 1);
+ if (ret != 0) {
+
+ return(ret);
+ }
+ }
+ }
+ }/* end of the 'for (;;)'-loop */
+}
+
+/*========================= TABLE LOCKS ==============================*/
+
+/*********************************************************************//**
+Creates a table lock object and adds it as the last in the lock queue
+of the table. Does NOT check for deadlocks or lock compatibility.
+@return own: new lock object */
+UNIV_INLINE
+lock_t*
+lock_table_create(
+/*==============*/
+ dict_table_t* table, /*!< in: database table in dictionary cache */
+ ulint type_mode,/*!< in: lock mode possibly ORed with
+ LOCK_WAIT */
+ trx_t* trx) /*!< in: trx */
+{
+ lock_t* lock;
+
+ ut_ad(table && trx);
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
+ ++table->n_waiting_or_granted_auto_inc_locks;
+ }
+
+	/* For AUTOINC locking we reuse the lock instance only if
+	there is no wait involved; otherwise we allocate the waiting
+	lock from the transaction's lock heap. */
+ if (type_mode == LOCK_AUTO_INC) {
+
+ lock = table->autoinc_lock;
+
+ table->autoinc_trx = trx;
+
+ ib_vector_push(trx->autoinc_locks, lock);
+ } else {
+ lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
+ }
+
+ UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
+
+ lock->type_mode = type_mode | LOCK_TABLE;
+ lock->trx = trx;
+
+ lock->un_member.tab_lock.table = table;
+
+ UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
+
+ return(lock);
+}
+
+/*************************************************************//**
+Removes a table lock request from the queue and the trx list of locks;
+this is a low-level function which does NOT check if waiting requests
+can now be granted. */
+UNIV_INLINE
+void
+lock_table_remove_low(
+/*==================*/
+ lock_t* lock) /*!< in: table lock */
+{
+ trx_t* trx;
+ dict_table_t* table;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx = lock->trx;
+ table = lock->un_member.tab_lock.table;
+
+	/* Remove the table from the transaction's AUTOINC vector, if
+	the lock that is being released is an AUTOINC lock. */
+ if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+
+ /* The table's AUTOINC lock can get transferred to
+ another transaction before we get here. */
+ if (table->autoinc_trx == trx) {
+ table->autoinc_trx = NULL;
+ }
+
+ /* The locks must be freed in the reverse order from
+ the one in which they were acquired. This is to avoid
+ traversing the AUTOINC lock vector unnecessarily.
+
+ We only store locks that were granted in the
+ trx->autoinc_locks vector (see lock_table_create()
+ and lock_grant()). Therefore it can be empty and we
+ need to check for that. */
+
+ if (!ib_vector_is_empty(trx->autoinc_locks)) {
+ lock_t* autoinc_lock;
+
+ autoinc_lock = ib_vector_pop(trx->autoinc_locks);
+ ut_a(autoinc_lock == lock);
+ }
+
+ ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
+ --table->n_waiting_or_granted_auto_inc_locks;
+ }
+
+ UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
+ UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
+}
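+
+/* Worked example, not from the original source: if a transaction
+acquired AUTO-INC locks on table A and then on table B,
+trx->autoinc_locks holds [A-lock, B-lock]; releasing B first and then
+A makes each ib_vector_pop() above yield exactly the lock that is
+being removed. */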
+
+/*********************************************************************//**
+Enqueues a waiting request for a table lock which cannot be granted
+immediately. Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
+static
+ulint
+lock_table_enqueue_waiting(
+/*=======================*/
+ ulint mode, /*!< in: lock mode this transaction is
+ requesting */
+ dict_table_t* table, /*!< in: table */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ lock_t* lock;
+ trx_t* trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ /* Test if there already is some other reason to suspend thread:
+ we do not enqueue a lock request if the query thread should be
+ stopped anyway */
+
+ if (que_thr_stop(thr)) {
+ ut_error;
+
+ return(DB_QUE_THR_SUSPENDED);
+ }
+
+ trx = thr_get_trx(thr);
+
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: a table lock wait happens"
+ " in a dictionary operation!\n"
+ "InnoDB: Table name ", stderr);
+ ut_print_name(stderr, trx, TRUE, table->name);
+ fputs(".\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n",
+ stderr);
+ }
+
+ /* Enqueue the lock request that will wait to be granted */
+
+ lock = lock_table_create(table, mode | LOCK_WAIT, trx);
+
+ /* Check if a deadlock occurs: if yes, remove the lock request and
+ return an error code */
+
+ if (lock_deadlock_occurs(lock, trx)) {
+
+ lock_reset_lock_and_trx_wait(lock);
+ lock_table_remove_low(lock);
+
+ return(DB_DEADLOCK);
+ }
+
+ if (trx->wait_lock == NULL) {
+ /* Deadlock resolution chose another transaction as a victim,
+ and we accidentally got our lock granted! */
+
+ return(DB_SUCCESS);
+ }
+
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
+
+ ut_a(que_thr_stop(thr));
+
+ return(DB_LOCK_WAIT);
+}
+
+/*********************************************************************//**
+Checks if other transactions have an incompatible mode lock request in
+the lock queue. */
+UNIV_INLINE
+ibool
+lock_table_other_has_incompatible(
+/*==============================*/
+ trx_t* trx, /*!< in: transaction, or NULL if all
+ transactions should be included */
+ ulint wait, /*!< in: LOCK_WAIT if also waiting locks are
+ taken into account, or 0 if not */
+ dict_table_t* table, /*!< in: table */
+ enum lock_mode mode) /*!< in: lock mode */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = UT_LIST_GET_LAST(table->locks);
+
+ while (lock != NULL) {
+
+ if ((lock->trx != trx)
+ && (!lock_mode_compatible(lock_get_mode(lock), mode))
+ && (wait || !(lock_get_wait(lock)))) {
+
+ return(TRUE);
+ }
+
+ lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_table(
+/*=======*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ dict_table_t* table, /*!< in: database table in dictionary cache */
+ enum lock_mode mode, /*!< in: lock mode */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ trx_t* trx;
+ ulint err;
+
+ ut_ad(table && thr);
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ ut_a(flags == 0);
+
+ trx = thr_get_trx(thr);
+
+ lock_mutex_enter_kernel();
+
+	/* Look for equal or stronger locks the same trx already has
+	on the table */
+
+ if (lock_table_has(trx, table, mode)) {
+
+ lock_mutex_exit_kernel();
+
+ return(DB_SUCCESS);
+ }
+
+ /* We have to check if the new lock is compatible with any locks
+ other transactions have in the table lock queue. */
+
+ if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
+
+ /* Another trx has a request on the table in an incompatible
+ mode: this trx may have to wait */
+
+ err = lock_table_enqueue_waiting(mode | flags, table, thr);
+
+ lock_mutex_exit_kernel();
+
+ return(err);
+ }
+
+ lock_table_create(table, mode | flags, trx);
+
+ ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
+
+ lock_mutex_exit_kernel();
+
+ return(DB_SUCCESS);
+}
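+
+/* A minimal usage sketch (hypothetical caller): row-level operations
+acquire an intention lock on the table before setting record locks,
+for example:
+
+	err = lock_table(0, table, LOCK_IX, thr);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+*/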
+
+/*********************************************************************//**
+Checks if a waiting table lock request still has to wait in a queue.
+@return TRUE if still has to wait */
+static
+ibool
+lock_table_has_to_wait_in_queue(
+/*============================*/
+ lock_t* wait_lock) /*!< in: waiting table lock */
+{
+ dict_table_t* table;
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_get_wait(wait_lock));
+
+ table = wait_lock->un_member.tab_lock.table;
+
+ lock = UT_LIST_GET_FIRST(table->locks);
+
+ while (lock != wait_lock) {
+
+ if (lock_has_to_wait(wait_lock, lock)) {
+
+ return(TRUE);
+ }
+
+ lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************//**
+Removes a table lock request, waiting or granted, from the queue and grants
+locks to other transactions in the queue, if they now are entitled to a
+lock. */
+static
+void
+lock_table_dequeue(
+/*===============*/
+ lock_t* in_lock)/*!< in: table lock object; transactions waiting
+ behind will get their lock requests granted, if
+ they are now qualified to it */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
+
+ lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
+
+ lock_table_remove_low(in_lock);
+
+ /* Check if waiting locks in the queue can now be granted: grant
+ locks if there are no conflicting locks ahead. */
+
+ while (lock != NULL) {
+
+ if (lock_get_wait(lock)
+ && !lock_table_has_to_wait_in_queue(lock)) {
+
+ /* Grant the lock */
+ lock_grant(lock);
+ }
+
+ lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+ }
+}
+
+/*=========================== LOCK RELEASE ==============================*/
+
+/*************************************************************//**
+Removes a granted record lock of a transaction from the queue and grants
+locks to other transactions waiting in the queue if they now are entitled
+to a lock. */
+UNIV_INTERN
+void
+lock_rec_unlock(
+/*============*/
+ trx_t* trx, /*!< in: transaction that has
+ set a record lock */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record */
+ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
+{
+ lock_t* lock;
+ lock_t* release_lock = NULL;
+ ulint heap_no;
+
+ ut_ad(trx && rec);
+ ut_ad(block->frame == page_align(rec));
+
+ heap_no = page_rec_get_heap_no(rec);
+
+ mutex_enter(&kernel_mutex);
+
+ lock = lock_rec_get_first(block, heap_no);
+
+	/* Find the last lock with the same lock_mode and transaction
+	on the record. */
+
+ while (lock != NULL) {
+ if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
+ release_lock = lock;
+ ut_a(!lock_get_wait(lock));
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ /* If a record lock is found, release the record lock */
+
+ if (UNIV_LIKELY(release_lock != NULL)) {
+ lock_rec_reset_nth_bit(release_lock, heap_no);
+ } else {
+ mutex_exit(&kernel_mutex);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: unlock row could not"
+ " find a %lu mode lock on the record\n",
+ (ulong) lock_mode);
+
+ return;
+ }
+
+ /* Check if we can now grant waiting lock requests */
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock != NULL) {
+ if (lock_get_wait(lock)
+ && !lock_rec_has_to_wait_in_queue(lock)) {
+
+ /* Grant the lock */
+ lock_grant(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ mutex_exit(&kernel_mutex);
+}
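+
+/* A usage sketch (hypothetical caller): at a less strict isolation
+level, the upper layer may release the lock on a row which turned out
+not to match the search condition:
+
+	lock_rec_unlock(trx, block, rec, LOCK_X);
+*/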
+
+/*********************************************************************//**
+Releases all locks of a transaction, and grants the lock requests of other
+transactions that were waiting because of these locks. */
+UNIV_INTERN
+void
+lock_release_off_kernel(
+/*====================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ dict_table_t* table;
+ ulint count;
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = UT_LIST_GET_LAST(trx->trx_locks);
+
+ count = 0;
+
+ while (lock != NULL) {
+
+ count++;
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+
+ lock_rec_dequeue_from_page(lock);
+ } else {
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+ if (lock_get_mode(lock) != LOCK_IS
+ && !ut_dulint_is_zero(trx->undo_no)) {
+
+ /* The trx may have modified the table. We
+ block the use of the MySQL query cache for
+ all currently active transactions. */
+
+ table = lock->un_member.tab_lock.table;
+
+ table->query_cache_inv_trx_id
+ = trx_sys->max_trx_id;
+ }
+
+ lock_table_dequeue(lock);
+ }
+
+ if (count == LOCK_RELEASE_KERNEL_INTERVAL) {
+ /* Release the kernel mutex for a while, so that we
+ do not monopolize it */
+
+ lock_mutex_exit_kernel();
+
+ lock_mutex_enter_kernel();
+
+ count = 0;
+ }
+
+ lock = UT_LIST_GET_LAST(trx->trx_locks);
+ }
+
+ ut_a(ib_vector_size(trx->autoinc_locks) == 0);
+
+ mem_heap_empty(trx->lock_heap);
+}
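+
+/* The caller must hold kernel_mutex; a hypothetical fragment, e.g. at
+transaction commit:
+
+	mutex_enter(&kernel_mutex);
+	lock_release_off_kernel(trx);
+	mutex_exit(&kernel_mutex);
+*/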
+
+/*********************************************************************//**
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+UNIV_INTERN
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+ lock_t* lock) /*!< in: waiting lock request */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+
+ lock_rec_dequeue_from_page(lock);
+ } else {
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+ if (lock->trx->autoinc_locks != NULL) {
+			/* Release the transaction's AUTOINC locks. */
+ lock_release_autoinc_locks(lock->trx);
+ }
+
+ lock_table_dequeue(lock);
+ }
+
+ /* Reset the wait flag and the back pointer to lock in trx */
+
+ lock_reset_lock_and_trx_wait(lock);
+
+ /* The following function releases the trx from lock wait */
+
+ trx_end_lock_wait(lock->trx);
+}
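+
+/* A hypothetical fragment showing how, e.g., a lock wait timeout
+handler could use this function:
+
+	mutex_enter(&kernel_mutex);
+
+	if (trx->wait_lock) {
+		lock_cancel_waiting_and_release(trx->wait_lock);
+	}
+
+	mutex_exit(&kernel_mutex);
+*/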
+
+/* True if a lock mode is S or X */
+#define IS_LOCK_S_OR_X(lock) \
+ (lock_get_mode(lock) == LOCK_S \
+ || lock_get_mode(lock) == LOCK_X)
+
+
+/*********************************************************************//**
+Removes locks of a transaction on a table to be dropped.
+If remove_also_table_sx_locks is TRUE then table-level S and X locks are
+also removed in addition to other table-level and record-level locks.
+No lock that is going to be removed is allowed to be a wait lock. */
+static
+void
+lock_remove_all_on_table_for_trx(
+/*=============================*/
+ dict_table_t* table, /*!< in: table to be dropped */
+ trx_t* trx, /*!< in: a transaction */
+ ibool remove_also_table_sx_locks)/*!< in: also removes
+ table S and X locks */
+{
+ lock_t* lock;
+ lock_t* prev_lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = UT_LIST_GET_LAST(trx->trx_locks);
+
+ while (lock != NULL) {
+ prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
+
+ if (lock_get_type_low(lock) == LOCK_REC
+ && lock->index->table == table) {
+ ut_a(!lock_get_wait(lock));
+
+ lock_rec_discard(lock);
+ } else if (lock_get_type_low(lock) & LOCK_TABLE
+ && lock->un_member.tab_lock.table == table
+ && (remove_also_table_sx_locks
+ || !IS_LOCK_S_OR_X(lock))) {
+
+ ut_a(!lock_get_wait(lock));
+
+ lock_table_remove_low(lock);
+ }
+
+ lock = prev_lock;
+ }
+}
+
+/*********************************************************************//**
+Removes locks on a table to be dropped or truncated.
+If remove_also_table_sx_locks is TRUE then table-level S and X locks are
+also removed in addition to other table-level and record-level locks.
+No lock that is going to be removed is allowed to be a wait lock. */
+UNIV_INTERN
+void
+lock_remove_all_on_table(
+/*=====================*/
+ dict_table_t* table, /*!< in: table to be dropped
+ or truncated */
+ ibool remove_also_table_sx_locks)/*!< in: also removes
+ table S and X locks */
+{
+ lock_t* lock;
+ lock_t* prev_lock;
+
+ mutex_enter(&kernel_mutex);
+
+ lock = UT_LIST_GET_FIRST(table->locks);
+
+ while (lock != NULL) {
+
+ prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
+ lock);
+
+		/* If we should remove all locks (remove_also_table_sx_locks
+		is TRUE), or if the lock is not a table-level S or X lock,
+		then check that we are not going to remove a wait lock. */
+ if (remove_also_table_sx_locks
+ || !(lock_get_type(lock) == LOCK_TABLE
+ && IS_LOCK_S_OR_X(lock))) {
+
+ ut_a(!lock_get_wait(lock));
+ }
+
+ lock_remove_all_on_table_for_trx(table, lock->trx,
+ remove_also_table_sx_locks);
+
+ if (prev_lock == NULL) {
+ if (lock == UT_LIST_GET_FIRST(table->locks)) {
+ /* lock was not removed, pick its successor */
+ lock = UT_LIST_GET_NEXT(
+ un_member.tab_lock.locks, lock);
+ } else {
+ /* lock was removed, pick the first one */
+ lock = UT_LIST_GET_FIRST(table->locks);
+ }
+ } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
+ prev_lock) != lock) {
+ /* If lock was removed by
+ lock_remove_all_on_table_for_trx() then pick the
+ successor of prev_lock ... */
+ lock = UT_LIST_GET_NEXT(
+ un_member.tab_lock.locks, prev_lock);
+ } else {
+ /* ... otherwise pick the successor of lock. */
+ lock = UT_LIST_GET_NEXT(
+ un_member.tab_lock.locks, lock);
+ }
+ }
+
+ mutex_exit(&kernel_mutex);
+}
+
+/*===================== VALIDATION AND DEBUGGING ====================*/
+
+/*********************************************************************//**
+Prints info of a table lock. */
+UNIV_INTERN
+void
+lock_table_print(
+/*=============*/
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock) /*!< in: table type lock */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(lock_get_type_low(lock) == LOCK_TABLE);
+
+ fputs("TABLE LOCK table ", file);
+ ut_print_name(file, lock->trx, TRUE,
+ lock->un_member.tab_lock.table->name);
+ fprintf(file, " trx id " TRX_ID_FMT,
+ TRX_ID_PREP_PRINTF(lock->trx->id));
+
+ if (lock_get_mode(lock) == LOCK_S) {
+ fputs(" lock mode S", file);
+ } else if (lock_get_mode(lock) == LOCK_X) {
+ fputs(" lock mode X", file);
+ } else if (lock_get_mode(lock) == LOCK_IS) {
+ fputs(" lock mode IS", file);
+ } else if (lock_get_mode(lock) == LOCK_IX) {
+ fputs(" lock mode IX", file);
+ } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+ fputs(" lock mode AUTO-INC", file);
+ } else {
+ fprintf(file, " unknown lock mode %lu",
+ (ulong) lock_get_mode(lock));
+ }
+
+ if (lock_get_wait(lock)) {
+ fputs(" waiting", file);
+ }
+
+ putc('\n', file);
+}
+
+/*********************************************************************//**
+Prints info of a record lock. */
+UNIV_INTERN
+void
+lock_rec_print(
+/*===========*/
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock) /*!< in: record type lock */
+{
+ const buf_block_t* block;
+ ulint space;
+ ulint page_no;
+ ulint i;
+ mtr_t mtr;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ space = lock->un_member.rec_lock.space;
+ page_no = lock->un_member.rec_lock.page_no;
+
+ fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
+ (ulong) space, (ulong) page_no,
+ (ulong) lock_rec_get_n_bits(lock));
+ dict_index_name_print(file, lock->trx, lock->index);
+ fprintf(file, " trx id " TRX_ID_FMT,
+ TRX_ID_PREP_PRINTF(lock->trx->id));
+
+ if (lock_get_mode(lock) == LOCK_S) {
+ fputs(" lock mode S", file);
+ } else if (lock_get_mode(lock) == LOCK_X) {
+		fputs(" lock mode X", file);
+ } else {
+ ut_error;
+ }
+
+ if (lock_rec_get_gap(lock)) {
+ fputs(" locks gap before rec", file);
+ }
+
+ if (lock_rec_get_rec_not_gap(lock)) {
+ fputs(" locks rec but not gap", file);
+ }
+
+ if (lock_rec_get_insert_intention(lock)) {
+ fputs(" insert intention", file);
+ }
+
+ if (lock_get_wait(lock)) {
+ fputs(" waiting", file);
+ }
+
+ mtr_start(&mtr);
+
+ putc('\n', file);
+
+ block = buf_page_try_get(space, page_no, &mtr);
+
+ if (block) {
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+
+ if (lock_rec_get_nth_bit(lock, i)) {
+
+ const rec_t* rec
+ = page_find_rec_with_heap_no(
+ buf_block_get_frame(block), i);
+ offsets = rec_get_offsets(
+ rec, lock->index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ fprintf(file, "Record lock, heap no %lu ",
+ (ulong) i);
+ rec_print_new(file, rec, offsets);
+ putc('\n', file);
+ }
+ }
+ } else {
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ }
+ }
+
+ mtr_commit(&mtr);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+#ifdef UNIV_DEBUG
+/* Print the number of lock structs from lock_print_info_summary() only
+in non-production builds for performance reasons, see
+http://bugs.mysql.com/36942 */
+#define PRINT_NUM_OF_LOCK_STRUCTS
+#endif /* UNIV_DEBUG */
+
+#ifdef PRINT_NUM_OF_LOCK_STRUCTS
+/*********************************************************************//**
+Calculates the number of record lock structs in the record lock hash table.
+@return number of record locks */
+static
+ulint
+lock_get_n_rec_locks(void)
+/*======================*/
+{
+ lock_t* lock;
+ ulint n_locks = 0;
+ ulint i;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
+
+ lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
+
+ while (lock) {
+ n_locks++;
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+ }
+
+ return(n_locks);
+}
+#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
+
+/*********************************************************************//**
+Prints a summary of the lock system and of transaction activity. */
+UNIV_INTERN
+void
+lock_print_info_summary(
+/*====================*/
+ FILE* file) /*!< in: file where to print */
+{
+ /* We must protect the MySQL thd->query field with a MySQL mutex, and
+ because the MySQL mutex must be reserved before the kernel_mutex of
+ InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
+
+ innobase_mysql_prepare_print_arbitrary_thd();
+ lock_mutex_enter_kernel();
+
+ if (lock_deadlock_found) {
+ fputs("------------------------\n"
+ "LATEST DETECTED DEADLOCK\n"
+ "------------------------\n", file);
+
+ ut_copy_file(file, lock_latest_err_file);
+ }
+
+ fputs("------------\n"
+ "TRANSACTIONS\n"
+ "------------\n", file);
+
+ fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
+
+ fprintf(file,
+ "Purge done for trx's n:o < " TRX_ID_FMT
+ " undo n:o < " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
+ TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
+
+ fprintf(file,
+ "History list length %lu\n",
+ (ulong) trx_sys->rseg_history_len);
+
+#ifdef PRINT_NUM_OF_LOCK_STRUCTS
+ fprintf(file,
+ "Total number of lock structs in row lock hash table %lu\n",
+ (ulong) lock_get_n_rec_locks());
+#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
+}
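+
+/* The output of this function appears in SHOW ENGINE INNODB STATUS;
+an illustrative (not verbatim) fragment:
+
+	------------
+	TRANSACTIONS
+	------------
+	Trx id counter 0 1282
+	Purge done for trx's n:o < 0 1270 undo n:o < 0 0
+	History list length 3
+*/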
+
+/*********************************************************************//**
+Prints info of locks for each transaction. */
+UNIV_INTERN
+void
+lock_print_info_all_transactions(
+/*=============================*/
+ FILE* file) /*!< in: file where to print */
+{
+ lock_t* lock;
+ ibool load_page_first = TRUE;
+ ulint nth_trx = 0;
+ ulint nth_lock = 0;
+ ulint i;
+ mtr_t mtr;
+ trx_t* trx;
+
+ fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
+
+ /* First print info on non-active transactions */
+
+ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+
+ while (trx) {
+ if (trx->conc_state == TRX_NOT_STARTED) {
+ fputs("---", file);
+ trx_print(file, trx, 600);
+ }
+
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
+ }
+
+loop:
+ trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ i = 0;
+
+	/* Since we temporarily release the kernel mutex when
+	reading a database page below, the variable trx may be
+	obsolete now, and we must loop through the trx list to
+	find (probably) the same trx, or possibly some other trx. */
+
+ while (trx && (i < nth_trx)) {
+ trx = UT_LIST_GET_NEXT(trx_list, trx);
+ i++;
+ }
+
+ if (trx == NULL) {
+ lock_mutex_exit_kernel();
+ innobase_mysql_end_print_arbitrary_thd();
+
+ ut_ad(lock_validate());
+
+ return;
+ }
+
+ if (nth_lock == 0) {
+ fputs("---", file);
+ trx_print(file, trx, 600);
+
+ if (trx->read_view) {
+ fprintf(file,
+ "Trx read view will not see trx with"
+ " id >= " TRX_ID_FMT
+ ", sees < " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(
+ trx->read_view->low_limit_id),
+ TRX_ID_PREP_PRINTF(
+ trx->read_view->up_limit_id));
+ }
+
+ if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+ fprintf(file,
+ "------- TRX HAS BEEN WAITING %lu SEC"
+ " FOR THIS LOCK TO BE GRANTED:\n",
+ (ulong) difftime(time(NULL),
+ trx->wait_started));
+
+ if (lock_get_type_low(trx->wait_lock) == LOCK_REC) {
+ lock_rec_print(file, trx->wait_lock);
+ } else {
+ lock_table_print(file, trx->wait_lock);
+ }
+
+ fputs("------------------\n", file);
+ }
+ }
+
+ if (!srv_print_innodb_lock_monitor) {
+ nth_trx++;
+ goto loop;
+ }
+
+ i = 0;
+
+	/* See the note above about the trx loop for why we loop here:
+	lock may be an obsolete pointer now. */
+
+ lock = UT_LIST_GET_FIRST(trx->trx_locks);
+
+ while (lock && (i < nth_lock)) {
+ lock = UT_LIST_GET_NEXT(trx_locks, lock);
+ i++;
+ }
+
+ if (lock == NULL) {
+ nth_trx++;
+ nth_lock = 0;
+
+ goto loop;
+ }
+
+ if (lock_get_type_low(lock) == LOCK_REC) {
+ if (load_page_first) {
+ ulint space = lock->un_member.rec_lock.space;
+			ulint zip_size = fil_space_get_zip_size(space);
+ ulint page_no = lock->un_member.rec_lock.page_no;
+
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+
+ /* It is a single table tablespace and
+ the .ibd file is missing (TRUNCATE
+ TABLE probably stole the locks): just
+ print the lock without attempting to
+				load the page into the buffer pool. */
+
+ fprintf(file, "RECORD LOCKS on"
+ " non-existing space %lu\n",
+ (ulong) space);
+ goto print_rec;
+ }
+
+ lock_mutex_exit_kernel();
+ innobase_mysql_end_print_arbitrary_thd();
+
+ mtr_start(&mtr);
+
+ buf_page_get_with_no_latch(space, zip_size,
+ page_no, &mtr);
+
+ mtr_commit(&mtr);
+
+ load_page_first = FALSE;
+
+ innobase_mysql_prepare_print_arbitrary_thd();
+ lock_mutex_enter_kernel();
+
+ goto loop;
+ }
+
+print_rec:
+ lock_rec_print(file, lock);
+ } else {
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+ lock_table_print(file, lock);
+ }
+
+ load_page_first = TRUE;
+
+ nth_lock++;
+
+ if (nth_lock >= 10) {
+ fputs("10 LOCKS PRINTED FOR THIS TRX:"
+ " SUPPRESSING FURTHER PRINTS\n",
+ file);
+
+ nth_trx++;
+ nth_lock = 0;
+
+ goto loop;
+ }
+
+ goto loop;
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return TRUE if ok */
+static
+ibool
+lock_table_queue_validate(
+/*======================*/
+ dict_table_t* table) /*!< in: table */
+{
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ lock = UT_LIST_GET_FIRST(table->locks);
+
+ while (lock) {
+ ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
+ || ((lock->trx)->conc_state == TRX_PREPARED)
+ || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
+
+ if (!lock_get_wait(lock)) {
+
+ ut_a(!lock_table_other_has_incompatible(
+ lock->trx, 0, table,
+ lock_get_mode(lock)));
+ } else {
+
+ ut_a(lock_table_has_to_wait_in_queue(lock));
+ }
+
+ lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
+ }
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return TRUE if ok */
+static
+ibool
+lock_rec_queue_validate(
+/*====================*/
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record to look at */
+ dict_index_t* index, /*!< in: index, or NULL if not known */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+ trx_t* impl_trx;
+ lock_t* lock;
+ ulint heap_no;
+
+ ut_a(rec);
+ ut_a(block->frame == page_align(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+
+ heap_no = page_rec_get_heap_no(rec);
+
+ lock_mutex_enter_kernel();
+
+ if (!page_rec_is_user_rec(rec)) {
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock) {
+			switch (lock->trx->conc_state) {
+ case TRX_ACTIVE:
+ case TRX_PREPARED:
+ case TRX_COMMITTED_IN_MEMORY:
+ break;
+ default:
+ ut_error;
+ }
+
+ ut_a(trx_in_trx_list(lock->trx));
+
+ if (lock_get_wait(lock)) {
+ ut_a(lock_rec_has_to_wait_in_queue(lock));
+ }
+
+ if (index) {
+ ut_a(lock->index == index);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ lock_mutex_exit_kernel();
+
+ return(TRUE);
+ }
+
+	if (!index) {
+		/* Nothing to check if the index is not known. */
+	} else if (dict_index_is_clust(index)) {
+
+ impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
+
+ if (impl_trx
+ && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+ block, heap_no, impl_trx)) {
+
+ ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, impl_trx));
+ }
+ } else {
+
+ /* The kernel mutex may get released temporarily in the
+ next function call: we have to release lock table mutex
+ to obey the latching order */
+
+ impl_trx = lock_sec_rec_some_has_impl_off_kernel(
+ rec, index, offsets);
+
+ if (impl_trx
+ && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+ block, heap_no, impl_trx)) {
+
+ ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, impl_trx));
+ }
+ }
+
+ lock = lock_rec_get_first(block, heap_no);
+
+ while (lock) {
+ ut_a(lock->trx->conc_state == TRX_ACTIVE
+ || lock->trx->conc_state == TRX_PREPARED
+ || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
+ ut_a(trx_in_trx_list(lock->trx));
+
+ if (index) {
+ ut_a(lock->index == index);
+ }
+
+ if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
+
+ enum lock_mode mode;
+
+ if (lock_get_mode(lock) == LOCK_S) {
+ mode = LOCK_X;
+ } else {
+ mode = LOCK_S;
+ }
+ ut_a(!lock_rec_other_has_expl_req(
+ mode, 0, 0, block, heap_no, lock->trx));
+
+ } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
+
+ ut_a(lock_rec_has_to_wait_in_queue(lock));
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ }
+
+ lock_mutex_exit_kernel();
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
+{
+ dict_index_t* index;
+ buf_block_t* block;
+ const page_t* page;
+ lock_t* lock;
+ const rec_t* rec;
+ ulint nth_lock = 0;
+ ulint nth_bit = 0;
+ ulint i;
+ ulint zip_size;
+ mtr_t mtr;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(!mutex_own(&kernel_mutex));
+
+ mtr_start(&mtr);
+
+ zip_size = fil_space_get_zip_size(space);
+ ut_ad(zip_size != ULINT_UNDEFINED);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+ page = block->frame;
+
+ lock_mutex_enter_kernel();
+loop:
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ if (!lock) {
+ goto function_exit;
+ }
+
+ for (i = 0; i < nth_lock; i++) {
+
+ lock = lock_rec_get_next_on_page(lock);
+
+ if (!lock) {
+ goto function_exit;
+ }
+ }
+
+ ut_a(trx_in_trx_list(lock->trx));
+ ut_a(lock->trx->conc_state == TRX_ACTIVE
+ || lock->trx->conc_state == TRX_PREPARED
+ || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
+
+ for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
+
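+		/* Note: heap no 1 is PAGE_HEAP_NO_SUPREMUM; the
+		supremum record's queue is validated even when its
+		bit is not set in the lock bitmap. */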
+ if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
+
+ index = lock->index;
+ rec = page_find_rec_with_heap_no(page, i);
+ ut_a(rec);
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ fprintf(stderr,
+ "Validating %lu %lu\n",
+ (ulong) space, (ulong) page_no);
+
+ lock_mutex_exit_kernel();
+
+ lock_rec_queue_validate(block, rec, index, offsets);
+
+ lock_mutex_enter_kernel();
+
+ nth_bit = i + 1;
+
+ goto loop;
+ }
+ }
+
+ nth_bit = 0;
+ nth_lock++;
+
+ goto loop;
+
+function_exit:
+ lock_mutex_exit_kernel();
+
+ mtr_commit(&mtr);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
+ibool
+lock_validate(void)
+/*===============*/
+{
+ lock_t* lock;
+ trx_t* trx;
+ dulint limit;
+ ulint space;
+ ulint page_no;
+ ulint i;
+
+ lock_mutex_enter_kernel();
+
+ trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ while (trx) {
+ lock = UT_LIST_GET_FIRST(trx->trx_locks);
+
+ while (lock) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
+
+ lock_table_queue_validate(
+ lock->un_member.tab_lock.table);
+ }
+
+ lock = UT_LIST_GET_NEXT(trx_locks, lock);
+ }
+
+ trx = UT_LIST_GET_NEXT(trx_list, trx);
+ }
+
+ for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
+
+ limit = ut_dulint_zero;
+
+ for (;;) {
+ lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
+
+ while (lock) {
+ ut_a(trx_in_trx_list(lock->trx));
+
+ space = lock->un_member.rec_lock.space;
+ page_no = lock->un_member.rec_lock.page_no;
+
+ if (ut_dulint_cmp(
+ ut_dulint_create(space, page_no),
+ limit) >= 0) {
+ break;
+ }
+
+ lock = HASH_GET_NEXT(hash, lock);
+ }
+
+ if (!lock) {
+
+ break;
+ }
+
+ lock_mutex_exit_kernel();
+
+ lock_rec_validate_page(space, page_no);
+
+ lock_mutex_enter_kernel();
+
+ limit = ut_dulint_create(space, page_no + 1);
+ }
+ }
+
+ lock_mutex_exit_kernel();
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
+ set, does nothing */
+ const rec_t* rec, /*!< in: record after which to insert */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ dict_index_t* index, /*!< in: index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool* inherit)/*!< out: set to TRUE if the new
+ inserted record maybe should inherit
+ LOCK_GAP type locks from the successor
+ record */
+{
+ const rec_t* next_rec;
+ trx_t* trx;
+ lock_t* lock;
+ ulint err;
+ ulint next_rec_heap_no;
+
+ ut_ad(block->frame == page_align(rec));
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ trx = thr_get_trx(thr);
+ next_rec = page_rec_get_next((rec_t*) rec);
+ next_rec_heap_no = page_rec_get_heap_no(next_rec);
+
+ lock_mutex_enter_kernel();
+
+ /* When inserting a record into an index, the table must be at
+ least IX-locked or we must be building an index, in which case
+ the table must be at least S-locked. */
+ ut_ad(lock_table_has(trx, index->table, LOCK_IX)
+ || (*index->name == TEMP_INDEX_PREFIX
+ && lock_table_has(trx, index->table, LOCK_S)));
+
+ lock = lock_rec_get_first(block, next_rec_heap_no);
+
+ if (UNIV_LIKELY(lock == NULL)) {
+ /* We optimize CPU time usage in the simplest case */
+
+ lock_mutex_exit_kernel();
+
+ if (!dict_index_is_clust(index)) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ trx->id, mtr);
+ }
+
+ *inherit = FALSE;
+
+ return(DB_SUCCESS);
+ }
+
+ *inherit = TRUE;
+
+ /* If another transaction has an explicit lock request which locks
+ the gap, waiting or granted, on the successor, the insert has to wait.
+
+	An exception is the case where the lock by the other transaction
+	is a gap type lock which it placed to wait for its turn to insert. We
+	do not consider that kind of lock conflicting with our insert. This
+	eliminates an unnecessary deadlock which resulted when two transactions
+	had to wait for their inserts: both had waiting gap type lock requests
+	on the successor, and neither insert could proceed. */
+
+ if (lock_rec_other_has_conflicting(
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
+ block, next_rec_heap_no, trx)) {
+
+ /* Note that we may get DB_SUCCESS also here! */
+ err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
+ | LOCK_INSERT_INTENTION,
+ block, next_rec_heap_no,
+ index, thr);
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ lock_mutex_exit_kernel();
+
+ if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ trx->id, mtr);
+ }
+
+#ifdef UNIV_DEBUG
+ {
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ const ulint* offsets;
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(next_rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+ ut_ad(lock_rec_queue_validate(block,
+ next_rec, index, offsets));
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+ return(err);
+}
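+
+/* A sketch of how a hypothetical caller could consume the inherit
+flag after performing the physical insert:
+
+	ibool	inherit;
+
+	err = lock_rec_insert_check_and_lock(0, rec, block, index,
+					     thr, mtr, &inherit);
+
+	if (err == DB_SUCCESS && inherit) {
+		(make the new record inherit the LOCK_GAP type
+		locks from the successor record)
+	}
+*/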
+
+/*********************************************************************//**
+If a transaction has an implicit x-lock on a record, but no explicit x-lock
+set on the record, sets one for it. NOTE that in the case of a secondary
+index, the kernel mutex may get temporarily released. */
+static
+void
+lock_rec_convert_impl_to_expl(
+/*==========================*/
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record on page */
+ dict_index_t* index, /*!< in: index of record */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+ trx_t* impl_trx;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+
+ if (dict_index_is_clust(index)) {
+ impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
+ } else {
+ impl_trx = lock_sec_rec_some_has_impl_off_kernel(
+ rec, index, offsets);
+ }
+
+ if (impl_trx) {
+ ulint heap_no = page_rec_get_heap_no(rec);
+
+ /* If the transaction has no explicit x-lock set on the
+ record, set one for it */
+
+ if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
+ heap_no, impl_trx)) {
+
+ lock_rec_add_to_queue(
+ LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, impl_trx);
+ }
+ }
+}
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ulint err;
+ ulint heap_no;
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ heap_no = rec_offs_comp(offsets)
+ ? rec_get_heap_no_new(rec)
+ : rec_get_heap_no_old(rec);
+
+ lock_mutex_enter_kernel();
+
+ ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+
+ /* If a transaction has no explicit x-lock set on the record, set one
+ for it */
+
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
+
+ err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, thr);
+
+ lock_mutex_exit_kernel();
+
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
+
+ return(err);
+}
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify (delete
+mark or delete unmark) of a secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified; NOTE: as this is a secondary
+ index, we always have to modify the
+ clustered index record first: see the
+ comment below */
+ dict_index_t* index, /*!< in: secondary index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint err;
+ ulint heap_no;
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ heap_no = page_rec_get_heap_no(rec);
+
+ /* Another transaction cannot have an implicit lock on the record,
+ because when we come here, we already have modified the clustered
+ index record, and this would not have been possible if another active
+ transaction had modified this secondary index record. */
+
+ lock_mutex_enter_kernel();
+
+ ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+
+ err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, thr);
+
+ lock_mutex_exit_kernel();
+
+#ifdef UNIV_DEBUG
+ {
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ const ulint* offsets;
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+ if (err == DB_SUCCESS) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ thr_get_trx(thr)->id, mtr);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Like the counterpart for a clustered index below, but now we read a
+secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: secondary index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ulint err;
+ ulint heap_no;
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
+ ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ heap_no = page_rec_get_heap_no(rec);
+
+ lock_mutex_enter_kernel();
+
+ ut_ad(mode != LOCK_X
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ ut_ad(mode != LOCK_S
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+
+	/* Some transaction may have an implicit x-lock on the record only
+	if the max trx id of the page is >= the min trx id in the trx list,
+	or a database recovery is running. */
+
+ if (((ut_dulint_cmp(page_get_max_trx_id(block->frame),
+ trx_list_get_min_trx_id()) >= 0)
+ || recv_recovery_is_on())
+ && !page_rec_is_supremum(rec)) {
+
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
+ }
+
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
+
+ lock_mutex_exit_kernel();
+
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
+
+ return(err);
+}
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ulint err;
+ ulint heap_no;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
+ ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
+ ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
+ || gap_mode == LOCK_REC_NOT_GAP);
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ if (flags & BTR_NO_LOCKING_FLAG) {
+
+ return(DB_SUCCESS);
+ }
+
+ heap_no = page_rec_get_heap_no(rec);
+
+ lock_mutex_enter_kernel();
+
+ ut_ad(mode != LOCK_X
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ ut_ad(mode != LOCK_S
+ || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+
+ if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
+
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
+ }
+
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
+
+ lock_mutex_exit_kernel();
+
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
+
+ return(err);
+}
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets".
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ mem_heap_t* tmp_heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ ulint ret;
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &tmp_heap);
+ ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
+ offsets, mode, gap_mode, thr);
+ if (tmp_heap) {
+ mem_heap_free(tmp_heap);
+ }
+ return(ret);
+}
+
+/*******************************************************************//**
+Release the last lock from the transaction's autoinc locks. */
+UNIV_INLINE
+void
+lock_release_autoinc_last_lock(
+/*===========================*/
+ ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
+{
+ ulint last;
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(!ib_vector_is_empty(autoinc_locks));
+
+	/* The lock to be released must be the last lock acquired. */
+ last = ib_vector_size(autoinc_locks) - 1;
+ lock = ib_vector_get(autoinc_locks, last);
+
+ /* Should have only AUTOINC locks in the vector. */
+ ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
+ ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+ ut_a(lock->un_member.tab_lock.table != NULL);
+
+ /* This will remove the lock from the trx autoinc_locks too. */
+ lock_table_dequeue(lock);
+}
+
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+UNIV_INTERN
+void
+lock_release_autoinc_locks(
+/*=======================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ ut_a(trx->autoinc_locks != NULL);
+
+	/* We release the locks in reverse order. This avoids
+	searching the vector for the element to delete at the
+	lower level. See lock_table_remove_low() for details. */
+ while (!ib_vector_is_empty(trx->autoinc_locks)) {
+
+ /* lock_table_remove_low() will also remove the lock from
+ the transaction's autoinc_locks vector. */
+ lock_release_autoinc_last_lock(trx->autoinc_locks);
+ }
+
+ /* Should release all locks. */
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for use outside of the
+lock module.
+@return LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ return(lock_get_type_low(lock));
+}
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return transaction id */
+UNIV_INTERN
+ullint
+lock_get_trx_id(
+/*============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ return(trx_get_id(lock->trx));
+}
+
+/*******************************************************************//**
+Gets the mode of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return lock mode */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ibool is_gap_lock;
+
+ is_gap_lock = lock_get_type_low(lock) == LOCK_REC
+ && lock_rec_get_gap(lock);
+
+ switch (lock_get_mode(lock)) {
+ case LOCK_S:
+ if (is_gap_lock) {
+ return("S,GAP");
+ } else {
+ return("S");
+ }
+ case LOCK_X:
+ if (is_gap_lock) {
+ return("X,GAP");
+ } else {
+ return("X");
+ }
+ case LOCK_IS:
+ if (is_gap_lock) {
+ return("IS,GAP");
+ } else {
+ return("IS");
+ }
+ case LOCK_IX:
+ if (is_gap_lock) {
+ return("IX,GAP");
+ } else {
+ return("IX");
+ }
+ case LOCK_AUTO_INC:
+ return("AUTO_INC");
+ default:
+ return("UNKNOWN");
+ }
+}
+
+/*******************************************************************//**
+Gets the type of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return lock type */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ switch (lock_get_type_low(lock)) {
+ case LOCK_REC:
+ return("RECORD");
+ case LOCK_TABLE:
+ return("TABLE");
+ default:
+ return("UNKNOWN");
+ }
+}
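+
+/* These accessors are intended for code outside the lock module; an
+illustrative (hypothetical) monitoring fragment, using functions
+defined here and below:
+
+	fprintf(stderr, "%s lock in mode %s on table %s\n",
+		lock_get_type_str(lock),
+		lock_get_mode_str(lock),
+		lock_get_table_name(lock));
+*/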
+
+/*******************************************************************//**
+Gets the table on which the lock is.
+@return table */
+UNIV_INLINE
+dict_table_t*
+lock_get_table(
+/*===========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ switch (lock_get_type_low(lock)) {
+ case LOCK_REC:
+ return(lock->index->table);
+ case LOCK_TABLE:
+ return(lock->un_member.tab_lock.table);
+ default:
+ ut_error;
+ return(NULL);
+ }
+}
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return id of the table */
+UNIV_INTERN
+ullint
+lock_get_table_id(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ dict_table_t* table;
+
+ table = lock_get_table(lock);
+
+ return((ullint)ut_conv_dulint_to_longlong(table->id));
+}
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return name of the table */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ dict_table_t* table;
+
+ table = lock_get_table(lock);
+
+ return(table->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the index on which the lock is.
+@return index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->index);
+}
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->index->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->un_member.rec_lock.space);
+}
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->un_member.rec_lock.page_no);
+}