author     Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
committer  Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
commit     6d06fbbd1dc25b3c12568f9038060dfdb69f9683 (patch)
tree       21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/lock
parent     1645930d0bd02f79df3ebff412b90acdc15bd9a0 (diff)
download   mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz
move to storage/innobase
Diffstat (limited to 'storage/innobase/lock')
-rw-r--r--   storage/innobase/lock/lock0iter.cc    111
-rw-r--r--   storage/innobase/lock/lock0lock.cc   7104
-rw-r--r--   storage/innobase/lock/lock0wait.cc    543
3 files changed, 7758 insertions, 0 deletions
diff --git a/storage/innobase/lock/lock0iter.cc b/storage/innobase/lock/lock0iter.cc new file mode 100644 index 00000000000..b424d2fc757 --- /dev/null +++ b/storage/innobase/lock/lock0iter.cc @@ -0,0 +1,111 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0iter.cc +Lock queue iterator. Can iterate over table and record +lock queues. + +Created July 16, 2007 Vasil Dimov +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "univ.i" +#include "lock0iter.h" +#include "lock0lock.h" +#include "lock0priv.h" +#include "ut0dbg.h" +#include "ut0lst.h" + +/*******************************************************************//** +Initialize lock queue iterator so that it starts to iterate from +"lock". bit_no specifies the record number within the heap where the +record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: +1. If the lock is a table lock, thus we have a table lock queue; +2. If the lock is a record lock and it is a wait lock. In this case + bit_no is calculated in this function by using + lock_rec_find_set_bit(). There is exactly one bit set in the bitmap + of a wait lock. */ +UNIV_INTERN +void +lock_queue_iterator_reset( +/*======================*/ + lock_queue_iterator_t* iter, /*!< out: iterator */ + const lock_t* lock, /*!< in: lock to start from */ + ulint bit_no) /*!< in: record number in the + heap */ +{ + ut_ad(lock_mutex_own()); + + iter->current_lock = lock; + + if (bit_no != ULINT_UNDEFINED) { + + iter->bit_no = bit_no; + } else { + + switch (lock_get_type_low(lock)) { + case LOCK_TABLE: + iter->bit_no = ULINT_UNDEFINED; + break; + case LOCK_REC: + iter->bit_no = lock_rec_find_set_bit(lock); + ut_a(iter->bit_no != ULINT_UNDEFINED); + break; + default: + ut_error; + } + } +} + +/*******************************************************************//** +Gets the previous lock in the lock queue, returns NULL if there are no +more locks (i.e. the current lock is the first one). The iterator is +receded (if not-NULL is returned). 
+@return previous lock or NULL */ +UNIV_INTERN +const lock_t* +lock_queue_iterator_get_prev( +/*=========================*/ + lock_queue_iterator_t* iter) /*!< in/out: iterator */ +{ + const lock_t* prev_lock; + + ut_ad(lock_mutex_own()); + + switch (lock_get_type_low(iter->current_lock)) { + case LOCK_REC: + prev_lock = lock_rec_get_prev( + iter->current_lock, iter->bit_no); + break; + case LOCK_TABLE: + prev_lock = UT_LIST_GET_PREV( + un_member.tab_lock.locks, iter->current_lock); + break; + default: + ut_error; + } + + if (prev_lock != NULL) { + + iter->current_lock = prev_lock; + } + + return(prev_lock); +} diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc new file mode 100644 index 00000000000..bf7ca1607d1 --- /dev/null +++ b/storage/innobase/lock/lock0lock.cc @@ -0,0 +1,7104 @@ +/***************************************************************************** + +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0lock.cc +The transaction lock system + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "lock0lock.h" +#include "lock0priv.h" + +#ifdef UNIV_NONINL +#include "lock0lock.ic" +#include "lock0priv.ic" +#endif + +#include "ha_prototypes.h" +#include "usr0sess.h" +#include "trx0purge.h" +#include "dict0mem.h" +#include "dict0boot.h" +#include "trx0sys.h" +#include "pars0pars.h" /* pars_complete_graph_for_exec() */ +#include "que0que.h" /* que_node_get_parent() */ +#include "row0mysql.h" /* row_mysql_handle_errors() */ +#include "row0sel.h" /* sel_node_create(), sel_node_t */ +#include "row0types.h" /* sel_node_t */ +#include "srv0mon.h" +#include "ut0vec.h" +#include "btr0btr.h" +#include "dict0boot.h" +#include <set> + +/* Restricts the length of search we will do in the waits-for +graph of transactions */ +#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 + +/* Restricts the search depth we will do in the waits-for graph of +transactions */ +#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200 + +/* When releasing transaction locks, this specifies how often we release +the lock mutex for a moment to give also others access to it */ + +#define LOCK_RELEASE_INTERVAL 1000 + +/* Safety margin when creating a new record lock: this many extra records +can be inserted to the page without need to create a lock with a bigger +bitmap */ + +#define LOCK_PAGE_BITMAP_MARGIN 64 + +/* An explicit record lock affects both the record and the gap before it. +An implicit x-lock does not affect the gap, it only locks the index +record from read or update. + +If a transaction has modified or inserted an index record, then +it owns an implicit x-lock on the record. 
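[Editorial aside, not part of the patch: for a clustered index record the implicit x-lock is never stored anywhere; it is deduced from the trx id field of the record. A minimal sketch of that deduction, with an invented helper name; row_get_rec_trx_id() and trx_rw_is_active() are real functions used elsewhere in this file.]

	/* Sketch: deduce the implicit x-lock holder of a clustered
	index record; 0 means no active transaction holds one. */
	static trx_id_t
	clust_rec_implicit_x_lock_holder(
		const rec_t*	rec,	/* in: clustered index record */
		dict_index_t*	index,	/* in: clustered index */
		const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
	{
		trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);

		/* If that transaction has already committed, the
		implicit lock is gone with it. */
		return(trx_rw_is_active(trx_id, NULL) != NULL ? trx_id : 0);
	}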
On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition can be explained to the user so that there is
+nondeterminism in the access path when a query is answered: we may,
+or may not, access the clustered index record and thus may, or may not,
+bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
+It only locks the record it is placed on, not the gap before the record.
+This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
+level.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed to a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not, with the exception
+that a gap type request set by another transaction to wait for
+its turn to do an insert is ignored.
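[Editorial illustration, not part of the patch: the gap rules above condense into the predicate sketched here. It mirrors the early returns of lock_rec_has_to_wait() later in this file; the function name is invented, the supremum and LOCK_REC_NOT_GAP special cases are omitted, and the sketch assumes the two lock modes already conflict.]

	static ibool
	gap_request_must_wait(
		ulint		type_mode,	/* in: mode of the new request */
		const lock_t*	lock2)		/* in: conflicting lock already
						in the queue */
	{
		if ((type_mode & LOCK_GAP)
		    && !(type_mode & LOCK_INSERT_INTENTION)) {
			/* Plain gap locks coexist, even in
			conflicting modes. */
			return(FALSE);
		}

		if (lock_rec_get_insert_intention(lock2)) {
			/* No request ever waits for an insert
			intention lock. */
			return(FALSE);
		}

		return(TRUE);	/* otherwise mode compatibility decides */
	}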
On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit, or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it any time
+without need to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again, and retrieve the same result set, if it has not modified the
+result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or newly inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+a lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
+
+/* If an index record should be changed or a new one inserted, we must check
+the lock on the record or the next. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In the next key locking an x-lock set on a record also
+prevents inserts just before that record.
+	There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated but the lock prevents insert of a user record to the end of
+the page.
+	Next key locks will prevent the phantom problem where new rows
+could appear in SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+	What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because also the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock on the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should see also the newly inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the newly inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+	We could have the convention that granted explicit record locks
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+	How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the inserted new record needs no record lock in
+lock table, the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if a lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary?
A secondary index record +is modified only by setting or resetting its deleted flag. A secondary index +record contains fields to uniquely determine the corresponding clustered +index record. A secondary index record is therefore only modified if we +also modify the clustered index record, and the trx id checking is done +on the clustered index record, before we come to modify the secondary index +record. So, in the case of delete marking or unmarking a secondary index +record, we do not have to care about trx ids, only the locks in the lock +table must be checked. In the case of a select from a secondary index, the +trx id is relevant, and in this case we may have to search the clustered +index record. + +PROBLEM: How to update record locks when page is split or merged, or +-------------------------------------------------------------------- +a record is deleted or updated? +If the size of fields in a record changes, we perform the update by +a delete followed by an insert. How can we retain the locks set or +waiting on the record? Because a record lock is indexed in the bitmap +by the heap number of the record, when we remove the record from the +record list, it is possible still to keep the lock bits. If the page +is reorganized, we could make a table of old and new heap numbers, +and permute the bitmaps in the locks accordingly. We can add to the +table a row telling where the updated record ended. If the update does +not require a reorganization of the page, we can simply move the lock +bits for the updated record to the position determined by its new heap +number (we may have to allocate a new lock, if we run out of the bitmap +in the old one). + A more complicated case is the one where the reinsertion of the +updated record is done pessimistically, because the structure of the +tree may change. + +PROBLEM: If a supremum record is removed in a page merge, or a record +--------------------------------------------------------------------- +removed in a purge, what to do to the waiting lock requests? In a split to +the right, we just move the lock requests to the new supremum. If a record +is removed, we could move the waiting lock request to its inheritor, the +next record in the index. But, the next record may already have lock +requests on its own queue. A new deadlock check should be made then. Maybe +it is easier just to release the waiting transactions. They can then enqueue +new lock requests on appropriate records. + +PROBLEM: When a record is inserted, what locks should it inherit from the +------------------------------------------------------------------------- +upper neighbor? An insert of a new supremum record in a page split is +always possible, but an insert of a new user record requires that the upper +neighbor does not have any lock requests by other transactions, granted or +waiting, in its lock queue. Solution: We can copy the locks as gap type +locks, so that also the waiting locks are transformed to granted gap type +locks on the inserted record. */ + +#define LOCK_STACK_SIZE OS_THREAD_MAX_N + +/* LOCK COMPATIBILITY MATRIX + * IS IX S X AI + * IS + + + - + + * IX + + - - + + * S + - + - - + * X - - - - - + * AI + + - - - + * + * Note that for rows, InnoDB only acquires S or X locks. + * For tables, InnoDB normally acquires IS or IX locks. + * S or X table locks are only acquired for LOCK TABLES. + * Auto-increment (AI) locks are needed because of + * statement-level MySQL binlog. + * See also lock_mode_compatible(). 
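[Editorial illustration, not part of the patch: the two matrices defined just below are indexed as matrix[mode1][mode2], with mode1 as the row and mode2 as the column; the compatibility relation is symmetric, the strength relation is not. A few spot checks, written as debug assertions in an invented helper.]

	#ifdef UNIV_DEBUG
	static void
	lock_matrix_spot_checks(void)
	{
		ut_ad(lock_compatibility_matrix[LOCK_IX][LOCK_IS]); /* IX admits IS */
		ut_ad(!lock_compatibility_matrix[LOCK_IX][LOCK_S]); /* IX blocks S */
		ut_ad(lock_strength_matrix[LOCK_X][LOCK_IS]);	/* X covers IS */
		ut_ad(!lock_strength_matrix[LOCK_S][LOCK_IX]);	/* S does not cover IX */
	}
	#endif /* UNIV_DEBUG */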
+ */
+static const byte lock_compatibility_matrix[5][5] = {
+ /**		IS	IX	S	X	AI */
+ /* IS */	{ TRUE, TRUE, TRUE, FALSE, TRUE},
+ /* IX */	{ TRUE, TRUE, FALSE, FALSE, TRUE},
+ /* S  */	{ TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X  */	{ FALSE, FALSE, FALSE, FALSE, FALSE},
+ /* AI */	{ TRUE, TRUE, FALSE, FALSE, FALSE}
+};
+
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ *    IS IX S  X  AI
+ * IS +  -  -  -  -
+ * IX +  +  -  -  -
+ * S  +  -  +  -  -
+ * X  +  +  +  +  +
+ * AI -  -  -  -  +
+ * See lock_mode_stronger_or_eq().
+ */
+static const byte lock_strength_matrix[5][5] = {
+ /**		IS	IX	S	X	AI */
+ /* IS */	{ TRUE, FALSE, FALSE, FALSE, FALSE},
+ /* IX */	{ TRUE, TRUE, FALSE, FALSE, FALSE},
+ /* S  */	{ TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X  */	{ TRUE, TRUE, TRUE, TRUE, TRUE},
+ /* AI */	{ FALSE, FALSE, FALSE, FALSE, TRUE}
+};
+
+/** Deadlock check context. */
+struct lock_deadlock_ctx_t {
+	const trx_t*	start;		/*!< Joining transaction that is
+					requesting a lock in an incompatible
+					mode */
+
+	const lock_t*	wait_lock;	/*!< Lock that trx wants */
+
+	ib_uint64_t	mark_start;	/*!< Value of lock_mark_count at
+					the start of the deadlock check. */
+
+	ulint		depth;		/*!< Stack depth */
+
+	ulint		cost;		/*!< Calculation steps thus far */
+
+	ibool		too_deep;	/*!< TRUE if search was too deep and
+					was aborted */
+};
+
+/** DFS visited node information used during deadlock checking. */
+struct lock_stack_t {
+	const lock_t*	lock;		/*!< Current lock */
+	const lock_t*	wait_lock;	/*!< Waiting for lock */
+	ulint		heap_no;	/*!< heap number if rec lock */
+};
+
+/** Stack to use during DFS search. Currently only a single stack is required
+because there is no parallel deadlock check. This stack is protected by
+the lock_sys_t::mutex. */
+static lock_stack_t*	lock_stack;
+
+/** The count of the types of locks. */
+static const ulint	lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
+
+#ifdef UNIV_PFS_MUTEX
+/* Key to register mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	lock_sys_mutex_key;
+/* Key to register mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	lock_sys_wait_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool	lock_print_waits	= FALSE;
+
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
+bool
+lock_validate();
+/*============*/
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: buffer block */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+
+/* The lock system */
+UNIV_INTERN lock_sys_t*	lock_sys = NULL;
+
+/** We store info on the latest deadlock error to this buffer. InnoDB
+Monitor will then fetch it and print */
+UNIV_INTERN ibool	lock_deadlock_found = FALSE;
+/** Only created if !srv_read_only_mode */
+static FILE*		lock_latest_err_file;
+
+/********************************************************************//**
+Checks if a joining lock request results in a deadlock. If a deadlock is
+found this function will resolve the deadlock by choosing a victim transaction
+and rolling it back. It will attempt to resolve all deadlocks. The returned
+transaction id will be the joining transaction id or 0 if some other
+transaction was chosen as a victim and rolled back or no deadlock found.
+
+@return id of transaction chosen as victim or 0 */
+static
+trx_id_t
+lock_deadlock_check_and_resolve(
+/*===========================*/
+	const lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	const trx_t*	trx);	/*!< in: transaction */
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if not, also when i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+	const lock_t*	lock,	/*!< in: record lock */
+	ulint		i)	/*!< in: index of the bit */
+{
+	const byte*	b;
+
+	ut_ad(lock);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+	if (i >= lock->un_member.rec_lock.n_bits) {
+
+		return(FALSE);
+	}
+
+	b = ((const byte*) &lock[1]) + (i / 8);
+
+	return(1 & *b >> (i % 8));
+}
+
+/*********************************************************************//**
+Reports that a transaction id is insensible, i.e., in the future. */
+UNIV_INTERN
+void
+lock_report_trx_id_insanity(
+/*========================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
+{
+	ut_print_timestamp(stderr);
+	fputs(" InnoDB: Error: transaction id associated with record\n",
+	      stderr);
+	rec_print_new(stderr, rec, offsets);
+	fputs("InnoDB: in ", stderr);
+	dict_index_name_print(stderr, NULL, index);
+	fprintf(stderr, "\n"
+		"InnoDB: is " TRX_ID_FMT " which is higher than the"
+		" global trx id counter " TRX_ID_FMT "!\n"
+		"InnoDB: The table is corrupt. You have to do"
+		" dump + drop + reimport.\n",
+		trx_id, max_trx_id);
+}
+
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return true if ok */
+#ifdef UNIV_DEBUG
+UNIV_INTERN
+#else
+static __attribute__((nonnull, warn_unused_result))
+#endif
+bool
+lock_check_trx_id_sanity(
+/*=====================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
+{
+	bool		is_ok;
+	trx_id_t	max_trx_id;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	max_trx_id = trx_sys_get_max_trx_id();
+	is_ok = trx_id < max_trx_id;
+
+	if (UNIV_UNLIKELY(!is_ok)) {
+		lock_report_trx_id_insanity(trx_id,
+					    rec, index, offsets, max_trx_id);
+	}
+
+	return(is_ok);
+}
+
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return true if it sees, or false if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
+bool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	read_view_t*	view)	/*!< in: consistent read view */
+{
+	trx_id_t	trx_id;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(page_rec_is_user_rec(rec));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	/* NOTE that we call this function while holding the search
+	system latch.
*/ + + trx_id = row_get_rec_trx_id(rec, index, offsets); + + return(read_view_sees_trx_id(view, trx_id)); +} + +/*********************************************************************//** +Checks that a non-clustered index record is seen in a consistent read. + +NOTE that a non-clustered index page contains so little information on +its modifications that also in the case false, the present version of +rec may be the right, but we must check this from the clustered index +record. + +@return true if certainly sees, or false if an earlier version of the +clustered index record might be needed */ +UNIV_INTERN +bool +lock_sec_rec_cons_read_sees( +/*========================*/ + const rec_t* rec, /*!< in: user record which + should be read or passed over + by a read cursor */ + const read_view_t* view) /*!< in: consistent read view */ +{ + trx_id_t max_trx_id; + + ut_ad(page_rec_is_user_rec(rec)); + + /* NOTE that we might call this function while holding the search + system latch. */ + + if (recv_recovery_is_on()) { + + return(false); + } + + max_trx_id = page_get_max_trx_id(page_align(rec)); + ut_ad(max_trx_id); + + return(max_trx_id < view->up_limit_id); +} + +/*********************************************************************//** +Creates the lock system at database start. */ +UNIV_INTERN +void +lock_sys_create( +/*============*/ + ulint n_cells) /*!< in: number of slots in lock hash table */ +{ + ulint lock_sys_sz; + + lock_sys_sz = sizeof(*lock_sys) + + OS_THREAD_MAX_N * sizeof(srv_slot_t); + + lock_sys = static_cast<lock_sys_t*>(mem_zalloc(lock_sys_sz)); + + lock_stack = static_cast<lock_stack_t*>( + mem_zalloc(sizeof(*lock_stack) * LOCK_STACK_SIZE)); + + void* ptr = &lock_sys[1]; + + lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr); + + lock_sys->last_slot = lock_sys->waiting_threads; + + mutex_create(lock_sys_mutex_key, &lock_sys->mutex, SYNC_LOCK_SYS); + + mutex_create(lock_sys_wait_mutex_key, + &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS); + + lock_sys->timeout_event = os_event_create(); + + lock_sys->rec_hash = hash_create(n_cells); + + if (!srv_read_only_mode) { + lock_latest_err_file = os_file_create_tmpfile(); + ut_a(lock_latest_err_file); + } +} + +/*********************************************************************//** +Closes the lock system at database shutdown. */ +UNIV_INTERN +void +lock_sys_close(void) +/*================*/ +{ + if (lock_latest_err_file != NULL) { + fclose(lock_latest_err_file); + lock_latest_err_file = NULL; + } + + hash_table_free(lock_sys->rec_hash); + + mutex_free(&lock_sys->mutex); + mutex_free(&lock_sys->wait_mutex); + + mem_free(lock_stack); + mem_free(lock_sys); + + lock_sys = NULL; + lock_stack = NULL; +} + +/*********************************************************************//** +Gets the size of a lock struct. +@return size in bytes */ +UNIV_INTERN +ulint +lock_get_size(void) +/*===============*/ +{ + return((ulint) sizeof(lock_t)); +} + +/*********************************************************************//** +Gets the mode of a lock. +@return mode */ +UNIV_INLINE +enum lock_mode +lock_get_mode( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock); + + return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK)); +} + +/*********************************************************************//** +Gets the wait flag of a lock. 
+@return LOCK_WAIT if waiting, 0 if not */ +UNIV_INLINE +ulint +lock_get_wait( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_WAIT); +} + +/*********************************************************************//** +Gets the source table of an ALTER TABLE transaction. The table must be +covered by an IX or IS table lock. +@return the source table of transaction, if it is covered by an IX or +IS table lock; dest if there is no source table, and NULL if the +transaction is locking more than two tables or an inconsistency is +found */ +UNIV_INTERN +dict_table_t* +lock_get_src_table( +/*===============*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* dest, /*!< in: destination of ALTER TABLE */ + enum lock_mode* mode) /*!< out: lock mode of the source table */ +{ + dict_table_t* src; + lock_t* lock; + + ut_ad(!lock_mutex_own()); + + src = NULL; + *mode = LOCK_NONE; + + /* The trx mutex protects the trx_locks for our purposes. + Other transactions could want to convert one of our implicit + record locks to an explicit one. For that, they would need our + trx mutex. Waiting locks can be removed while only holding + lock_sys->mutex, but this is a running transaction and cannot + thus be holding any waiting locks. */ + trx_mutex_enter(trx); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + lock_table_t* tab_lock; + enum lock_mode lock_mode; + if (!(lock_get_type_low(lock) & LOCK_TABLE)) { + /* We are only interested in table locks. */ + continue; + } + tab_lock = &lock->un_member.tab_lock; + if (dest == tab_lock->table) { + /* We are not interested in the destination table. */ + continue; + } else if (!src) { + /* This presumably is the source table. */ + src = tab_lock->table; + if (UT_LIST_GET_LEN(src->locks) != 1 + || UT_LIST_GET_FIRST(src->locks) != lock) { + /* We only support the case when + there is only one lock on this table. */ + src = NULL; + goto func_exit; + } + } else if (src != tab_lock->table) { + /* The transaction is locking more than + two tables (src and dest): abort */ + src = NULL; + goto func_exit; + } + + /* Check that the source table is locked by + LOCK_IX or LOCK_IS. */ + lock_mode = lock_get_mode(lock); + if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) { + if (*mode != LOCK_NONE && *mode != lock_mode) { + /* There are multiple locks on src. */ + src = NULL; + goto func_exit; + } + *mode = lock_mode; + } + } + + if (!src) { + /* No source table lock found: flag the situation to caller */ + src = dest; + } + +func_exit: + trx_mutex_exit(trx); + return(src); +} + +/*********************************************************************//** +Determine if the given table is exclusively "owned" by the given +transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC +on the table. +@return TRUE if table is only locked by trx, with LOCK_IX, and +possibly LOCK_AUTO_INC */ +UNIV_INTERN +ibool +lock_is_table_exclusive( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + const trx_t* trx) /*!< in: transaction */ +{ + const lock_t* lock; + ibool ok = FALSE; + + ut_ad(table); + ut_ad(trx); + + lock_mutex_enter(); + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) { + if (lock->trx != trx) { + /* A lock on the table is held + by some other transaction. 
*/ + goto not_ok; + } + + if (!(lock_get_type_low(lock) & LOCK_TABLE)) { + /* We are interested in table locks only. */ + continue; + } + + switch (lock_get_mode(lock)) { + case LOCK_IX: + ok = TRUE; + break; + case LOCK_AUTO_INC: + /* It is allowed for trx to hold an + auto_increment lock. */ + break; + default: +not_ok: + /* Other table locks than LOCK_IX are not allowed. */ + ok = FALSE; + goto func_exit; + } + } + +func_exit: + lock_mutex_exit(); + + return(ok); +} + +/*********************************************************************//** +Sets the wait flag of a lock and the back pointer in trx to lock. */ +UNIV_INLINE +void +lock_set_lock_and_trx_wait( +/*=======================*/ + lock_t* lock, /*!< in: lock */ + trx_t* trx) /*!< in/out: trx */ +{ + ut_ad(lock); + ut_ad(lock->trx == trx); + ut_ad(trx->lock.wait_lock == NULL); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(trx)); + + trx->lock.wait_lock = lock; + lock->type_mode |= LOCK_WAIT; +} + +/**********************************************************************//** +The back pointer to a waiting lock request in the transaction is set to NULL +and the wait bit in lock type_mode is reset. */ +UNIV_INLINE +void +lock_reset_lock_and_trx_wait( +/*=========================*/ + lock_t* lock) /*!< in/out: record lock */ +{ + ut_ad(lock->trx->lock.wait_lock == lock); + ut_ad(lock_get_wait(lock)); + ut_ad(lock_mutex_own()); + + lock->trx->lock.wait_lock = NULL; + lock->type_mode &= ~LOCK_WAIT; +} + +/*********************************************************************//** +Gets the gap flag of a record lock. +@return LOCK_GAP or 0 */ +UNIV_INLINE +ulint +lock_rec_get_gap( +/*=============*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_GAP); +} + +/*********************************************************************//** +Gets the LOCK_REC_NOT_GAP flag of a record lock. +@return LOCK_REC_NOT_GAP or 0 */ +UNIV_INLINE +ulint +lock_rec_get_rec_not_gap( +/*=====================*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_REC_NOT_GAP); +} + +/*********************************************************************//** +Gets the waiting insert flag of a record lock. +@return LOCK_INSERT_INTENTION or 0 */ +UNIV_INLINE +ulint +lock_rec_get_insert_intention( +/*==========================*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_INSERT_INTENTION); +} + +/*********************************************************************//** +Calculates if lock mode 1 is stronger or equal to lock mode 2. +@return nonzero if mode1 stronger or equal to mode2 */ +UNIV_INLINE +ulint +lock_mode_stronger_or_eq( +/*=====================*/ + enum lock_mode mode1, /*!< in: lock mode */ + enum lock_mode mode2) /*!< in: lock mode */ +{ + ut_ad((ulint) mode1 < lock_types); + ut_ad((ulint) mode2 < lock_types); + + return(lock_strength_matrix[mode1][mode2]); +} + +/*********************************************************************//** +Calculates if lock mode 1 is compatible with lock mode 2. 
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
+{
+	ut_ad((ulint) mode1 < lock_types);
+	ut_ad((ulint) mode2 < lock_types);
+
+	return(lock_compatibility_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Checks if a lock request for a new lock has to wait for request lock2.
+@return TRUE if new lock has to wait for lock2 to be removed */
+UNIV_INLINE
+ibool
+lock_rec_has_to_wait(
+/*=================*/
+	const trx_t*	trx,	/*!< in: trx of new lock */
+	ulint		type_mode,/*!< in: precise mode of the new lock
+				to set: LOCK_S or LOCK_X, possibly
+				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
+				LOCK_INSERT_INTENTION */
+	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
+				it is assumed that this has a lock bit
+				set on the same record as in the new
+				lock we are setting */
+	ibool		lock_is_on_supremum)	/*!< in: TRUE if we are setting the
+				lock on the 'supremum' record of an
+				index page: we know then that the lock
+				request is really for a 'gap' type lock */
+{
+	ut_ad(trx && lock2);
+	ut_ad(lock_get_type_low(lock2) == LOCK_REC);
+
+	if (trx != lock2->trx
+	    && !lock_mode_compatible(static_cast<enum lock_mode>(
+				     LOCK_MODE_MASK & type_mode),
+				     lock_get_mode(lock2))) {
+
+		/* We have somewhat complex rules when gap type record locks
+		cause waits */
+
+		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
+		    && !(type_mode & LOCK_INSERT_INTENTION)) {
+
+			/* Gap type locks without LOCK_INSERT_INTENTION flag
+			do not need to wait for anything. This is because
+			different users can have conflicting lock types
+			on gaps. */
+
+			return(FALSE);
+		}
+
+		if (!(type_mode & LOCK_INSERT_INTENTION)
+		    && lock_rec_get_gap(lock2)) {
+
+			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
+			does not need to wait for a gap type lock */
+
+			return(FALSE);
+		}
+
+		if ((type_mode & LOCK_GAP)
+		    && lock_rec_get_rec_not_gap(lock2)) {
+
+			/* Lock on gap does not need to wait for
+			a LOCK_REC_NOT_GAP type lock */
+
+			return(FALSE);
+		}
+
+		if (lock_rec_get_insert_intention(lock2)) {
+
+			/* No lock request needs to wait for an insert
+			intention lock to be removed. This is ok since our
+			rules allow conflicting locks on gaps. This eliminates
+			a spurious deadlock caused by a next-key lock waiting
+			for an insert intention lock; when the insert
+			intention lock was granted, the insert deadlocked on
+			the waiting next-key lock.
+
+			Also, insert intention locks do not disturb each
+			other. */
+
+			return(FALSE);
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return TRUE if lock1 has to wait for lock2 to be removed */ +UNIV_INTERN +ibool +lock_has_to_wait( +/*=============*/ + const lock_t* lock1, /*!< in: waiting lock */ + const lock_t* lock2) /*!< in: another lock; NOTE that it is + assumed that this has a lock bit set + on the same record as in lock1 if the + locks are record locks */ +{ + ut_ad(lock1 && lock2); + + if (lock1->trx != lock2->trx + && !lock_mode_compatible(lock_get_mode(lock1), + lock_get_mode(lock2))) { + if (lock_get_type_low(lock1) == LOCK_REC) { + ut_ad(lock_get_type_low(lock2) == LOCK_REC); + + /* If this lock request is for a supremum record + then the second bit on the lock bitmap is set */ + + return(lock_rec_has_to_wait(lock1->trx, + lock1->type_mode, lock2, + lock_rec_get_nth_bit( + lock1, 1))); + } + + return(TRUE); + } + + return(FALSE); +} + +/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ + +/*********************************************************************//** +Gets the number of bits in a record lock bitmap. +@return number of bits */ +UNIV_INLINE +ulint +lock_rec_get_n_bits( +/*================*/ + const lock_t* lock) /*!< in: record lock */ +{ + return(lock->un_member.rec_lock.n_bits); +} + +/**********************************************************************//** +Sets the nth bit of a record lock to TRUE. */ +UNIV_INLINE +void +lock_rec_set_nth_bit( +/*=================*/ + lock_t* lock, /*!< in: record lock */ + ulint i) /*!< in: index of the bit */ +{ + ulint byte_index; + ulint bit_index; + + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ((byte*) &lock[1])[byte_index] |= 1 << bit_index; +} + +/**********************************************************************//** +Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, +if none found. +@return bit index == heap number of the record, or ULINT_UNDEFINED if +none found */ +UNIV_INTERN +ulint +lock_rec_find_set_bit( +/*==================*/ + const lock_t* lock) /*!< in: record lock with at least one bit set */ +{ + ulint i; + + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/**********************************************************************//** +Resets the nth bit of a record lock. */ +UNIV_INLINE +void +lock_rec_reset_nth_bit( +/*===================*/ + lock_t* lock, /*!< in: record lock */ + ulint i) /*!< in: index of the bit which must be set to TRUE + when this function is called */ +{ + ulint byte_index; + ulint bit_index; + + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index); +} + +/*********************************************************************//** +Gets the first or next record lock on a page. 
+@return next lock, NULL if none exists */ +UNIV_INLINE +const lock_t* +lock_rec_get_next_on_page_const( +/*============================*/ + const lock_t* lock) /*!< in: a record lock */ +{ + ulint space; + ulint page_no; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + for (;;) { + lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock)); + + if (!lock) { + + break; + } + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Gets the first or next record lock on a page. +@return next lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_next_on_page( +/*======================*/ + lock_t* lock) /*!< in: a record lock */ +{ + return((lock_t*) lock_rec_get_next_on_page_const(lock)); +} + +/*********************************************************************//** +Gets the first record lock on a page, where the page is identified by its +file address. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page_addr( +/*============================*/ + ulint space, /*!< in: space */ + ulint page_no)/*!< in: page number */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + for (lock = static_cast<lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, + lock_rec_hash(space, page_no))); + lock != NULL; + lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { + + if (lock->un_member.rec_lock.space == space + && lock->un_member.rec_lock.page_no == page_no) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Determines if there are explicit record locks on a page. +@return an explicit record lock on the page, or NULL if there are none */ +UNIV_INTERN +lock_t* +lock_rec_expl_exist_on_page( +/*========================*/ + ulint space, /*!< in: space id */ + ulint page_no)/*!< in: page number */ +{ + lock_t* lock; + + lock_mutex_enter(); + lock = lock_rec_get_first_on_page_addr(space, page_no); + lock_mutex_exit(); + + return(lock); +} + +/*********************************************************************//** +Gets the first record lock on a page, where the page is identified by a +pointer to it. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page( +/*=======================*/ + const buf_block_t* block) /*!< in: buffer block */ +{ + ulint hash; + lock_t* lock; + ulint space = buf_block_get_space(block); + ulint page_no = buf_block_get_page_no(block); + + ut_ad(lock_mutex_own()); + + hash = buf_block_get_lock_hash_val(block); + + for (lock = static_cast<lock_t*>( + HASH_GET_FIRST( lock_sys->rec_hash, hash)); + lock != NULL; + lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Gets the next explicit lock request on a record. 
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ +UNIV_INLINE +lock_t* +lock_rec_get_next( +/*==============*/ + ulint heap_no,/*!< in: heap number of the record */ + lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock_mutex_own()); + + do { + ut_ad(lock_get_type_low(lock) == LOCK_REC); + lock = lock_rec_get_next_on_page(lock); + } while (lock && !lock_rec_get_nth_bit(lock, heap_no)); + + return(lock); +} + +/*********************************************************************//** +Gets the next explicit lock request on a record. +@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ +UNIV_INLINE +const lock_t* +lock_rec_get_next_const( +/*====================*/ + ulint heap_no,/*!< in: heap number of the record */ + const lock_t* lock) /*!< in: lock */ +{ + return(lock_rec_get_next(heap_no, (lock_t*) lock)); +} + +/*********************************************************************//** +Gets the first explicit lock request on a record. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first( +/*===============*/ + const buf_block_t* block, /*!< in: block containing the record */ + ulint heap_no)/*!< in: heap number of the record */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + if (lock_rec_get_nth_bit(lock, heap_no)) { + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock +pointer in the transaction! This function is used in lock object creation +and resetting. */ +static +void +lock_rec_bitmap_reset( +/*==================*/ + lock_t* lock) /*!< in: record lock */ +{ + ulint n_bytes; + + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + /* Reset to zero the bitmap which resides immediately after the lock + struct */ + + n_bytes = lock_rec_get_n_bits(lock) / 8; + + ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); + + memset(&lock[1], 0, n_bytes); +} + +/*********************************************************************//** +Copies a record lock to heap. +@return copy of lock */ +static +lock_t* +lock_rec_copy( +/*==========*/ + const lock_t* lock, /*!< in: record lock */ + mem_heap_t* heap) /*!< in: memory heap */ +{ + ulint size; + + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; + + return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size))); +} + +/*********************************************************************//** +Gets the previous record lock set on a record. 
+@return previous lock on the same record, NULL if none exists */ +UNIV_INTERN +const lock_t* +lock_rec_get_prev( +/*==============*/ + const lock_t* in_lock,/*!< in: record lock */ + ulint heap_no)/*!< in: heap number of the record */ +{ + lock_t* lock; + ulint space; + ulint page_no; + lock_t* found_lock = NULL; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(in_lock) == LOCK_REC); + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + for (lock = lock_rec_get_first_on_page_addr(space, page_no); + /* No op */; + lock = lock_rec_get_next_on_page(lock)) { + + ut_ad(lock); + + if (lock == in_lock) { + + return(found_lock); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + + found_lock = lock; + } + } +} + +/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/ + +/*********************************************************************//** +Checks if a transaction has the specified table lock, or stronger. This +function should only be called by the thread that owns the transaction. +@return lock or NULL */ +UNIV_INLINE +const lock_t* +lock_table_has( +/*===========*/ + const trx_t* trx, /*!< in: transaction */ + const dict_table_t* table, /*!< in: table */ + enum lock_mode mode) /*!< in: lock mode */ +{ + lint i; + + if (ib_vector_is_empty(trx->lock.table_locks)) { + return(NULL); + } + + /* Look for stronger locks the same trx already has on the table */ + + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + enum lock_mode lock_mode; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + lock_mode = lock_get_mode(lock); + + ut_ad(trx == lock->trx); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(lock->un_member.tab_lock.table != NULL); + + if (table == lock->un_member.tab_lock.table + && lock_mode_stronger_or_eq(lock_mode, mode)) { + + ut_ad(!lock_get_wait(lock)); + + return(lock); + } + } + + return(NULL); +} + +/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ + +/*********************************************************************//** +Checks if a transaction has a GRANTED explicit lock on rec stronger or equal +to precise_mode. 
+@return lock or NULL */ +UNIV_INLINE +lock_t* +lock_rec_has_expl( +/*==============*/ + ulint precise_mode,/*!< in: LOCK_S or LOCK_X + possibly ORed to LOCK_GAP or + LOCK_REC_NOT_GAP, for a + supremum record we regard this + always a gap type request */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S + || (precise_mode & LOCK_MODE_MASK) == LOCK_X); + ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + + if (lock->trx == trx + && !lock_rec_get_insert_intention(lock) + && lock_mode_stronger_or_eq( + lock_get_mode(lock), + static_cast<enum lock_mode>( + precise_mode & LOCK_MODE_MASK)) + && !lock_get_wait(lock) + && (!lock_rec_get_rec_not_gap(lock) + || (precise_mode & LOCK_REC_NOT_GAP) + || heap_no == PAGE_HEAP_NO_SUPREMUM) + && (!lock_rec_get_gap(lock) + || (precise_mode & LOCK_GAP) + || heap_no == PAGE_HEAP_NO_SUPREMUM)) { + + return(lock); + } + } + + return(NULL); +} + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Checks if some other transaction has a lock request in the queue. +@return lock or NULL */ +static +const lock_t* +lock_rec_other_has_expl_req( +/*========================*/ + enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */ + ulint gap, /*!< in: LOCK_GAP if also gap + locks are taken into account, + or 0 if not */ + ulint wait, /*!< in: LOCK_WAIT if also + waiting locks are taken into + account, or 0 if not */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction, or NULL if + requests by all transactions + are taken into account */ +{ + const lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad(mode == LOCK_X || mode == LOCK_S); + ut_ad(gap == 0 || gap == LOCK_GAP); + ut_ad(wait == 0 || wait == LOCK_WAIT); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + if (lock->trx != trx + && (gap + || !(lock_rec_get_gap(lock) + || heap_no == PAGE_HEAP_NO_SUPREMUM)) + && (wait || !lock_get_wait(lock)) + && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { + + return(lock); + } + } + + return(NULL); +} +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Checks if some other transaction has a conflicting explicit lock request +in the queue, so that we have to wait. 
+@return lock or NULL */ +static +const lock_t* +lock_rec_other_has_conflicting( +/*===========================*/ + enum lock_mode mode, /*!< in: LOCK_S or LOCK_X, + possibly ORed to LOCK_GAP or + LOC_REC_NOT_GAP, + LOCK_INSERT_INTENTION */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: our transaction */ +{ + const lock_t* lock; + ibool is_supremum; + + ut_ad(lock_mutex_own()); + + is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) { + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Looks for a suitable type record lock struct by the same trx on the same page. +This can be used to save space when a new record lock should be set on a page: +no new struct is needed, if a suitable old is found. +@return lock or NULL */ +UNIV_INLINE +lock_t* +lock_rec_find_similar_on_page( +/*==========================*/ + ulint type_mode, /*!< in: lock type_mode field */ + ulint heap_no, /*!< in: heap number of the record */ + lock_t* lock, /*!< in: lock_rec_get_first_on_page() */ + const trx_t* trx) /*!< in: transaction */ +{ + ut_ad(lock_mutex_own()); + + for (/* No op */; + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock->trx == trx + && lock->type_mode == type_mode + && lock_rec_get_n_bits(lock) > heap_no) { + + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Checks if some transaction has an implicit x-lock on a record in a secondary +index. +@return transaction id of the transaction which has the x-lock, or 0; +NOTE that this function can return false positives but never false +negatives. The caller must confirm all positive results by calling +trx_is_active(). */ +static +trx_id_t +lock_sec_rec_some_has_impl( +/*=======================*/ + const rec_t* rec, /*!< in: user record */ + dict_index_t* index, /*!< in: secondary index */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + trx_id_t trx_id; + trx_id_t max_trx_id; + const page_t* page = page_align(rec); + + ut_ad(!lock_mutex_own()); + ut_ad(!mutex_own(&trx_sys->mutex)); + ut_ad(!dict_index_is_clust(index)); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + + max_trx_id = page_get_max_trx_id(page); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list, or + database recovery is running. We do not write the changes of a page + max trx id to the log, and therefore during recovery, this value + for a page may be incorrect. */ + + if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) { + + trx_id = 0; + + } else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) { + + buf_page_print(page, 0, 0); + + /* The page is corrupt: try to avoid a crash by returning 0 */ + trx_id = 0; + + /* In this case it is possible that some transaction has an implicit + x-lock. We have to look in the clustered index. 
*/
+
+	} else {
+		trx_id = row_vers_impl_x_locked(rec, index, offsets);
+	}
+
+	return(trx_id);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks if some transaction, other than the given trx_id, has an explicit
+lock on the given rec, in the given precise_mode.
+@return the transaction whose id is not equal to trx_id and that has an
+explicit lock on the given rec in the given precise_mode, or NULL. */
+static
+trx_t*
+lock_rec_other_trx_holds_expl(
+/*==========================*/
+	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
+						possibly ORed to LOCK_GAP or
+						LOCK_REC_NOT_GAP. */
+	trx_id_t		trx_id,		/*!< in: trx holding implicit
+						lock on rec */
+	const rec_t*		rec,		/*!< in: user record */
+	const buf_block_t*	block)		/*!< in: buffer block
+						containing the record */
+{
+	trx_t* holds = NULL;
+
+	lock_mutex_enter();
+
+	if (trx_t *impl_trx = trx_rw_is_active(trx_id, NULL)) {
+		ulint heap_no = page_rec_get_heap_no(rec);
+		mutex_enter(&trx_sys->mutex);
+
+		for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+		     t != NULL;
+		     t = UT_LIST_GET_NEXT(trx_list, t)) {
+
+			lock_t *expl_lock = lock_rec_has_expl(
+				precise_mode, block, heap_no, t);
+
+			if (expl_lock && expl_lock->trx != impl_trx) {
+				/* An explicit lock is held by trx other than
+				the trx holding the implicit lock. */
+				holds = expl_lock->trx;
+				break;
+			}
+		}
+
+		mutex_exit(&trx_sys->mutex);
+	}
+
+	lock_mutex_exit();
+
+	return(holds);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Returns the approximate number of record locks (bits set in the bitmap) for
+this transaction. Since delete-marked records may be removed, the
+record count will not be precise.
+The caller must be holding lock_sys->mutex. */
+UNIV_INTERN
+ulint
+lock_number_of_rows_locked(
+/*=======================*/
+	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
+{
+	const lock_t*	lock;
+	ulint		n_records = 0;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+
+		if (lock_get_type_low(lock) == LOCK_REC) {
+			ulint	n_bit;
+			ulint	n_bits = lock_rec_get_n_bits(lock);
+
+			for (n_bit = 0; n_bit < n_bits; n_bit++) {
+				if (lock_rec_get_nth_bit(lock, n_bit)) {
+					n_records++;
+				}
+			}
+		}
+	}
+
+	return(n_records);
+}
+
+/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
+
+/*********************************************************************//**
+Creates a new record lock and inserts it to the lock queue. Does NOT check
+for deadlocks or lock compatibility!
+@return created lock */
+static
+lock_t*
+lock_rec_create(
+/*============*/
+	ulint			type_mode,/*!< in: lock mode and wait
+					flag, type is ignored and
+					replaced by LOCK_REC */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	trx_t*			trx,	/*!< in/out: transaction */
+	ibool			caller_owns_trx_mutex)
+					/*!< in: TRUE if caller owns
+					trx mutex */
+{
+	lock_t*		lock;
+	ulint		page_no;
+	ulint		space;
+	ulint		n_bits;
+	ulint		n_bytes;
+	const page_t*	page;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
+
+	/* Non-locking autocommit read-only transactions should not set
+	any locks. 
*/ + assert_trx_in_list(trx); + + space = buf_block_get_space(block); + page_no = buf_block_get_page_no(block); + page = block->frame; + + btr_assert_not_corrupted(block, index); + + /* If rec is the supremum record, then we reset the gap and + LOCK_REC_NOT_GAP bits, as all locks on the supremum are + automatically of the gap type */ + + if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + + type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + /* Make lock bitmap bigger by a safety margin */ + n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN; + n_bytes = 1 + n_bits / 8; + + lock = static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, sizeof(lock_t) + n_bytes)); + + lock->trx = trx; + + lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; + lock->index = index; + + lock->un_member.rec_lock.space = space; + lock->un_member.rec_lock.page_no = page_no; + lock->un_member.rec_lock.n_bits = n_bytes * 8; + + /* Reset to zero the bitmap which resides immediately after the + lock struct */ + + lock_rec_bitmap_reset(lock); + + /* Set the bit corresponding to rec */ + lock_rec_set_nth_bit(lock, heap_no); + + index->table->n_rec_locks++; + + ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted); + + HASH_INSERT(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + + if (!caller_owns_trx_mutex) { + trx_mutex_enter(trx); + } + ut_ad(trx_mutex_own(trx)); + + if (type_mode & LOCK_WAIT) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); + + if (!caller_owns_trx_mutex) { + trx_mutex_exit(trx); + } + + MONITOR_INC(MONITOR_RECLOCK_CREATED); + MONITOR_INC(MONITOR_NUM_RECLOCK); + + return(lock); +} + +/*********************************************************************//** +Enqueues a waiting request for a lock which cannot be granted immediately. +Checks for deadlocks. 
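+
+An illustrative sketch of how a caller dispatches on the result (compare
+lock_rec_lock_slow(); the surrounding variables are assumed, and the
+caller holds both lock_sys->mutex and trx->mutex):
+@code
+	dberr_t	err = lock_rec_enqueue_waiting(
+		LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, index, thr);
+
+	switch (err) {
+	case DB_SUCCESS_LOCKED_REC:
+		// granted after all: another trx was chosen as victim
+		break;
+	case DB_LOCK_WAIT:
+		// suspend this query thread until the lock is granted
+		break;
+	case DB_DEADLOCK:
+		// this transaction was chosen as the deadlock victim
+		break;
+	default:
+		break;
+	}
+@endcode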
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ +static +dberr_t +lock_rec_enqueue_waiting( +/*=====================*/ + ulint type_mode,/*!< in: lock mode this + transaction is requesting: + LOCK_S or LOCK_X, possibly + ORed with LOCK_GAP or + LOCK_REC_NOT_GAP, ORed with + LOCK_INSERT_INTENTION if this + waiting lock request is set + when performing an insert of + an index record */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + lock_t* lock; + trx_id_t victim_trx_id; + + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + trx = thr_get_trx(thr); + + ut_ad(trx_mutex_own(trx)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + ut_error; + + return(DB_QUE_THR_SUSPENDED); + } + + switch (trx_get_dict_operation(trx)) { + case TRX_DICT_OP_NONE: + break; + case TRX_DICT_OP_TABLE: + case TRX_DICT_OP_INDEX: + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: a record lock wait happens" + " in a dictionary operation!\n" + "InnoDB: ", stderr); + dict_index_name_print(stderr, trx, index); + fputs(".\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + stderr); + ut_ad(0); + } + + /* Enqueue the lock request that will wait to be granted, note that + we already own the trx mutex. */ + lock = lock_rec_create( + type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); + + /* Release the mutex to obey the latching order. + This is safe, because lock_deadlock_check_and_resolve() + is invoked when a lock wait is enqueued for the currently + running transaction. Because trx is a running transaction + (it is not currently suspended because of a lock wait), + its state can only be changed by this thread, which is + currently associated with the transaction. */ + + trx_mutex_exit(trx); + + victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); + + trx_mutex_enter(trx); + + if (victim_trx_id != 0) { + + ut_ad(victim_trx_id == trx->id); + + lock_reset_lock_and_trx_wait(lock); + lock_rec_reset_nth_bit(lock, heap_no); + + return(DB_DEADLOCK); + + } else if (trx->lock.wait_lock == NULL) { + + /* If there was a deadlock but we chose another + transaction as a victim, it is possible that we + already have the lock now granted! */ + + return(DB_SUCCESS_LOCKED_REC); + } + + trx->lock.que_state = TRX_QUE_LOCK_WAIT; + + trx->lock.was_chosen_as_deadlock_victim = FALSE; + trx->lock.wait_started = ut_time(); + + ut_a(que_thr_stop(thr)); + +#ifdef UNIV_DEBUG + if (lock_print_waits) { + fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ", + trx->id); + ut_print_name(stderr, trx, FALSE, index->name); + } +#endif /* UNIV_DEBUG */ + + MONITOR_INC(MONITOR_LOCKREC_WAIT); + + return(DB_LOCK_WAIT); +} + +/*********************************************************************//** +Adds a record lock request in the record queue. 
The request is normally +added as the last in the queue, but if there are no waiting lock requests +on the record, and the request to be added is not a waiting request, we +can reuse a suitable record lock object already existing on the same page, +just setting the appropriate bit in its bitmap. This is a low-level function +which does NOT check for deadlocks or lock compatibility! +@return lock where the bit was set */ +static +lock_t* +lock_rec_add_to_queue( +/*==================*/ + ulint type_mode,/*!< in: lock mode, wait, gap + etc. flags; type is ignored + and replaced by LOCK_REC */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + dict_index_t* index, /*!< in: index of record */ + trx_t* trx, /*!< in/out: transaction */ + ibool caller_owns_trx_mutex) + /*!< in: TRUE if caller owns the + transaction mutex */ +{ + lock_t* lock; + lock_t* first_lock; + + ut_ad(lock_mutex_own()); + ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); + ut_ad(dict_index_is_clust(index) + || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION); +#ifdef UNIV_DEBUG + switch (type_mode & LOCK_MODE_MASK) { + case LOCK_X: + case LOCK_S: + break; + default: + ut_error; + } + + if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) { + enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S + ? LOCK_X + : LOCK_S; + const lock_t* other_lock + = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT, + block, heap_no, trx); + ut_a(!other_lock); + } +#endif /* UNIV_DEBUG */ + + type_mode |= LOCK_REC; + + /* If rec is the supremum record, then we can reset the gap bit, as + all locks on the supremum are automatically of the gap type, and we + try to avoid unnecessary memory consumption of a new record lock + struct for a gap type lock */ + + if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + + /* There should never be LOCK_REC_NOT_GAP on a supremum + record, but let us play safe */ + + type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + /* Look for a waiting lock request on the same record or on a gap */ + + for (first_lock = lock = lock_rec_get_first_on_page(block); + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock_get_wait(lock) + && lock_rec_get_nth_bit(lock, heap_no)) { + + goto somebody_waits; + } + } + + if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) { + + /* Look for a similar record lock on the same page: + if one is found and there are no waiting lock requests, + we can just set the bit */ + + lock = lock_rec_find_similar_on_page( + type_mode, heap_no, first_lock, trx); + + if (lock) { + + lock_rec_set_nth_bit(lock, heap_no); + + return(lock); + } + } + +somebody_waits: + return(lock_rec_create( + type_mode, block, heap_no, index, trx, + caller_owns_trx_mutex)); +} + +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + +/*********************************************************************//** +This is a fast routine for locking a record in the most common cases: +there are no explicit locks on the page, or there is just one lock, owned +by this transaction, and of the right type_mode. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. 
This function sets a normal next-key lock, or in the case of +a page supremum record, a gap type lock. +@return whether the locking succeeded */ +UNIV_INLINE +enum lock_rec_req_status +lock_rec_lock_fast( +/*===============*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + lock_t* lock; + trx_t* trx; + enum lock_rec_req_status status = LOCK_REC_SUCCESS; + + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == 0 + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL);); + + lock = lock_rec_get_first_on_page(block); + + trx = thr_get_trx(thr); + + if (lock == NULL) { + if (!impl) { + /* Note that we don't own the trx mutex. */ + lock = lock_rec_create( + mode, block, heap_no, index, trx, FALSE); + + } + status = LOCK_REC_SUCCESS_CREATED; + } else { + trx_mutex_enter(trx); + + if (lock_rec_get_next_on_page(lock) + || lock->trx != trx + || lock->type_mode != (mode | LOCK_REC) + || lock_rec_get_n_bits(lock) <= heap_no) { + + status = LOCK_REC_FAIL; + } else if (!impl) { + /* If the nth bit of the record lock is already set + then we do not set a new lock bit, otherwise we do + set */ + if (!lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_set_nth_bit(lock, heap_no); + status = LOCK_REC_SUCCESS_CREATED; + } + } + + trx_mutex_exit(trx); + } + + return(status); +} + +/*********************************************************************//** +This is the general, and slower, routine for locking a record. This is a +low-level function which does NOT look at implicit locks! Checks lock +compatibility within explicit locks. This function sets a normal next-key +lock, or in the case of a page supremum record, a gap type lock. 
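+
+The mode parameter is composed as asserted below: exactly one of LOCK_S
+or LOCK_X, optionally ORed with one of LOCK_GAP or LOCK_REC_NOT_GAP.
+Illustrative calls (the other arguments are assumed):
+@code
+	lock_rec_lock_slow(FALSE, LOCK_X,
+			   block, heap_no, index, thr);
+	// next-key lock: the record and the gap before it
+
+	lock_rec_lock_slow(FALSE, LOCK_X | LOCK_REC_NOT_GAP,
+			   block, heap_no, index, thr);
+	// lock on the record only
+
+	lock_rec_lock_slow(FALSE, LOCK_S | LOCK_GAP,
+			   block, heap_no, index, thr);
+	// lock on the gap before the record only
+@endcode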
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +static +dberr_t +lock_rec_lock_slow( +/*===============*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + dberr_t err = DB_SUCCESS; + + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == 0 + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK);); + + trx = thr_get_trx(thr); + trx_mutex_enter(trx); + + if (lock_rec_has_expl(mode, block, heap_no, trx)) { + + /* The trx already has a strong enough lock on rec: do + nothing */ + + } else if (lock_rec_other_has_conflicting( + static_cast<enum lock_mode>(mode), + block, heap_no, trx)) { + + /* If another transaction has a non-gap conflicting + request in the queue, as this transaction does not + have a lock strong enough already granted on the + record, we have to wait. */ + + err = lock_rec_enqueue_waiting( + mode, block, heap_no, index, thr); + + } else if (!impl) { + /* Set the requested lock on the record, note that + we already own the transaction mutex. */ + + lock_rec_add_to_queue( + LOCK_REC | mode, block, heap_no, index, trx, TRUE); + + err = DB_SUCCESS_LOCKED_REC; + } + + trx_mutex_exit(trx); + + return(err); +} + +/*********************************************************************//** +Tries to lock the specified record in the mode requested. If not immediately +possible, enqueues a waiting lock request. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. This function sets a normal next-key lock, or in the case +of a page supremum record, a gap type lock. 
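+Of the success codes below, DB_SUCCESS means that no new explicit lock
+was set, while DB_SUCCESS_LOCKED_REC means that a new lock or lock bit
+was created (compare LOCK_REC_SUCCESS and LOCK_REC_SUCCESS_CREATED in
+lock_rec_lock_fast() above).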
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +static +dberr_t +lock_rec_lock( +/*==========*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP + || mode - (LOCK_MODE_MASK & mode) == 0); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); + } + + ut_error; + return(DB_ERROR); +} + +/*********************************************************************//** +Checks if a waiting record lock request still has to wait in a queue. +@return lock that is causing the wait */ +static +const lock_t* +lock_rec_has_to_wait_in_queue( +/*==========================*/ + const lock_t* wait_lock) /*!< in: waiting record lock */ +{ + const lock_t* lock; + ulint space; + ulint page_no; + ulint heap_no; + ulint bit_mask; + ulint bit_offset; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_wait(wait_lock)); + ut_ad(lock_get_type_low(wait_lock) == LOCK_REC); + + space = wait_lock->un_member.rec_lock.space; + page_no = wait_lock->un_member.rec_lock.page_no; + heap_no = lock_rec_find_set_bit(wait_lock); + + bit_offset = heap_no / 8; + bit_mask = static_cast<ulint>(1 << (heap_no % 8)); + + for (lock = lock_rec_get_first_on_page_addr(space, page_no); + lock != wait_lock; + lock = lock_rec_get_next_on_page_const(lock)) { + + const byte* p = (const byte*) &lock[1]; + + if (heap_no < lock_rec_get_n_bits(lock) + && (p[bit_offset] & bit_mask) + && lock_has_to_wait(wait_lock, lock)) { + + return(lock); + } + } + + return(NULL); +} + +/*************************************************************//** +Grants a lock to a waiting lock request and releases the waiting transaction. +The caller must hold lock_sys->mutex but not lock->trx->mutex. 
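+lock_grant() itself acquires and releases lock->trx->mutex, which is why
+the caller must not be holding it (see the latching order).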
*/ +static +void +lock_grant( +/*=======*/ + lock_t* lock) /*!< in/out: waiting lock request */ +{ + ut_ad(lock_mutex_own()); + + lock_reset_lock_and_trx_wait(lock); + + trx_mutex_enter(lock->trx); + + if (lock_get_mode(lock) == LOCK_AUTO_INC) { + dict_table_t* table = lock->un_member.tab_lock.table; + + if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) { + fprintf(stderr, + "InnoDB: Error: trx already had" + " an AUTO-INC lock!\n"); + } else { + table->autoinc_trx = lock->trx; + + ib_vector_push(lock->trx->autoinc_locks, &lock); + } + } + +#ifdef UNIV_DEBUG + if (lock_print_waits) { + fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n", + lock->trx->id); + } +#endif /* UNIV_DEBUG */ + + /* If we are resolving a deadlock by choosing another transaction + as a victim, then our original transaction may not be in the + TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait + for it */ + + if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + que_thr_t* thr; + + thr = que_thr_end_lock_wait(lock->trx); + + if (thr != NULL) { + lock_wait_release_thread_if_suspended(thr); + } + } + + trx_mutex_exit(lock->trx); +} + +/*************************************************************//** +Cancels a waiting record lock request and releases the waiting transaction +that requested it. NOTE: does NOT check if waiting lock requests behind this +one can now be granted! */ +static +void +lock_rec_cancel( +/*============*/ + lock_t* lock) /*!< in: waiting record lock request */ +{ + que_thr_t* thr; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + /* Reset the bit (there can be only one set bit) in the lock bitmap */ + lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); + + /* Reset the wait flag and the back pointer to lock in trx */ + + lock_reset_lock_and_trx_wait(lock); + + /* The following function releases the trx from lock wait */ + + trx_mutex_enter(lock->trx); + + thr = que_thr_end_lock_wait(lock->trx); + + if (thr != NULL) { + lock_wait_release_thread_if_suspended(thr); + } + + trx_mutex_exit(lock->trx); +} + +/*************************************************************//** +Removes a record lock request, waiting or granted, from the queue and +grants locks to other transactions in the queue if they now are entitled +to a lock. NOTE: all record locks contained in in_lock are removed. */ +static +void +lock_rec_dequeue_from_page( +/*=======================*/ + lock_t* in_lock) /*!< in: record lock object: all + record locks which are contained in + this lock object are removed; + transactions waiting behind will + get their lock requests granted, + if they are now qualified to it */ +{ + ulint space; + ulint page_no; + lock_t* lock; + trx_lock_t* trx_lock; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(in_lock) == LOCK_REC); + /* We may or may not be holding in_lock->trx->mutex here. */ + + trx_lock = &in_lock->trx->lock; + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + in_lock->index->table->n_rec_locks--; + + HASH_DELETE(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), in_lock); + + UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock); + + MONITOR_INC(MONITOR_RECLOCK_REMOVED); + MONITOR_DEC(MONITOR_NUM_RECLOCK); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. Stop at the first + X lock that is waiting or has been granted. 
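+	Each waiting lock is re-checked with lock_rec_has_to_wait_in_queue(),
+	which only looks at the locks ahead of it in the queue.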
*/
+
+	for (lock = lock_rec_get_first_on_page_addr(space, page_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next_on_page(lock)) {
+
+		if (lock_get_wait(lock)
+		    && !lock_rec_has_to_wait_in_queue(lock)) {
+
+			/* Grant the lock */
+			ut_ad(lock->trx != in_lock->trx);
+			lock_grant(lock);
+		}
+	}
+}
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue. */
+static
+void
+lock_rec_discard(
+/*=============*/
+	lock_t*	in_lock)	/*!< in: record lock object: all
+				record locks which are contained
+				in this lock object are removed */
+{
+	ulint		space;
+	ulint		page_no;
+	trx_lock_t*	trx_lock;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
+
+	trx_lock = &in_lock->trx->lock;
+
+	space = in_lock->un_member.rec_lock.space;
+	page_no = in_lock->un_member.rec_lock.page_no;
+
+	in_lock->index->table->n_rec_locks--;
+
+	HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+		    lock_rec_fold(space, page_no), in_lock);
+
+	UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
+
+	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
+	MONITOR_DEC(MONITOR_NUM_RECLOCK);
+}
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function neither moves locks nor checks for waiting locks; therefore the
+lock bitmaps must already be reset when this function is called. */
+static
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+	const buf_block_t*	block)	/*!< in: page to be discarded */
+{
+	ulint	space;
+	ulint	page_no;
+	lock_t*	lock;
+	lock_t*	next_lock;
+
+	ut_ad(lock_mutex_own());
+
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+
+	lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+	while (lock != NULL) {
+		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
+		ut_ad(!lock_get_wait(lock));
+
+		next_lock = lock_rec_get_next_on_page(lock);
+
+		lock_rec_discard(lock);
+
+		lock = next_lock;
+	}
+}
+
+/*============= RECORD LOCK MOVING AND INHERITING ===================*/
+
+/*************************************************************//**
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here. */
+static
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no)/*!< in: heap number of record */
+{
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (lock_get_wait(lock)) {
+			lock_rec_cancel(lock);
+		} else {
+			lock_rec_reset_nth_bit(lock, heap_no);
+		}
+	}
+}
+
+/*************************************************************//**
+Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of
+the other record. Also waiting lock requests on rec are inherited as
+GRANTED gap locks. 
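+
+For example (illustrative): when a record is purged from a page, its
+successor inherits the purged record's locks in gap mode, so that the
+gap protected by any next-key locks remains protected; see
+lock_update_delete().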
*/
+static
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no)	/*!< in: heap_no of the
+						donating record */
+{
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
+	the READ COMMITTED isolation level, we do not want locks set
+	by an UPDATE or a DELETE to be inherited as gap type locks. But we
+	DO want S-locks set by a consistency constraint to be inherited also
+	then. */
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (!lock_rec_get_insert_intention(lock)
+		    && !((srv_locks_unsafe_for_binlog
+			  || lock->trx->isolation_level
+			  <= TRX_ISO_READ_COMMITTED)
+			 && lock_get_mode(lock) == LOCK_X)) {
+
+			lock_rec_add_to_queue(
+				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+				heir_block, heir_heap_no, lock->index,
+				lock->trx, FALSE);
+		}
+	}
+}
+
+/*************************************************************//**
+Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of the
+other record. Also waiting lock requests are inherited as GRANTED gap locks. */
+static
+void
+lock_rec_inherit_to_gap_if_gap_lock(
+/*================================*/
+	const buf_block_t*	block,		/*!< in: buffer block */
+	ulint			heir_heap_no,	/*!< in: heap_no of
+						record which inherits */
+	ulint			heap_no)	/*!< in: heap_no of record
+						from which inherited;
+						does NOT reset the locks
+						on this record */
+{
+	lock_t*	lock;
+
+	lock_mutex_enter();
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (!lock_rec_get_insert_intention(lock)
+		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
+			|| !lock_rec_get_rec_not_gap(lock))) {
+
+			lock_rec_add_to_queue(
+				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+				block, heir_heap_no, lock->index,
+				lock->trx, FALSE);
+		}
+	}
+
+	lock_mutex_exit();
+}
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+static
+void
+lock_rec_move(
+/*==========*/
+	const buf_block_t*	receiver,	/*!< in: buffer block containing
+						the receiving record */
+	const buf_block_t*	donator,	/*!< in: buffer block containing
+						the donating record */
+	ulint			receiver_heap_no,/*!< in: heap_no of the record
+						which gets the locks; there
+						must be no lock requests
+						on it! 
*/ + ulint donator_heap_no)/*!< in: heap_no of the record + which gives the locks */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL); + + for (lock = lock_rec_get_first(donator, donator_heap_no); + lock != NULL; + lock = lock_rec_get_next(donator_heap_no, lock)) { + + const ulint type_mode = lock->type_mode; + + lock_rec_reset_nth_bit(lock, donator_heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + /* Note that we FIRST reset the bit, and then set the lock: + the function works also if donator == receiver */ + + lock_rec_add_to_queue( + type_mode, receiver, receiver_heap_no, + lock->index, lock->trx, FALSE); + } + + ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL); +} + +/*************************************************************//** +Updates the lock table when we have reorganized a page. NOTE: we copy +also the locks set on the infimum of the page; the infimum may carry +locks if an update of a record is occurring on the page, and its locks +were temporarily stored on the infimum. */ +UNIV_INTERN +void +lock_move_reorganize_page( +/*======================*/ + const buf_block_t* block, /*!< in: old index page, now + reorganized */ + const buf_block_t* oblock) /*!< in: copy of the old, not + reorganized page */ +{ + lock_t* lock; + UT_LIST_BASE_NODE_T(lock_t) old_locks; + mem_heap_t* heap = NULL; + ulint comp; + + lock_mutex_enter(); + + lock = lock_rec_get_first_on_page(block); + + if (lock == NULL) { + lock_mutex_exit(); + + return; + } + + heap = mem_heap_create(256); + + /* Copy first all the locks on the page to heap and reset the + bitmaps in the original locks; chain the copies of the locks + using the trx_locks field in them. */ + + UT_LIST_INIT(old_locks); + + do { + /* Make a copy of the lock */ + lock_t* old_lock = lock_rec_copy(lock, heap); + + UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock); + + /* Reset bitmap of lock */ + lock_rec_bitmap_reset(lock); + + if (lock_get_wait(lock)) { + + lock_reset_lock_and_trx_wait(lock); + } + + lock = lock_rec_get_next_on_page(lock); + } while (lock != NULL); + + comp = page_is_comp(block->frame); + ut_ad(comp == page_is_comp(oblock->frame)); + + for (lock = UT_LIST_GET_FIRST(old_locks); lock; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + /* NOTE: we copy also the locks set on the infimum and + supremum of the page; the infimum may carry locks if an + update of a record is occurring on the page, and its locks + were temporarily stored on the infimum */ + page_cur_t cur1; + page_cur_t cur2; + + page_cur_set_before_first(block, &cur1); + page_cur_set_before_first(oblock, &cur2); + + /* Set locks according to old locks */ + for (;;) { + ulint old_heap_no; + ulint new_heap_no; + + ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec( + &cur2)))); + if (UNIV_LIKELY(comp)) { + old_heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + new_heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + old_heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + new_heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + } + + if (lock_rec_get_nth_bit(lock, old_heap_no)) { + + /* Clear the bit in old_lock. */ + ut_d(lock_rec_reset_nth_bit(lock, + old_heap_no)); + + /* NOTE that the old lock bitmap could be too + small for the new heap number! 
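+				In that case lock_rec_add_to_queue()
+				cannot reuse the old lock struct: it
+				creates a fresh one whose bitmap is
+				sized from the current page heap.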
*/ + + lock_rec_add_to_queue( + lock->type_mode, block, new_heap_no, + lock->index, lock->trx, FALSE); + + /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM + && lock_get_wait(lock)) { + fprintf(stderr, + "---\n--\n!!!Lock reorg: supr type %lu\n", + lock->type_mode); + } */ + } + + if (UNIV_UNLIKELY + (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) { + + ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM); + break; + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + +#ifdef UNIV_DEBUG + { + ulint i = lock_rec_find_set_bit(lock); + + /* Check that all locks were moved. */ + if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) { + fprintf(stderr, + "lock_move_reorganize_page():" + " %lu not moved in %p\n", + (ulong) i, (void*) lock); + ut_error; + } + } +#endif /* UNIV_DEBUG */ + } + + lock_mutex_exit(); + + mem_heap_free(heap); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list end is moved to another page. */ +UNIV_INTERN +void +lock_move_rec_list_end( +/*===================*/ + const buf_block_t* new_block, /*!< in: index page to move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec) /*!< in: record on page: this + is the first record moved */ +{ + lock_t* lock; + const ulint comp = page_rec_is_comp(rec); + + lock_mutex_enter(); + + /* Note: when we move locks from record to record, waiting locks + and possible granted gap type locks behind them are enqueued in + the original order, because new elements are inserted to a hash + table to the end of the hash chain, and lock_rec_add_to_queue + does not reuse locks if there are waiters in the queue. */ + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + page_cur_t cur1; + page_cur_t cur2; + const ulint type_mode = lock->type_mode; + + page_cur_position(rec, block, &cur1); + + if (page_cur_is_before_first(&cur1)) { + page_cur_move_to_next(&cur1); + } + + page_cur_set_before_first(new_block, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (!page_cur_is_after_last(&cur1)) { + ulint heap_no; + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + ut_ad(!memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec(&cur2)))); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_reset_nth_bit(lock, heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + } + + lock_rec_add_to_queue( + type_mode, new_block, heap_no, + lock->index, lock->trx, FALSE); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + } + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); + ut_ad(lock_rec_validate_page(new_block)); +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. 
*/ +UNIV_INTERN +void +lock_move_rec_list_start( +/*=====================*/ + const buf_block_t* new_block, /*!< in: index page to + move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec, /*!< in: record on page: + this is the first + record NOT copied */ + const rec_t* old_end) /*!< in: old + previous-to-last + record on new_page + before the records + were copied */ +{ + lock_t* lock; + const ulint comp = page_rec_is_comp(rec); + + ut_ad(block->frame == page_align(rec)); + ut_ad(new_block->frame == page_align(old_end)); + + lock_mutex_enter(); + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + page_cur_t cur1; + page_cur_t cur2; + const ulint type_mode = lock->type_mode; + + page_cur_set_before_first(block, &cur1); + page_cur_move_to_next(&cur1); + + page_cur_position(old_end, new_block, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (page_cur_get_rec(&cur1) != rec) { + ulint heap_no; + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + ut_ad(!memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec( + &cur2)))); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_reset_nth_bit(lock, heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + } + + lock_rec_add_to_queue( + type_mode, new_block, heap_no, + lock->index, lock->trx, FALSE); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + +#ifdef UNIV_DEBUG + if (page_rec_is_supremum(rec)) { + ulint i; + + for (i = PAGE_HEAP_NO_USER_LOW; + i < lock_rec_get_n_bits(lock); i++) { + if (UNIV_UNLIKELY + (lock_rec_get_nth_bit(lock, i))) { + + fprintf(stderr, + "lock_move_rec_list_start():" + " %lu not moved in %p\n", + (ulong) i, (void*) lock); + ut_error; + } + } + } +#endif /* UNIV_DEBUG */ + } + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} + +/*************************************************************//** +Updates the lock table when a page is split to the right. */ +UNIV_INTERN +void +lock_update_split_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + ulint heap_no = lock_get_min_heap_no(right_block); + + lock_mutex_enter(); + + /* Move the locks on the supremum of the left page to the supremum + of the right page */ + + lock_rec_move(right_block, left_block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + + /* Inherit the locks to the supremum of left page from the successor + of the infimum on right page */ + + lock_rec_inherit_to_gap(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is merged to the right. 
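+The locks on the left page's supremum protected the gap just before
+orig_succ; they are inherited by orig_succ in gap mode, and the lock
+structs of the discarded left page are then freed.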
*/ +UNIV_INTERN +void +lock_update_merge_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page to + which merged */ + const rec_t* orig_succ, /*!< in: original + successor of infimum + on the right page + before merge */ + const buf_block_t* left_block) /*!< in: merged index + page which will be + discarded */ +{ + lock_mutex_enter(); + + /* Inherit the locks from the supremum of the left page to the + original successor of infimum on the right page, to which the left + page was merged */ + + lock_rec_inherit_to_gap(right_block, left_block, + page_rec_get_heap_no(orig_succ), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, releasing + waiting transactions */ + + lock_rec_reset_and_release_wait(left_block, + PAGE_HEAP_NO_SUPREMUM); + + lock_rec_free_all_from_discard_page(left_block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when the root page is copied to another in +btr_root_raise_and_insert. Note that we leave lock structs on the +root page, even though they do not make sense on other than leaf +pages: the reason is that in a pessimistic update the infimum record +of the root page will act as a dummy carrier of the locks of the record +to be updated. */ +UNIV_INTERN +void +lock_update_root_raise( +/*===================*/ + const buf_block_t* block, /*!< in: index page to which copied */ + const buf_block_t* root) /*!< in: root page */ +{ + lock_mutex_enter(); + + /* Move the locks on the supremum of the root to the supremum + of block */ + + lock_rec_move(block, root, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is copied to another and the original page +is removed from the chain of leaf pages, except if page is the root! */ +UNIV_INTERN +void +lock_update_copy_and_discard( +/*=========================*/ + const buf_block_t* new_block, /*!< in: index page to + which copied */ + const buf_block_t* block) /*!< in: index page; + NOT the root! */ +{ + lock_mutex_enter(); + + /* Move the locks on the supremum of the old page to the supremum + of new_page */ + + lock_rec_move(new_block, block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_rec_free_all_from_discard_page(block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is split to the left. */ +UNIV_INTERN +void +lock_update_split_left( +/*===================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + ulint heap_no = lock_get_min_heap_no(right_block); + + lock_mutex_enter(); + + /* Inherit the locks to the supremum of the left page from the + successor of the infimum on the right page */ + + lock_rec_inherit_to_gap(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is merged to the left. 
*/ +UNIV_INTERN +void +lock_update_merge_left( +/*===================*/ + const buf_block_t* left_block, /*!< in: left page to + which merged */ + const rec_t* orig_pred, /*!< in: original predecessor + of supremum on the left page + before merge */ + const buf_block_t* right_block) /*!< in: merged index page + which will be discarded */ +{ + const rec_t* left_next_rec; + + ut_ad(left_block->frame == page_align(orig_pred)); + + lock_mutex_enter(); + + left_next_rec = page_rec_get_next_const(orig_pred); + + if (!page_rec_is_supremum(left_next_rec)) { + + /* Inherit the locks on the supremum of the left page to the + first record which was moved from the right page */ + + lock_rec_inherit_to_gap(left_block, left_block, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, + releasing waiting transactions */ + + lock_rec_reset_and_release_wait(left_block, + PAGE_HEAP_NO_SUPREMUM); + } + + /* Move the locks from the supremum of right page to the supremum + of the left page */ + + lock_rec_move(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + + lock_rec_free_all_from_discard_page(right_block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Resets the original locks on heir and replaces them with gap type locks +inherited from rec. */ +UNIV_INTERN +void +lock_rec_reset_and_inherit_gap_locks( +/*=================================*/ + const buf_block_t* heir_block, /*!< in: block containing the + record which inherits */ + const buf_block_t* block, /*!< in: block containing the + record from which inherited; + does NOT reset the locks on + this record */ + ulint heir_heap_no, /*!< in: heap_no of the + inheriting record */ + ulint heap_no) /*!< in: heap_no of the + donating record */ +{ + lock_mutex_enter(); + + lock_rec_reset_and_release_wait(heir_block, heir_heap_no); + + lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is discarded. 
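+Every record on the discarded page, from the infimum on, donates its
+locks to heir_heap_no on heir_block as gap type locks, after which the
+page's lock structs are freed.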
*/ +UNIV_INTERN +void +lock_update_discard( +/*================*/ + const buf_block_t* heir_block, /*!< in: index page + which will inherit the locks */ + ulint heir_heap_no, /*!< in: heap_no of the record + which will inherit the locks */ + const buf_block_t* block) /*!< in: index page + which will be discarded */ +{ + const page_t* page = block->frame; + const rec_t* rec; + ulint heap_no; + + lock_mutex_enter(); + + if (!lock_rec_get_first_on_page(block)) { + /* No locks exist on page, nothing to do */ + + lock_mutex_exit(); + + return; + } + + /* Inherit all the locks on the page to the record and reset all + the locks on the page */ + + if (page_is_comp(page)) { + rec = page + PAGE_NEW_INFIMUM; + + do { + heap_no = rec_get_heap_no_new(rec); + + lock_rec_inherit_to_gap(heir_block, block, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait(block, heap_no); + + rec = page + rec_get_next_offs(rec, TRUE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } else { + rec = page + PAGE_OLD_INFIMUM; + + do { + heap_no = rec_get_heap_no_old(rec); + + lock_rec_inherit_to_gap(heir_block, block, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait(block, heap_no); + + rec = page + rec_get_next_offs(rec, FALSE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } + + lock_rec_free_all_from_discard_page(block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a new user record is inserted. */ +UNIV_INTERN +void +lock_update_insert( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the inserted record */ +{ + ulint receiver_heap_no; + ulint donator_heap_no; + + ut_ad(block->frame == page_align(rec)); + + /* Inherit the gap-locking locks for rec, in gap mode, from the next + record */ + + if (page_rec_is_comp(rec)) { + receiver_heap_no = rec_get_heap_no_new(rec); + donator_heap_no = rec_get_heap_no_new( + page_rec_get_next_low(rec, TRUE)); + } else { + receiver_heap_no = rec_get_heap_no_old(rec); + donator_heap_no = rec_get_heap_no_old( + page_rec_get_next_low(rec, FALSE)); + } + + lock_rec_inherit_to_gap_if_gap_lock( + block, receiver_heap_no, donator_heap_no); +} + +/*************************************************************//** +Updates the lock table when a record is removed. */ +UNIV_INTERN +void +lock_update_delete( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the record to be removed */ +{ + const page_t* page = block->frame; + ulint heap_no; + ulint next_heap_no; + + ut_ad(page == page_align(rec)); + + if (page_is_comp(page)) { + heap_no = rec_get_heap_no_new(rec); + next_heap_no = rec_get_heap_no_new(page + + rec_get_next_offs(rec, + TRUE)); + } else { + heap_no = rec_get_heap_no_old(rec); + next_heap_no = rec_get_heap_no_old(page + + rec_get_next_offs(rec, + FALSE)); + } + + lock_mutex_enter(); + + /* Let the next record inherit the locks from rec, in gap mode */ + + lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no); + + /* Reset the lock bits on rec and release waiting transactions */ + + lock_rec_reset_and_release_wait(block, heap_no); + + lock_mutex_exit(); +} + +/*********************************************************************//** +Stores on the page infimum record the explicit locks of another record. +This function is used to store the lock state of a record when it is +updated and the size of the record changes in the update. 
The record +is moved in such an update, perhaps to another page. The infimum record +acts as a dummy carrier record, taking care of lock releases while the +actual record is being moved. */ +UNIV_INTERN +void +lock_rec_store_on_page_infimum( +/*===========================*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: record whose lock state + is stored on the infimum + record of the same page; lock + bits are reset on the + record */ +{ + ulint heap_no = page_rec_get_heap_no(rec); + + ut_ad(block->frame == page_align(rec)); + + lock_mutex_enter(); + + lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no); + + lock_mutex_exit(); +} + +/*********************************************************************//** +Restores the state of explicit lock requests on a single record, where the +state was stored on the infimum of the page. */ +UNIV_INTERN +void +lock_rec_restore_from_page_infimum( +/*===============================*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record whose lock state + is restored */ + const buf_block_t* donator)/*!< in: page (rec is not + necessarily on this page) + whose infimum stored the lock + state; lock bits are reset on + the infimum */ +{ + ulint heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + + lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM); + + lock_mutex_exit(); +} + +/*=========== DEADLOCK CHECKING ======================================*/ + +/*********************************************************************//** +rewind(3) the file used for storing the latest detected deadlock and +print a heading message to stderr if printing of all deadlocks to stderr +is enabled. */ +UNIV_INLINE +void +lock_deadlock_start_print() +/*=======================*/ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + rewind(lock_latest_err_file); + ut_print_timestamp(lock_latest_err_file); + + if (srv_print_all_deadlocks) { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: transactions deadlock detected, " + "dumping detailed information.\n"); + ut_print_timestamp(stderr); + } +} + +/*********************************************************************//** +Print a message to the deadlock file and possibly to stderr. */ +UNIV_INLINE +void +lock_deadlock_fputs( +/*================*/ + const char* msg) /*!< in: message to print */ +{ + if (!srv_read_only_mode) { + fputs(msg, lock_latest_err_file); + + if (srv_print_all_deadlocks) { + fputs(msg, stderr); + } + } +} + +/*********************************************************************//** +Print transaction data to the deadlock file and possibly to stderr. 
*/ +UNIV_INLINE +void +lock_deadlock_trx_print( +/*====================*/ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock); + ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); + ulint heap_size = mem_heap_get_size(trx->lock.lock_heap); + + mutex_enter(&trx_sys->mutex); + + trx_print_low(lock_latest_err_file, trx, max_query_len, + n_rec_locks, n_trx_locks, heap_size); + + if (srv_print_all_deadlocks) { + trx_print_low(stderr, trx, max_query_len, + n_rec_locks, n_trx_locks, heap_size); + } + + mutex_exit(&trx_sys->mutex); +} + +/*********************************************************************//** +Print lock data to the deadlock file and possibly to stderr. */ +UNIV_INLINE +void +lock_deadlock_lock_print( +/*=====================*/ + const lock_t* lock) /*!< in: record or table type lock */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + if (lock_get_type_low(lock) == LOCK_REC) { + lock_rec_print(lock_latest_err_file, lock); + + if (srv_print_all_deadlocks) { + lock_rec_print(stderr, lock); + } + } else { + lock_table_print(lock_latest_err_file, lock); + + if (srv_print_all_deadlocks) { + lock_table_print(stderr, lock); + } + } +} + +/** Used in deadlock tracking. Protected by lock_sys->mutex. */ +static ib_uint64_t lock_mark_counter = 0; + +/** Check if the search is too deep. */ +#define lock_deadlock_too_deep(c) \ + (c->depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK \ + || c->cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK) + +/********************************************************************//** +Get the next lock in the queue that is owned by a transaction whose +sub-tree has not already been searched. +@return next lock or NULL if at end of queue */ +static +const lock_t* +lock_get_next_lock( +/*===============*/ + const lock_deadlock_ctx_t* + ctx, /*!< in: deadlock context */ + const lock_t* lock, /*!< in: lock in the queue */ + ulint heap_no)/*!< in: heap no if rec lock else + ULINT_UNDEFINED */ +{ + ut_ad(lock_mutex_own()); + + do { + if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(heap_no != ULINT_UNDEFINED); + lock = lock_rec_get_next_const(heap_no, lock); + } else { + ut_ad(heap_no == ULINT_UNDEFINED); + ut_ad(lock_get_type_low(lock) == LOCK_TABLE); + + lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + } + } while (lock != NULL + && lock->trx->lock.deadlock_mark > ctx->mark_start); + + ut_ad(lock == NULL + || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock)); + + return(lock); +} + +/********************************************************************//** +Get the first lock to search. The search starts from the current +wait_lock. What we are really interested in is an edge from the +current wait_lock's owning transaction to another transaction that has +a lock ahead in the queue. We skip locks where the owning transaction's +sub-tree has already been searched. 
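+In waits-for graph terms, each lock ahead in the queue that the current
+wait_lock conflicts with is a candidate outgoing edge from the waiting
+transaction to the lock's owner; this function positions the search on
+the first candidate.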
+@return first lock or NULL */
+static
+const lock_t*
+lock_get_first_lock(
+/*================*/
+	const lock_deadlock_ctx_t*
+			ctx,	/*!< in: deadlock context */
+	ulint*		heap_no)/*!< out: heap no if rec lock,
+				else ULINT_UNDEFINED */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	lock = ctx->wait_lock;
+
+	if (lock_get_type_low(lock) == LOCK_REC) {
+
+		*heap_no = lock_rec_find_set_bit(lock);
+		ut_ad(*heap_no != ULINT_UNDEFINED);
+
+		lock = lock_rec_get_first_on_page_addr(
+			lock->un_member.rec_lock.space,
+			lock->un_member.rec_lock.page_no);
+
+		/* Position on the first lock on the physical record. */
+		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
+			lock = lock_rec_get_next_const(*heap_no, lock);
+		}
+
+	} else {
+		*heap_no = ULINT_UNDEFINED;
+		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
+		lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+	}
+
+	ut_a(lock != NULL);
+	ut_a(lock != ctx->wait_lock);
+	ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
+
+	return(lock);
+}
+
+/********************************************************************//**
+Notify that a deadlock has been detected and print the conflicting
+transaction info. */
+static
+void
+lock_deadlock_notify(
+/*=================*/
+	const lock_deadlock_ctx_t*	ctx,	/*!< in: deadlock context */
+	const lock_t*			lock)	/*!< in: lock causing
+						deadlock */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(!srv_read_only_mode);
+
+	lock_deadlock_start_print();
+
+	lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
+
+	lock_deadlock_trx_print(ctx->wait_lock->trx, 3000);
+
+	lock_deadlock_fputs("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+	lock_deadlock_lock_print(ctx->wait_lock);
+
+	lock_deadlock_fputs("*** (2) TRANSACTION:\n");
+
+	lock_deadlock_trx_print(lock->trx, 3000);
+
+	lock_deadlock_fputs("*** (2) HOLDS THE LOCK(S):\n");
+
+	lock_deadlock_lock_print(lock);
+
+	/* It is possible that the joining transaction was granted its
+	lock when we rolled back some other waiting transaction. */
+
+	if (ctx->start->lock.wait_lock != 0) {
+		lock_deadlock_fputs(
+			"*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+		lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+	}
+
+#ifdef UNIV_DEBUG
+	if (lock_print_waits) {
+		fputs("Deadlock detected\n", stderr);
+	}
+#endif /* UNIV_DEBUG */
+}
+
+/********************************************************************//**
+Select the victim transaction that should be rolled back.
+@return victim transaction */
+static
+const trx_t*
+lock_deadlock_select_victim(
+/*========================*/
+	const lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(ctx->start->lock.wait_lock != 0);
+	ut_ad(ctx->wait_lock->trx != ctx->start);
+
+	if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) {
+		/* The joining transaction is 'smaller',
+		choose it as the victim and roll it back. */
+
+		return(ctx->start);
+	}
+
+	return(ctx->wait_lock->trx);
+}
+
+/********************************************************************//**
+Pop the deadlock search state from the stack.
+@return stack slot instance that was on top of the stack. */
+static
+const lock_stack_t*
+lock_deadlock_pop(
+/*==============*/
+	lock_deadlock_ctx_t*	ctx)	/*!< in/out: context */
+{
+	ut_ad(lock_mutex_own());
+
+	ut_ad(ctx->depth > 0);
+
+	return(&lock_stack[--ctx->depth]);
+}
+
+/********************************************************************//**
+Push the deadlock search state onto the stack. 
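+Returns NULL if the fixed-size stack (LOCK_STACK_SIZE slots) is full;
+the caller then treats the search as too deep and gives up.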
+@return slot that was used in the stack */ +static +lock_stack_t* +lock_deadlock_push( +/*===============*/ + lock_deadlock_ctx_t* ctx, /*!< in/out: context */ + const lock_t* lock, /*!< in: current lock */ + ulint heap_no) /*!< in: heap number */ +{ + ut_ad(lock_mutex_own()); + + /* Save current search state. */ + + if (LOCK_STACK_SIZE > ctx->depth) { + lock_stack_t* stack; + + stack = &lock_stack[ctx->depth++]; + + stack->lock = lock; + stack->heap_no = heap_no; + stack->wait_lock = ctx->wait_lock; + + return(stack); + } + + return(NULL); +} + +/********************************************************************//** +Looks iteratively for a deadlock. Note: the joining transaction may +have been granted its lock by the deadlock checks. +@return 0 if no deadlock else the victim transaction id.*/ +static +trx_id_t +lock_deadlock_search( +/*=================*/ + lock_deadlock_ctx_t* ctx) /*!< in/out: deadlock context */ +{ + const lock_t* lock; + ulint heap_no; + + ut_ad(lock_mutex_own()); + ut_ad(!trx_mutex_own(ctx->start)); + + ut_ad(ctx->start != NULL); + ut_ad(ctx->wait_lock != NULL); + assert_trx_in_list(ctx->wait_lock->trx); + ut_ad(ctx->mark_start <= lock_mark_counter); + + /* Look at the locks ahead of wait_lock in the lock queue. */ + lock = lock_get_first_lock(ctx, &heap_no); + + for (;;) { + + /* We should never visit the same sub-tree more than once. */ + ut_ad(lock == NULL + || lock->trx->lock.deadlock_mark <= ctx->mark_start); + + while (ctx->depth > 0 && lock == NULL) { + const lock_stack_t* stack; + + /* Restore previous search state. */ + + stack = lock_deadlock_pop(ctx); + + lock = stack->lock; + heap_no = stack->heap_no; + ctx->wait_lock = stack->wait_lock; + + lock = lock_get_next_lock(ctx, lock, heap_no); + } + + if (lock == NULL) { + break; + } else if (lock == ctx->wait_lock) { + + /* We can mark this subtree as searched */ + ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start); + + lock->trx->lock.deadlock_mark = ++lock_mark_counter; + + /* We are not prepared for an overflow. This 64-bit + counter should never wrap around. At 10^9 increments + per second, it would take 10^3 years of uptime. */ + + ut_ad(lock_mark_counter > 0); + + lock = NULL; + + } else if (!lock_has_to_wait(ctx->wait_lock, lock)) { + + /* No conflict, next lock */ + lock = lock_get_next_lock(ctx, lock, heap_no); + + } else if (lock->trx == ctx->start) { + + /* Found a cycle. */ + + lock_deadlock_notify(ctx, lock); + + return(lock_deadlock_select_victim(ctx)->id); + + } else if (lock_deadlock_too_deep(ctx)) { + + /* Search too deep to continue. */ + + ctx->too_deep = TRUE; + + /* Select the joining transaction as the victim. */ + return(ctx->start->id); + + } else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + + /* Another trx ahead has requested a lock in an + incompatible mode, and is itself waiting for a lock. */ + + ++ctx->cost; + + /* Save current search state. */ + if (!lock_deadlock_push(ctx, lock, heap_no)) { + + /* Unable to save current search state, stack + size not big enough. */ + + ctx->too_deep = TRUE; + + return(ctx->start->id); + } + + ctx->wait_lock = lock->trx->lock.wait_lock; + lock = lock_get_first_lock(ctx, &heap_no); + + if (lock->trx->lock.deadlock_mark > ctx->mark_start) { + lock = lock_get_next_lock(ctx, lock, heap_no); + } + + } else { + lock = lock_get_next_lock(ctx, lock, heap_no); + } + } + + ut_a(lock == NULL && ctx->depth == 0); + + /* No deadlock found. 
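+	Note that the deadlock_mark values assigned during this search
+	only suppress revisits within the search itself: the next search
+	starts with a fresh ctx->mark_start taken from the then-current
+	lock_mark_counter, which is at least as large as any mark
+	assigned here.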
	*/
+	return(0);
+}
+
+/********************************************************************//**
+Print info about transaction that was rolled back. */
+static
+void
+lock_deadlock_joining_trx_print(
+/*============================*/
+	const trx_t*	trx,		/*!< in: transaction rolled back */
+	const lock_t*	lock)		/*!< in: lock trx wants */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(!srv_read_only_mode);
+
+	/* If the lock search exceeds the max step
+	or the max depth, the current trx will be
+	the victim. Print its information. */
+	lock_deadlock_start_print();
+
+	lock_deadlock_fputs(
+		"TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+		" WAITS-FOR GRAPH, WE WILL ROLL BACK"
+		" FOLLOWING TRANSACTION \n\n"
+		"*** TRANSACTION:\n");
+
+	lock_deadlock_trx_print(trx, 3000);
+
+	lock_deadlock_fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+	lock_deadlock_lock_print(lock);
+}
+
+/********************************************************************//**
+Rollback transaction selected as the victim. */
+static
+void
+lock_deadlock_trx_rollback(
+/*=======================*/
+	lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
+{
+	trx_t*	trx;
+
+	ut_ad(lock_mutex_own());
+
+	trx = ctx->wait_lock->trx;
+
+	lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (1)\n");
+
+	trx_mutex_enter(trx);
+
+	trx->lock.was_chosen_as_deadlock_victim = TRUE;
+
+	lock_cancel_waiting_and_release(trx->lock.wait_lock);
+
+	trx_mutex_exit(trx);
+}
+
+/********************************************************************//**
+Checks if a joining lock request results in a deadlock. If a deadlock is
+found this function will resolve the deadlock by choosing a victim transaction
+and rolling it back. It will attempt to resolve all deadlocks. The returned
+transaction id will be the joining transaction id or 0 if some other
+transaction was chosen as a victim and rolled back or no deadlock found.
+
+@return id of transaction chosen as victim or 0 */
+static
+trx_id_t
+lock_deadlock_check_and_resolve(
+/*============================*/
+	const lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	trx_id_t	victim_trx_id;
+
+	ut_ad(trx != NULL);
+	ut_ad(lock != NULL);
+	ut_ad(lock_mutex_own());
+	assert_trx_in_list(trx);
+
+	/* Try and resolve as many deadlocks as possible. */
+	do {
+		lock_deadlock_ctx_t	ctx;
+
+		/* Reset the context. */
+		ctx.cost = 0;
+		ctx.depth = 0;
+		ctx.start = trx;
+		ctx.too_deep = FALSE;
+		ctx.wait_lock = lock;
+		ctx.mark_start = lock_mark_counter;
+
+		victim_trx_id = lock_deadlock_search(&ctx);
+
+		/* Search too deep, we roll back the joining transaction. */
+		if (ctx.too_deep) {
+
+			ut_a(trx == ctx.start);
+			ut_a(victim_trx_id == trx->id);
+
+			if (!srv_read_only_mode) {
+				lock_deadlock_joining_trx_print(trx, lock);
+			}
+
+			MONITOR_INC(MONITOR_DEADLOCK);
+
+		} else if (victim_trx_id != 0 && victim_trx_id != trx->id) {
+
+			ut_ad(victim_trx_id == ctx.wait_lock->trx->id);
+			lock_deadlock_trx_rollback(&ctx);
+
+			lock_deadlock_found = TRUE;
+
+			MONITOR_INC(MONITOR_DEADLOCK);
+		}
+
+	} while (victim_trx_id != 0 && victim_trx_id != trx->id);
+
+	/* If the joining transaction was selected as the victim.
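+	Both the too-deep case and a regular cycle in which the joining
+	transaction was chosen end up here with victim_trx_id == trx->id.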
*/ + if (victim_trx_id != 0) { + ut_a(victim_trx_id == trx->id); + + lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n"); + + lock_deadlock_found = TRUE; + } + + return(victim_trx_id); +} + +/*========================= TABLE LOCKS ==============================*/ + +/*********************************************************************//** +Creates a table lock object and adds it as the last in the lock queue +of the table. Does NOT check for deadlocks or lock compatibility. +@return own: new lock object */ +UNIV_INLINE +lock_t* +lock_table_create( +/*==============*/ + dict_table_t* table, /*!< in/out: database table + in dictionary cache */ + ulint type_mode,/*!< in: lock mode possibly ORed with + LOCK_WAIT */ + trx_t* trx) /*!< in: trx */ +{ + lock_t* lock; + + ut_ad(table && trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(trx)); + + /* Non-locking autocommit read-only transactions should not set + any locks. */ + assert_trx_in_list(trx); + + if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) { + ++table->n_waiting_or_granted_auto_inc_locks; + } + + /* For AUTOINC locking we reuse the lock instance only if + there is no wait involved else we allocate the waiting lock + from the transaction lock heap. */ + if (type_mode == LOCK_AUTO_INC) { + + lock = table->autoinc_lock; + + table->autoinc_trx = trx; + + ib_vector_push(trx->autoinc_locks, &lock); + } else { + lock = static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock))); + } + + lock->type_mode = type_mode | LOCK_TABLE; + lock->trx = trx; + + lock->un_member.tab_lock.table = table; + + ut_ad(table->n_ref_count > 0 || !table->can_be_evicted); + + UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); + UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + ib_vector_push(lock->trx->lock.table_locks, &lock); + + MONITOR_INC(MONITOR_TABLELOCK_CREATED); + MONITOR_INC(MONITOR_NUM_TABLELOCK); + + return(lock); +} + +/*************************************************************//** +Pops autoinc lock requests from the transaction's autoinc_locks. We +handle the case where there are gaps in the array and they need to +be popped off the stack. */ +UNIV_INLINE +void +lock_table_pop_autoinc_locks( +/*=========================*/ + trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* Skip any gaps, gaps are NULL lock entries in the + trx->autoinc_locks vector. */ + + do { + ib_vector_pop(trx->autoinc_locks); + + if (ib_vector_is_empty(trx->autoinc_locks)) { + return; + } + + } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL); +} + +/*************************************************************//** +Removes an autoinc lock request from the transaction's autoinc_locks. */ +UNIV_INLINE +void +lock_table_remove_autoinc_lock( +/*===========================*/ + lock_t* lock, /*!< in: table lock */ + trx_t* trx) /*!< in/out: transaction that owns the lock */ +{ + lock_t* autoinc_lock; + lint i = ib_vector_size(trx->autoinc_locks) - 1; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* With stored functions and procedures the user may drop + a table within the same "statement". 
This special case has + to be handled by deleting only those AUTOINC locks that were + held by the table being dropped. */ + + autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, i)); + + /* This is the default fast case. */ + + if (autoinc_lock == lock) { + lock_table_pop_autoinc_locks(trx); + } else { + /* The last element should never be NULL */ + ut_a(autoinc_lock != NULL); + + /* Handle freeing the locks from within the stack. */ + + while (--i >= 0) { + autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, i)); + + if (UNIV_LIKELY(autoinc_lock == lock)) { + void* null_var = NULL; + ib_vector_set(trx->autoinc_locks, i, &null_var); + return; + } + } + + /* Must find the autoinc lock. */ + ut_error; + } +} + +/*************************************************************//** +Removes a table lock request from the queue and the trx list of locks; +this is a low-level function which does NOT check if waiting requests +can now be granted. */ +UNIV_INLINE +void +lock_table_remove_low( +/*==================*/ + lock_t* lock) /*!< in/out: table lock */ +{ + trx_t* trx; + dict_table_t* table; + + ut_ad(lock_mutex_own()); + + trx = lock->trx; + table = lock->un_member.tab_lock.table; + + /* Remove the table from the transaction's AUTOINC vector, if + the lock that is being released is an AUTOINC lock. */ + if (lock_get_mode(lock) == LOCK_AUTO_INC) { + + /* The table's AUTOINC lock can get transferred to + another transaction before we get here. */ + if (table->autoinc_trx == trx) { + table->autoinc_trx = NULL; + } + + /* The locks must be freed in the reverse order from + the one in which they were acquired. This is to avoid + traversing the AUTOINC lock vector unnecessarily. + + We only store locks that were granted in the + trx->autoinc_locks vector (see lock_table_create() + and lock_grant()). Therefore it can be empty and we + need to check for that. */ + + if (!lock_get_wait(lock) + && !ib_vector_is_empty(trx->autoinc_locks)) { + + lock_table_remove_autoinc_lock(lock, trx); + } + + ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); + table->n_waiting_or_granted_auto_inc_locks--; + } + + UT_LIST_REMOVE(trx_locks, trx->lock.trx_locks, lock); + UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); + + MONITOR_INC(MONITOR_TABLELOCK_REMOVED); + MONITOR_DEC(MONITOR_NUM_TABLELOCK); +} + +/*********************************************************************//** +Enqueues a waiting request for a table lock which cannot be granted +immediately. Checks for deadlocks. 
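+
+As an informal summary of the cases handled below (illustrative, not
+an exhaustive specification):
+
+	deadlock found, joining trx chosen as victim	=> DB_DEADLOCK
+	deadlock found, other trx rolled back and our
+	lock was granted in the meantime		=> DB_SUCCESS
+	no deadlock, request stays in the queue		=> DB_LOCK_WAIT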
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or +DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another +transaction was chosen as a victim, and we got the lock immediately: +no need to wait then */ +static +dberr_t +lock_table_enqueue_waiting( +/*=======================*/ + ulint mode, /*!< in: lock mode this transaction is + requesting */ + dict_table_t* table, /*!< in/out: table */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + lock_t* lock; + trx_id_t victim_trx_id; + + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + trx = thr_get_trx(thr); + ut_ad(trx_mutex_own(trx)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + ut_error; + + return(DB_QUE_THR_SUSPENDED); + } + + switch (trx_get_dict_operation(trx)) { + case TRX_DICT_OP_NONE: + break; + case TRX_DICT_OP_TABLE: + case TRX_DICT_OP_INDEX: + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: a table lock wait happens" + " in a dictionary operation!\n" + "InnoDB: Table name ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(".\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + stderr); + ut_ad(0); + } + + /* Enqueue the lock request that will wait to be granted */ + + lock = lock_table_create(table, mode | LOCK_WAIT, trx); + + /* Release the mutex to obey the latching order. + This is safe, because lock_deadlock_check_and_resolve() + is invoked when a lock wait is enqueued for the currently + running transaction. Because trx is a running transaction + (it is not currently suspended because of a lock wait), + its state can only be changed by this thread, which is + currently associated with the transaction. */ + + trx_mutex_exit(trx); + + victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); + + trx_mutex_enter(trx); + + if (victim_trx_id != 0) { + ut_ad(victim_trx_id == trx->id); + + /* The order here is important, we don't want to + lose the state of the lock before calling remove. */ + lock_table_remove_low(lock); + lock_reset_lock_and_trx_wait(lock); + + return(DB_DEADLOCK); + } else if (trx->lock.wait_lock == NULL) { + /* Deadlock resolution chose another transaction as a victim, + and we accidentally got our lock granted! */ + + return(DB_SUCCESS); + } + + trx->lock.que_state = TRX_QUE_LOCK_WAIT; + + trx->lock.wait_started = ut_time(); + trx->lock.was_chosen_as_deadlock_victim = FALSE; + + ut_a(que_thr_stop(thr)); + + MONITOR_INC(MONITOR_TABLELOCK_WAIT); + + return(DB_LOCK_WAIT); +} + +/*********************************************************************//** +Checks if other transactions have an incompatible mode lock request in +the lock queue. 
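+
+Compatibility is decided by lock_mode_compatible(), i.e. by the lock
+compatibility matrix defined earlier in this file, reproduced here for
+convenience (+ means compatible, - means conflicting):
+
+	   IS IX S  X  AI
+	IS +  +  +  -  +
+	IX +  +  -  -  +
+	S  +  -  +  -  -
+	X  -  -  -  -  -
+	AI +  +  -  -  -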
+@return lock or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_other_has_incompatible(
+/*==============================*/
+	const trx_t*		trx,	/*!< in: transaction, or NULL if all
+					transactions should be included */
+	ulint			wait,	/*!< in: LOCK_WAIT if also
+					waiting locks are taken into
+					account, or 0 if not */
+	const dict_table_t*	table,	/*!< in: table */
+	enum lock_mode		mode)	/*!< in: lock mode */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = UT_LIST_GET_LAST(table->locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
+
+		if (lock->trx != trx
+		    && !lock_mode_compatible(lock_get_mode(lock), mode)
+		    && (wait || !lock_get_wait(lock))) {
+
+			return(lock);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_table(
+/*=======*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/*!< in/out: database table
+				in dictionary cache */
+	enum lock_mode	mode,	/*!< in: lock mode */
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	trx_t*		trx;
+	dberr_t		err;
+	const lock_t*	wait_for;
+
+	ut_ad(table && thr);
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	ut_a(flags == 0);
+
+	trx = thr_get_trx(thr);
+
+	/* Look for equal or stronger locks the same trx already
+	has on the table. No need to acquire the lock mutex here
+	because only this transaction can add/access table locks
+	to/from trx_t::table_locks. */
+
+	if (lock_table_has(trx, table, mode)) {
+
+		return(DB_SUCCESS);
+	}
+
+	lock_mutex_enter();
+
+	/* We have to check if the new lock is compatible with any locks
+	other transactions have in the table lock queue. */
+
+	wait_for = lock_table_other_has_incompatible(
+		trx, LOCK_WAIT, table, mode);
+
+	trx_mutex_enter(trx);
+
+	/* Another trx has a request on the table in an incompatible
+	mode: this trx may have to wait */
+
+	if (wait_for != NULL) {
+		err = lock_table_enqueue_waiting(mode | flags, table, thr);
+	} else {
+		lock_table_create(table, mode | flags, trx);
+
+		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
+
+		err = DB_SUCCESS;
+	}
+
+	lock_mutex_exit();
+
+	trx_mutex_exit(trx);
+
+	return(err);
+}
+
+/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	trx_t*		trx)	/*!< in/out: transaction */
+{
+	ut_ad(trx->is_recovered);
+
+	if (lock_table_has(trx, table, LOCK_IX)) {
+		return;
+	}
+
+	lock_mutex_enter();
+
+	/* We have to check if the new lock is compatible with any locks
+	other transactions have in the table lock queue. */
+
+	ut_ad(!lock_table_other_has_incompatible(
+		      trx, LOCK_WAIT, table, LOCK_IX));
+
+	trx_mutex_enter(trx);
+	lock_table_create(table, LOCK_IX, trx);
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+}
+
+/*********************************************************************//**
+Checks if a waiting table lock request still has to wait in a queue.
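+
+An illustrative example: with a queue {T1: granted S, T2: waiting X,
+T3: waiting S}, T3 still has to wait even though S is compatible with
+T1's granted S, because lock_has_to_wait() also reports a conflict
+with T2's waiting X request ahead of it in the queue. This keeps the
+queue FIFO-fair.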
+@return TRUE if still has to wait */ +static +ibool +lock_table_has_to_wait_in_queue( +/*============================*/ + const lock_t* wait_lock) /*!< in: waiting table lock */ +{ + const dict_table_t* table; + const lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_wait(wait_lock)); + + table = wait_lock->un_member.tab_lock.table; + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != wait_lock; + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { + + if (lock_has_to_wait(wait_lock, lock)) { + + return(TRUE); + } + } + + return(FALSE); +} + +/*************************************************************//** +Removes a table lock request, waiting or granted, from the queue and grants +locks to other transactions in the queue, if they now are entitled to a +lock. */ +static +void +lock_table_dequeue( +/*===============*/ + lock_t* in_lock)/*!< in/out: table lock object; transactions waiting + behind will get their lock requests granted, if + they are now qualified to it */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(in_lock) == LOCK_TABLE); + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); + + lock_table_remove_low(in_lock); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. */ + + for (/* No op */; + lock != NULL; + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { + + if (lock_get_wait(lock) + && !lock_table_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + ut_ad(in_lock->trx != lock->trx); + lock_grant(lock); + } + } +} + +/*=========================== LOCK RELEASE ==============================*/ + +/*************************************************************//** +Removes a granted record lock of a transaction from the queue and grants +locks to other transactions waiting in the queue if they now are entitled +to a lock. */ +UNIV_INTERN +void +lock_rec_unlock( +/*============*/ + trx_t* trx, /*!< in/out: transaction that has + set a record lock */ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record */ + enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ +{ + lock_t* first_lock; + lock_t* lock; + ulint heap_no; + const char* stmt; + size_t stmt_len; + + ut_ad(trx); + ut_ad(rec); + ut_ad(block->frame == page_align(rec)); + ut_ad(!trx->lock.wait_lock); + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + + heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + trx_mutex_enter(trx); + + first_lock = lock_rec_get_first(block, heap_no); + + /* Find the last lock with the same lock_mode and transaction + on the record. 
	*/
+
+	for (lock = first_lock; lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
+			goto released;
+		}
+	}
+
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+
+	stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		" InnoDB: Error: unlock row could not"
+		" find a %lu mode lock on the record\n",
+		(ulong) lock_mode);
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: current statement: %.*s\n",
+		(int) stmt_len, stmt);
+
+	return;
+
+released:
+	ut_a(!lock_get_wait(lock));
+	lock_rec_reset_nth_bit(lock, heap_no);
+
+	/* Check if we can now grant waiting lock requests */
+
+	for (lock = first_lock; lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+		if (lock_get_wait(lock)
+		    && !lock_rec_has_to_wait_in_queue(lock)) {
+
+			/* Grant the lock */
+			ut_ad(trx != lock->trx);
+			lock_grant(lock);
+		}
+	}
+
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+}
+
+/*********************************************************************//**
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+static
+void
+lock_release(
+/*=========*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	lock_t*		lock;
+	ulint		count = 0;
+	trx_id_t	max_trx_id;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(!trx_mutex_own(trx));
+
+	max_trx_id = trx_sys_get_max_trx_id();
+
+	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
+
+		if (lock_get_type_low(lock) == LOCK_REC) {
+
+#ifdef UNIV_DEBUG
+			/* Check if the transaction locked a record
+			in a system table in X mode. It should have set
+			the dict_op code correctly if it did. */
+			if (lock->index->table->id < DICT_HDR_FIRST_ID
+			    && lock_get_mode(lock) == LOCK_X) {
+
+				ut_ad(lock_get_mode(lock) != LOCK_IX);
+				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+			}
+#endif /* UNIV_DEBUG */
+
+			lock_rec_dequeue_from_page(lock);
+		} else {
+			dict_table_t*	table;
+
+			table = lock->un_member.tab_lock.table;
+#ifdef UNIV_DEBUG
+			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+			/* Check if the transaction locked a system table
+			in IX mode. It should have set the dict_op code
+			correctly if it did. */
+			if (table->id < DICT_HDR_FIRST_ID
+			    && (lock_get_mode(lock) == LOCK_X
+				|| lock_get_mode(lock) == LOCK_IX)) {
+
+				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+			}
+#endif /* UNIV_DEBUG */
+
+			if (lock_get_mode(lock) != LOCK_IS
+			    && trx->undo_no != 0) {
+
+				/* The trx may have modified the table. We
+				block the use of the MySQL query cache for
+				all currently active transactions. */
+
+				table->query_cache_inv_trx_id = max_trx_id;
+			}
+
+			lock_table_dequeue(lock);
+		}
+
+		if (count == LOCK_RELEASE_INTERVAL) {
+			/* Release the mutex for a while, so that we
+			do not monopolize it */
+
+			lock_mutex_exit();
+
+			lock_mutex_enter();
+
+			count = 0;
+		}
+
+		++count;
+	}
+
+	/* We don't remove the locks one by one from the vector for
+	efficiency reasons. We simply reset it because we would have
+	released all the locks anyway.
*/ + + ib_vector_reset(trx->lock.table_locks); + + ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); + ut_a(ib_vector_is_empty(trx->autoinc_locks)); + ut_a(ib_vector_is_empty(trx->lock.table_locks)); + + mem_heap_empty(trx->lock.lock_heap); +} + +/* True if a lock mode is S or X */ +#define IS_LOCK_S_OR_X(lock) \ + (lock_get_mode(lock) == LOCK_S \ + || lock_get_mode(lock) == LOCK_X) + +/*********************************************************************//** +Removes table locks of the transaction on a table to be dropped. */ +static +void +lock_trx_table_locks_remove( +/*========================*/ + const lock_t* lock_to_remove) /*!< in: lock to remove */ +{ + lint i; + trx_t* trx = lock_to_remove->trx; + + ut_ad(lock_mutex_own()); + + /* It is safe to read this because we are holding the lock mutex */ + if (!trx->lock.cancel) { + trx_mutex_enter(trx); + } else { + ut_ad(trx_mutex_own(trx)); + } + + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + ut_a(trx == lock->trx); + ut_a(lock_get_type_low(lock) & LOCK_TABLE); + ut_a(lock->un_member.tab_lock.table != NULL); + + if (lock == lock_to_remove) { + void* null_var = NULL; + ib_vector_set(trx->lock.table_locks, i, &null_var); + + if (!trx->lock.cancel) { + trx_mutex_exit(trx); + } + + return; + } + } + + if (!trx->lock.cancel) { + trx_mutex_exit(trx); + } + + /* Lock must exist in the vector. */ + ut_error; +} + +/*********************************************************************//** +Removes locks of a transaction on a table to be dropped. +If remove_also_table_sx_locks is TRUE then table-level S and X locks are +also removed in addition to other table-level and record-level locks. +No lock that is going to be removed is allowed to be a wait lock. */ +static +void +lock_remove_all_on_table_for_trx( +/*=============================*/ + dict_table_t* table, /*!< in: table to be dropped */ + trx_t* trx, /*!< in: a transaction */ + ibool remove_also_table_sx_locks)/*!< in: also removes + table S and X locks */ +{ + lock_t* lock; + lock_t* prev_lock; + + ut_ad(lock_mutex_own()); + + for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); + lock != NULL; + lock = prev_lock) { + + prev_lock = UT_LIST_GET_PREV(trx_locks, lock); + + if (lock_get_type_low(lock) == LOCK_REC + && lock->index->table == table) { + ut_a(!lock_get_wait(lock)); + + lock_rec_discard(lock); + } else if (lock_get_type_low(lock) & LOCK_TABLE + && lock->un_member.tab_lock.table == table + && (remove_also_table_sx_locks + || !IS_LOCK_S_OR_X(lock))) { + + ut_a(!lock_get_wait(lock)); + + lock_trx_table_locks_remove(lock); + lock_table_remove_low(lock); + } + } +} + +/*******************************************************************//** +Remove any explicit record locks held by recovering transactions on +the table. 
+@return number of recovered transactions examined */ +static +ulint +lock_remove_recovered_trx_record_locks( +/*===================================*/ + dict_table_t* table) /*!< in: check if there are any locks + held on records in this table or on the + table itself */ +{ + trx_t* trx; + ulint n_recovered_trx = 0; + + ut_a(table != NULL); + ut_ad(lock_mutex_own()); + + mutex_enter(&trx_sys->mutex); + + for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + lock_t* lock; + lock_t* next_lock; + + assert_trx_in_rw_list(trx); + + if (!trx->is_recovered) { + continue; + } + + /* Because we are holding the lock_sys->mutex, + implicit locks cannot be converted to explicit ones + while we are scanning the explicit locks. */ + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = next_lock) { + + ut_a(lock->trx == trx); + + /* Recovered transactions can't wait on a lock. */ + + ut_a(!lock_get_wait(lock)); + + next_lock = UT_LIST_GET_NEXT(trx_locks, lock); + + switch (lock_get_type_low(lock)) { + default: + ut_error; + case LOCK_TABLE: + if (lock->un_member.tab_lock.table == table) { + lock_trx_table_locks_remove(lock); + lock_table_remove_low(lock); + } + break; + case LOCK_REC: + if (lock->index->table == table) { + lock_rec_discard(lock); + } + } + } + + ++n_recovered_trx; + } + + mutex_exit(&trx_sys->mutex); + + return(n_recovered_trx); +} + +/*********************************************************************//** +Removes locks on a table to be dropped or truncated. +If remove_also_table_sx_locks is TRUE then table-level S and X locks are +also removed in addition to other table-level and record-level locks. +No lock, that is going to be removed, is allowed to be a wait lock. */ +UNIV_INTERN +void +lock_remove_all_on_table( +/*=====================*/ + dict_table_t* table, /*!< in: table to be dropped + or truncated */ + ibool remove_also_table_sx_locks)/*!< in: also removes + table S and X locks */ +{ + lock_t* lock; + + lock_mutex_enter(); + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != NULL; + /* No op */) { + + lock_t* prev_lock; + + prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + + /* If we should remove all locks (remove_also_table_sx_locks + is TRUE), or if the lock is not table-level S or X lock, + then check we are not going to remove a wait lock. */ + if (remove_also_table_sx_locks + || !(lock_get_type(lock) == LOCK_TABLE + && IS_LOCK_S_OR_X(lock))) { + + ut_a(!lock_get_wait(lock)); + } + + lock_remove_all_on_table_for_trx( + table, lock->trx, remove_also_table_sx_locks); + + if (prev_lock == NULL) { + if (lock == UT_LIST_GET_FIRST(table->locks)) { + /* lock was not removed, pick its successor */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, lock); + } else { + /* lock was removed, pick the first one */ + lock = UT_LIST_GET_FIRST(table->locks); + } + } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks, + prev_lock) != lock) { + /* If lock was removed by + lock_remove_all_on_table_for_trx() then pick the + successor of prev_lock ... */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, prev_lock); + } else { + /* ... otherwise pick the successor of lock. */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, lock); + } + } + + /* Note: Recovered transactions don't have table level IX or IS locks + but can have implicit record locks that have been converted to explicit + record locks. 
Such record locks cannot be freed by traversing the + transaction lock list in dict_table_t (as above). */ + + if (!lock_sys->rollback_complete + && lock_remove_recovered_trx_record_locks(table) == 0) { + + lock_sys->rollback_complete = TRUE; + } + + lock_mutex_exit(); +} + +/*===================== VALIDATION AND DEBUGGING ====================*/ + +/*********************************************************************//** +Prints info of a table lock. */ +UNIV_INTERN +void +lock_table_print( +/*=============*/ + FILE* file, /*!< in: file where to print */ + const lock_t* lock) /*!< in: table type lock */ +{ + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(lock) == LOCK_TABLE); + + fputs("TABLE LOCK table ", file); + ut_print_name(file, lock->trx, TRUE, + lock->un_member.tab_lock.table->name); + fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id); + + if (lock_get_mode(lock) == LOCK_S) { + fputs(" lock mode S", file); + } else if (lock_get_mode(lock) == LOCK_X) { + fputs(" lock mode X", file); + } else if (lock_get_mode(lock) == LOCK_IS) { + fputs(" lock mode IS", file); + } else if (lock_get_mode(lock) == LOCK_IX) { + fputs(" lock mode IX", file); + } else if (lock_get_mode(lock) == LOCK_AUTO_INC) { + fputs(" lock mode AUTO-INC", file); + } else { + fprintf(file, " unknown lock mode %lu", + (ulong) lock_get_mode(lock)); + } + + if (lock_get_wait(lock)) { + fputs(" waiting", file); + } + + putc('\n', file); +} + +/*********************************************************************//** +Prints info of a record lock. */ +UNIV_INTERN +void +lock_rec_print( +/*===========*/ + FILE* file, /*!< in: file where to print */ + const lock_t* lock) /*!< in: record type lock */ +{ + const buf_block_t* block; + ulint space; + ulint page_no; + ulint i; + mtr_t mtr; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(lock) == LOCK_REC); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ", + (ulong) space, (ulong) page_no, + (ulong) lock_rec_get_n_bits(lock)); + dict_index_name_print(file, lock->trx, lock->index); + fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id); + + if (lock_get_mode(lock) == LOCK_S) { + fputs(" lock mode S", file); + } else if (lock_get_mode(lock) == LOCK_X) { + fputs(" lock_mode X", file); + } else { + ut_error; + } + + if (lock_rec_get_gap(lock)) { + fputs(" locks gap before rec", file); + } + + if (lock_rec_get_rec_not_gap(lock)) { + fputs(" locks rec but not gap", file); + } + + if (lock_rec_get_insert_intention(lock)) { + fputs(" insert intention", file); + } + + if (lock_get_wait(lock)) { + fputs(" waiting", file); + } + + mtr_start(&mtr); + + putc('\n', file); + + block = buf_page_try_get(space, page_no, &mtr); + + for (i = 0; i < lock_rec_get_n_bits(lock); ++i) { + + if (!lock_rec_get_nth_bit(lock, i)) { + continue; + } + + fprintf(file, "Record lock, heap no %lu", (ulong) i); + + if (block) { + const rec_t* rec; + + rec = page_find_rec_with_heap_no( + buf_block_get_frame(block), i); + + offsets = rec_get_offsets( + rec, lock->index, offsets, + ULINT_UNDEFINED, &heap); + + putc(' ', file); + rec_print_new(file, rec, offsets); + } + + putc('\n', file); + } + + mtr_commit(&mtr); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } +} + +#ifdef UNIV_DEBUG +/* Print the number of lock structs from lock_print_info_summary() only +in 
non-production builds for performance reasons, see +http://bugs.mysql.com/36942 */ +#define PRINT_NUM_OF_LOCK_STRUCTS +#endif /* UNIV_DEBUG */ + +#ifdef PRINT_NUM_OF_LOCK_STRUCTS +/*********************************************************************//** +Calculates the number of record lock structs in the record lock hash table. +@return number of record locks */ +static +ulint +lock_get_n_rec_locks(void) +/*======================*/ +{ + ulint n_locks = 0; + ulint i; + + ut_ad(lock_mutex_own()); + + for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + const lock_t* lock; + + for (lock = static_cast<const lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, i)); + lock != 0; + lock = static_cast<const lock_t*>( + HASH_GET_NEXT(hash, lock))) { + + n_locks++; + } + } + + return(n_locks); +} +#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + +/*********************************************************************//** +Prints info of locks for all transactions. +@return FALSE if not able to obtain lock mutex +and exits without printing info */ +UNIV_INTERN +ibool +lock_print_info_summary( +/*====================*/ + FILE* file, /*!< in: file where to print */ + ibool nowait) /*!< in: whether to wait for the lock mutex */ +{ + /* if nowait is FALSE, wait on the lock mutex, + otherwise return immediately if fail to obtain the + mutex. */ + if (!nowait) { + lock_mutex_enter(); + } else if (lock_mutex_enter_nowait()) { + fputs("FAIL TO OBTAIN LOCK MUTEX, " + "SKIP LOCK INFO PRINTING\n", file); + return(FALSE); + } + + if (lock_deadlock_found) { + fputs("------------------------\n" + "LATEST DETECTED DEADLOCK\n" + "------------------------\n", file); + + if (!srv_read_only_mode) { + ut_copy_file(file, lock_latest_err_file); + } + } + + fputs("------------\n" + "TRANSACTIONS\n" + "------------\n", file); + + fprintf(file, "Trx id counter " TRX_ID_FMT "\n", + trx_sys_get_max_trx_id()); + + fprintf(file, + "Purge done for trx's n:o < " TRX_ID_FMT + " undo n:o < " TRX_ID_FMT " state: ", + purge_sys->iter.trx_no, + purge_sys->iter.undo_no); + + /* Note: We are reading the state without the latch. One because it + will violate the latching order and two because we are merely querying + the state of the variable for display. */ + + switch (purge_sys->state){ + case PURGE_STATE_INIT: + /* Should never be in this state while the system is running. */ + ut_error; + + case PURGE_STATE_EXIT: + fprintf(file, "exited"); + break; + + case PURGE_STATE_DISABLED: + fprintf(file, "disabled"); + break; + + case PURGE_STATE_RUN: + fprintf(file, "running"); + /* Check if it is waiting for more data to arrive. */ + if (!purge_sys->running) { + fprintf(file, " but idle"); + } + break; + + case PURGE_STATE_STOP: + fprintf(file, "stopped"); + break; + } + + fprintf(file, "\n"); + + fprintf(file, + "History list length %lu\n", + (ulong) trx_sys->rseg_history_len); + +#ifdef PRINT_NUM_OF_LOCK_STRUCTS + fprintf(file, + "Total number of lock structs in row lock hash table %lu\n", + (ulong) lock_get_n_rec_locks()); +#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + return(TRUE); +} + +/*********************************************************************//** +Prints info of locks for each transaction. This function assumes that the +caller holds the lock mutex and more importantly it will release the lock +mutex on behalf of the caller. (This should be fixed in the future). 
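+To cope with that release, the function keeps plain cursors (nth_trx,
+nth_lock) and rescans the transaction lists from the start after each
+page read, instead of holding list pointers across the release of the
+mutexes.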
*/ +UNIV_INTERN +void +lock_print_info_all_transactions( +/*=============================*/ + FILE* file) /*!< in: file where to print */ +{ + const lock_t* lock; + ibool load_page_first = TRUE; + ulint nth_trx = 0; + ulint nth_lock = 0; + ulint i; + mtr_t mtr; + const trx_t* trx; + trx_list_t* trx_list = &trx_sys->rw_trx_list; + + fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); + + ut_ad(lock_mutex_own()); + + mutex_enter(&trx_sys->mutex); + + /* First print info on non-active transactions */ + + /* NOTE: information of auto-commit non-locking read-only + transactions will be omitted here. The information will be + available from INFORMATION_SCHEMA.INNODB_TRX. */ + + for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) { + + ut_ad(trx->in_mysql_trx_list); + + /* See state transitions and locking rules in trx0trx.h */ + + if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) { + fputs("---", file); + trx_print_latched(file, trx, 600); + } + } + +loop: + /* Since we temporarily release lock_sys->mutex and + trx_sys->mutex when reading a database page in below, + variable trx may be obsolete now and we must loop + through the trx list to get probably the same trx, + or some other trx. */ + + for (trx = UT_LIST_GET_FIRST(*trx_list), i = 0; + trx && (i < nth_trx); + trx = UT_LIST_GET_NEXT(trx_list, trx), i++) { + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + } + + ut_ad(trx == NULL + || trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + if (trx == NULL) { + /* Check the read-only transaction list next. */ + if (trx_list == &trx_sys->rw_trx_list) { + trx_list = &trx_sys->ro_trx_list; + nth_trx = 0; + nth_lock = 0; + goto loop; + } + + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + + ut_ad(lock_validate()); + + return; + } + + assert_trx_in_list(trx); + + if (nth_lock == 0) { + fputs("---", file); + + trx_print_latched(file, trx, 600); + + if (trx->read_view) { + fprintf(file, + "Trx read view will not see trx with" + " id >= " TRX_ID_FMT + ", sees < " TRX_ID_FMT "\n", + trx->read_view->low_limit_id, + trx->read_view->up_limit_id); + } + + if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + + fprintf(file, + "------- TRX HAS BEEN WAITING %lu SEC" + " FOR THIS LOCK TO BE GRANTED:\n", + (ulong) difftime(ut_time(), + trx->lock.wait_started)); + + if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) { + lock_rec_print(file, trx->lock.wait_lock); + } else { + lock_table_print(file, trx->lock.wait_lock); + } + + fputs("------------------\n", file); + } + } + + if (!srv_print_innodb_lock_monitor) { + nth_trx++; + goto loop; + } + + i = 0; + + /* Look at the note about the trx loop above why we loop here: + lock may be an obsolete pointer now. */ + + lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + + while (lock && (i < nth_lock)) { + lock = UT_LIST_GET_NEXT(trx_locks, lock); + i++; + } + + if (lock == NULL) { + nth_trx++; + nth_lock = 0; + + goto loop; + } + + if (lock_get_type_low(lock) == LOCK_REC) { + if (load_page_first) { + ulint space = lock->un_member.rec_lock.space; + ulint zip_size= fil_space_get_zip_size(space); + ulint page_no = lock->un_member.rec_lock.page_no; + ibool tablespace_being_deleted = FALSE; + + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + + /* It is a single table tablespace and + the .ibd file is missing (TRUNCATE + TABLE probably stole the locks): just + print the lock without attempting to + load the page in the buffer pool. 
	*/
+
+					fprintf(file, "RECORD LOCKS on"
+						" non-existing space %lu\n",
+						(ulong) space);
+					goto print_rec;
+				}
+
+				lock_mutex_exit();
+				mutex_exit(&trx_sys->mutex);
+
+				DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
+
+				/* Check whether the space exists or not.
+				Only when the space is valid do we try to
+				get the page. */
+				tablespace_being_deleted = fil_inc_pending_ops(space, false);
+
+				if (!tablespace_being_deleted) {
+					mtr_start(&mtr);
+
+					buf_page_get_gen(space, zip_size, page_no,
+							 RW_NO_LATCH, NULL,
+							 BUF_GET_POSSIBLY_FREED,
+							 __FILE__, __LINE__, &mtr);
+
+					mtr_commit(&mtr);
+
+					fil_decr_pending_ops(space);
+				} else {
+					fprintf(file, "RECORD LOCKS on"
+						" non-existing space %lu\n",
+						(ulong) space);
+				}
+
+				load_page_first = FALSE;
+
+				lock_mutex_enter();
+
+				mutex_enter(&trx_sys->mutex);
+
+				goto loop;
+			}
+
+print_rec:
+			lock_rec_print(file, lock);
+		} else {
+			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+			lock_table_print(file, lock);
+		}
+
+		load_page_first = TRUE;
+
+		nth_lock++;
+
+		if (nth_lock >= 10) {
+			fputs("10 LOCKS PRINTED FOR THIS TRX:"
+			      " SUPPRESSING FURTHER PRINTS\n",
+			      file);
+
+			nth_trx++;
+			nth_lock = 0;
+		}
+
+		goto loop;
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Find the lock in the trx_t::trx_lock_t::table_locks vector.
+@return TRUE if found */
+static
+ibool
+lock_trx_table_locks_find(
+/*======================*/
+	trx_t*		trx,		/*!< in: trx to validate */
+	const lock_t*	find_lock)	/*!< in: lock to find */
+{
+	lint		i;
+	ibool		found = FALSE;
+
+	trx_mutex_enter(trx);
+
+	for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
+		const lock_t*	lock;
+
+		lock = *static_cast<const lock_t**>(
+			ib_vector_get(trx->lock.table_locks, i));
+
+		if (lock == NULL) {
+			continue;
+		} else if (lock == find_lock) {
+			/* Can't be duplicates. */
+			ut_a(!found);
+			found = TRUE;
+		}
+
+		ut_a(trx == lock->trx);
+		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
+		ut_a(lock->un_member.tab_lock.table != NULL);
+	}
+
+	trx_mutex_exit(trx);
+
+	return(found);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return TRUE if ok */
+static
+ibool
+lock_table_queue_validate(
+/*======================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	for (lock = UT_LIST_GET_FIRST(table->locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
+
+		/* lock->trx->state cannot change from or to NOT_STARTED
+		while we are holding the trx_sys->mutex. It may change
+		from ACTIVE to PREPARED, but it may not change to
+		COMMITTED, because we are holding the lock_sys->mutex. */
+		ut_ad(trx_assert_started(lock->trx));
+
+		if (!lock_get_wait(lock)) {
+
+			ut_a(!lock_table_other_has_incompatible(
+				     lock->trx, 0, table,
+				     lock_get_mode(lock)));
+		} else {
+
+			ut_a(lock_table_has_to_wait_in_queue(lock));
+		}
+
+		ut_a(lock_trx_table_locks_find(lock->trx, lock));
+	}
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return TRUE if ok */
+static
+ibool
+lock_rec_queue_validate(
+/*====================*/
+	ibool			locked_lock_trx_sys,
+					/*!< in: if the caller holds
+					both the lock mutex and
+					trx_sys_t->lock.
*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record to look at */ + const dict_index_t* index, /*!< in: index, or NULL if not known */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + const trx_t* impl_trx; + const lock_t* lock; + ulint heap_no; + + ut_a(rec); + ut_a(block->frame == page_align(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + ut_ad(lock_mutex_own() == locked_lock_trx_sys); + ut_ad(!index || dict_index_is_clust(index) + || !dict_index_is_online_ddl(index)); + + heap_no = page_rec_get_heap_no(rec); + + if (!locked_lock_trx_sys) { + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); + } + + if (!page_rec_is_user_rec(rec)) { + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + ut_a(trx_in_trx_list(lock->trx)); + + if (lock_get_wait(lock)) { + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + + if (index) { + ut_a(lock->index == index); + } + } + + goto func_exit; + } + + if (!index); + else if (dict_index_is_clust(index)) { + trx_id_t trx_id; + + /* Unlike the non-debug code, this invariant can only succeed + if the check and assertion are covered by the lock mutex. */ + + trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); + impl_trx = trx_rw_is_active_low(trx_id, NULL); + + ut_ad(lock_mutex_own()); + /* impl_trx cannot be committed until lock_mutex_exit() + because lock_trx_release_locks() acquires lock_sys->mutex */ + + if (impl_trx != NULL + && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, + block, heap_no, impl_trx)) { + + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, impl_trx)); + } + } + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + ut_a(trx_in_trx_list(lock->trx)); + + if (index) { + ut_a(lock->index == index); + } + + if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { + + enum lock_mode mode; + + if (lock_get_mode(lock) == LOCK_S) { + mode = LOCK_X; + } else { + mode = LOCK_S; + } + ut_a(!lock_rec_other_has_expl_req( + mode, 0, 0, block, heap_no, lock->trx)); + + } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { + + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + } + +func_exit: + if (!locked_lock_trx_sys) { + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + } + + return(TRUE); +} + +/*********************************************************************//** +Validates the record lock queues on a page. 
+@return TRUE if ok */ +static +ibool +lock_rec_validate_page( +/*===================*/ + const buf_block_t* block) /*!< in: buffer block */ +{ + const lock_t* lock; + const rec_t* rec; + ulint nth_lock = 0; + ulint nth_bit = 0; + ulint i; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(!lock_mutex_own()); + + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); +loop: + lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block), + buf_block_get_page_no(block)); + + if (!lock) { + goto function_exit; + } + +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG + ut_a(!block->page.file_page_was_freed); +#endif + + for (i = 0; i < nth_lock; i++) { + + lock = lock_rec_get_next_on_page_const(lock); + + if (!lock) { + goto function_exit; + } + } + + ut_a(trx_in_trx_list(lock->trx)); + +# ifdef UNIV_SYNC_DEBUG + /* Only validate the record queues when this thread is not + holding a space->latch. Deadlocks are possible due to + latching order violation when UNIV_DEBUG is defined while + UNIV_SYNC_DEBUG is not. */ + if (!sync_thread_levels_contains(SYNC_FSP)) +# endif /* UNIV_SYNC_DEBUG */ + for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { + + if (i == 1 || lock_rec_get_nth_bit(lock, i)) { + + rec = page_find_rec_with_heap_no(block->frame, i); + ut_a(rec); + offsets = rec_get_offsets(rec, lock->index, offsets, + ULINT_UNDEFINED, &heap); +#if 0 + fprintf(stderr, + "Validating %u %u\n", + block->page.space, block->page.offset); +#endif + /* If this thread is holding the file space + latch (fil_space_t::latch), the following + check WILL break the latching order and may + cause a deadlock of threads. */ + + lock_rec_queue_validate( + TRUE, block, rec, lock->index, offsets); + + nth_bit = i + 1; + + goto loop; + } + } + + nth_bit = 0; + nth_lock++; + + goto loop; + +function_exit: + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return(TRUE); +} + +/*********************************************************************//** +Validates the table locks. +@return TRUE if ok */ +static +ibool +lock_validate_table_locks( +/*======================*/ + const trx_list_t* trx_list) /*!< in: trx list */ +{ + const trx_t* trx; + + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + ut_ad(trx_list == &trx_sys->rw_trx_list + || trx_list == &trx_sys->ro_trx_list); + + for (trx = UT_LIST_GET_FIRST(*trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + const lock_t* lock; + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + if (lock_get_type_low(lock) & LOCK_TABLE) { + + lock_table_queue_validate( + lock->un_member.tab_lock.table); + } + } + } + + return(TRUE); +} + +/*********************************************************************//** +Validate record locks up to a limit. 
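+
+The 64-bit limit packs a page address; ut_ull_create() computes, in
+effect,
+
+	current = ((ib_uint64_t) space << 32) | page_no;
+
+so *limit acts as a watermark: each call returns some lock whose
+packed (space, page_no) lies above the previous limit and bumps the
+limit past it, so the scan never depends on a saved pointer into the
+hash bucket.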
+@return lock at limit or NULL if no more locks in the hash bucket */ +static __attribute__((nonnull, warn_unused_result)) +const lock_t* +lock_rec_validate( +/*==============*/ + ulint start, /*!< in: lock_sys->rec_hash + bucket */ + ib_uint64_t* limit) /*!< in/out: upper limit of + (space, page_no) */ +{ + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + for (const lock_t* lock = static_cast<const lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, start)); + lock != NULL; + lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) { + + ib_uint64_t current; + + ut_a(trx_in_trx_list(lock->trx)); + ut_a(lock_get_type(lock) == LOCK_REC); + + current = ut_ull_create( + lock->un_member.rec_lock.space, + lock->un_member.rec_lock.page_no); + + if (current > *limit) { + *limit = current + 1; + return(lock); + } + } + + return(0); +} + +/*********************************************************************//** +Validate a record lock's block */ +static +void +lock_rec_block_validate( +/*====================*/ + ulint space, + ulint page_no) +{ + /* The lock and the block that it is referring to may be freed at + this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. + If the lock exists in lock_rec_validate_page() we assert + !block->page.file_page_was_freed. */ + + buf_block_t* block; + mtr_t mtr; + + /* Make sure that the tablespace is not deleted while we are + trying to access the page. */ + if (!fil_inc_pending_ops(space, true)) { + mtr_start(&mtr); + block = buf_page_get_gen( + space, fil_space_get_zip_size(space), + page_no, RW_X_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, + __FILE__, __LINE__, &mtr); + + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + + ut_ad(lock_rec_validate_page(block)); + mtr_commit(&mtr); + + fil_decr_pending_ops(space); + } +} + +/*********************************************************************//** +Validates the lock system. +@return TRUE if ok */ +static +bool +lock_validate() +/*===========*/ +{ + typedef std::pair<ulint, ulint> page_addr_t; + typedef std::set<page_addr_t> page_addr_set; + page_addr_set pages; + + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); + + ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list)); + ut_a(lock_validate_table_locks(&trx_sys->ro_trx_list)); + + /* Iterate over all the record locks and validate the locks. We + don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex. + Release both mutexes during the validation check. */ + + for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + const lock_t* lock; + ib_uint64_t limit = 0; + + while ((lock = lock_rec_validate(i, &limit)) != 0) { + + ulint space = lock->un_member.rec_lock.space; + ulint page_no = lock->un_member.rec_lock.page_no; + + pages.insert(std::make_pair(space, page_no)); + } + } + + mutex_exit(&trx_sys->mutex); + lock_mutex_exit(); + + for (page_addr_set::const_iterator it = pages.begin(); + it != pages.end(); + ++it) { + lock_rec_block_validate((*it).first, (*it).second); + } + + return(true); +} +#endif /* UNIV_DEBUG */ +/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/ + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate insert of +a record. If they do, first tests if the query thread should anyway +be suspended for some reason; if not, then puts the transaction and +the query thread to the lock wait state and inserts a waiting request +for a gap x-lock to the lock queue. 
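+
+A purely illustrative example: if a page holds records with keys 5 and
+10, an insert of key 7 requests LOCK_X | LOCK_GAP |
+LOCK_INSERT_INTENTION on the successor record (key 10). A plain gap
+lock held on that gap by another transaction blocks the insert, while
+another transaction's waiting insert-intention lock on the same gap
+does not.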
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_rec_insert_check_and_lock( +/*===========================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is + set, does nothing */ + const rec_t* rec, /*!< in: record after which to insert */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + dict_index_t* index, /*!< in: index */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + ibool* inherit)/*!< out: set to TRUE if the new + inserted record maybe should inherit + LOCK_GAP type locks from the successor + record */ +{ + const rec_t* next_rec; + trx_t* trx; + lock_t* lock; + dberr_t err; + ulint next_rec_heap_no; + ibool inherit_in = *inherit; + + ut_ad(block->frame == page_align(rec)); + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + trx = thr_get_trx(thr); + next_rec = page_rec_get_next_const(rec); + next_rec_heap_no = page_rec_get_heap_no(next_rec); + + lock_mutex_enter(); + /* Because this code is invoked for a running transaction by + the thread that is serving the transaction, it is not necessary + to hold trx->mutex here. */ + + /* When inserting a record into an index, the table must be at + least IX-locked. When we are building an index, we would pass + BTR_NO_LOCKING_FLAG and skip the locking altogether. */ + ut_ad(lock_table_has(trx, index->table, LOCK_IX)); + + lock = lock_rec_get_first(block, next_rec_heap_no); + + if (UNIV_LIKELY(lock == NULL)) { + /* We optimize CPU time usage in the simplest case */ + + lock_mutex_exit(); + + if (inherit_in && !dict_index_is_clust(index)) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + trx->id, mtr); + } + + *inherit = FALSE; + + return(DB_SUCCESS); + } + + *inherit = TRUE; + + /* If another transaction has an explicit lock request which locks + the gap, waiting or granted, on the successor, the insert has to wait. + + An exception is the case where the lock by the another transaction + is a gap type lock which it placed to wait for its turn to insert. We + do not consider that kind of a lock conflicting with our insert. This + eliminates an unnecessary deadlock which resulted when 2 transactions + had to wait for their insert. Both had waiting gap type lock requests + on the successor, which produced an unnecessary deadlock. */ + + if (lock_rec_other_has_conflicting( + static_cast<enum lock_mode>( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION), + block, next_rec_heap_no, trx)) { + + /* Note that we may get DB_SUCCESS also here! */ + trx_mutex_enter(trx); + + err = lock_rec_enqueue_waiting( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, + block, next_rec_heap_no, index, thr); + + trx_mutex_exit(trx); + } else { + err = DB_SUCCESS; + } + + lock_mutex_exit(); + + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (!inherit_in || dict_index_is_clust(index)) { + break; + } + /* Update the page max trx id field */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + trx->id, mtr); + default: + /* We only care about the two return values. 
*/ + break; + } + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + const ulint* offsets; + rec_offs_init(offsets_); + + offsets = rec_get_offsets(next_rec, index, offsets_, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate( + FALSE, block, next_rec, index, offsets)); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + } +#endif /* UNIV_DEBUG */ + + return(err); +} + +/*********************************************************************//** +If a transaction has an implicit x-lock on a record, but no explicit x-lock +set on the record, sets one for it. */ +static +void +lock_rec_convert_impl_to_expl( +/*==========================*/ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record on page */ + dict_index_t* index, /*!< in: index of record */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + trx_id_t trx_id; + + ut_ad(!lock_mutex_own()); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + + if (dict_index_is_clust(index)) { + trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); + /* The clustered index record was last modified by + this transaction. The transaction may have been + committed a long time ago. */ + } else { + ut_ad(!dict_index_is_online_ddl(index)); + trx_id = lock_sec_rec_some_has_impl(rec, index, offsets); + /* The transaction can be committed before the + trx_is_active(trx_id, NULL) check below, because we are not + holding lock_mutex. */ + + ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP, + trx_id, rec, block)); + } + + if (trx_id != 0) { + trx_t* impl_trx; + ulint heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + + /* If the transaction is still active and has no + explicit x-lock set on the record, set one for it */ + + impl_trx = trx_rw_is_active(trx_id, NULL); + + /* impl_trx cannot be committed until lock_mutex_exit() + because lock_trx_release_locks() acquires lock_sys->mutex */ + + if (impl_trx != NULL + && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, + heap_no, impl_trx)) { + ulint type_mode = (LOCK_REC | LOCK_X + | LOCK_REC_NOT_GAP); + + lock_rec_add_to_queue( + type_mode, block, heap_no, index, + impl_trx, FALSE); + } + + lock_mutex_exit(); + } +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (update, +delete mark, or delete unmark) of a clustered index record. If they do, +first tests if the query thread should anyway be suspended for some +reason; if not, then puts the transaction and the query thread to the +lock wait state and inserts a waiting request for a record x-lock to the +lock queue. 
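+
+An illustrative call (a hedged sketch, not code from this file; the
+offsets argument would come from rec_get_offsets() as in the real
+callers):
+
+	err = lock_clust_rec_modify_check_and_lock(
+		0, block, rec, index, offsets, thr);
+
+DB_LOCK_WAIT here means the requesting transaction was enqueued behind
+a conflicting lock on the record.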
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_clust_rec_modify_check_and_lock( +/*=================================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified */ + dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(dict_index_is_clust(index)); + ut_ad(block->frame == page_align(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = rec_offs_comp(offsets) + ? rec_get_heap_no_new(rec) + : rec_get_heap_no_old(rec); + + /* If a transaction has no explicit x-lock set on the record, set one + for it */ + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + + lock_mutex_enter(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (delete +mark or delete unmark) of a secondary index record. +@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_sec_rec_modify_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified; NOTE: as this is a secondary + index, we always have to modify the + clustered index record first: see the + comment below */ + dict_index_t* index, /*!< in: secondary index */ + que_thr_t* thr, /*!< in: query thread + (can be NULL if BTR_NO_LOCKING_FLAG) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG)); + ut_ad(block->frame == page_align(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + /* Another transaction cannot have an implicit lock on the record, + because when we come here, we already have modified the clustered + index record, and this would not have been possible if another active + transaction had modified this secondary index record. 
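+	(Roughly: an implicit lock on a secondary index record belongs
+	to the active transaction that last modified it, and any such
+	modification also modifies the corresponding clustered index
+	record. Having just modified that clustered index record
+	ourselves, no other active transaction can hold such a lock.)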
*/ + + lock_mutex_enter(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + const ulint* offsets; + rec_offs_init(offsets_); + + offsets = rec_get_offsets(rec, index, offsets_, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate( + FALSE, block, rec, index, offsets)); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + } +#endif /* UNIV_DEBUG */ + + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { + /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Like lock_clust_rec_read_check_and_lock(), but reads a +secondary index record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_sec_rec_read_check_and_lock( +/*=============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: secondary index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(block->frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(mode == LOCK_X || mode == LOCK_S); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list or a + database recovery is running. */ + + if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id() + || recv_recovery_is_on()) + && !page_rec_is_supremum(rec)) { + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + } + + lock_mutex_enter(); + + ut_ad(mode != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad(mode != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + err = lock_rec_lock(FALSE, mode | gap_mode, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. 
If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_clust_rec_read_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(dict_index_is_clust(index)); + ut_ad(block->frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP + || gap_mode == LOCK_REC_NOT_GAP); + ut_ad(rec_offs_validate(rec, index, offsets)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) { + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + } + + lock_mutex_enter(); + + ut_ad(mode != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad(mode != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + err = lock_rec_lock(FALSE, mode | gap_mode, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + return(err); +} +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. This is an alternative version of +lock_clust_rec_read_check_and_lock() that does not require the parameter +"offsets". 
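+
+Side-by-side sketch of the two variants (illustrative only; offsets in
+the first call would come from rec_get_offsets()):
+
+	err = lock_clust_rec_read_check_and_lock(
+		0, block, rec, index, offsets, LOCK_S, LOCK_ORDINARY, thr);
+
+	err = lock_clust_rec_read_check_and_lock_alt(
+		0, block, rec, index, LOCK_S, LOCK_ORDINARY, thr);
+
+The _alt variant computes the offsets internally, at the cost of a
+possible temporary heap allocation, and folds DB_SUCCESS_LOCKED_REC
+into DB_SUCCESS.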
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
+{
+	mem_heap_t*	tmp_heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	dberr_t		err;
+	rec_offs_init(offsets_);
+
+	offsets = rec_get_offsets(rec, index, offsets,
+				  ULINT_UNDEFINED, &tmp_heap);
+	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
+						 offsets, mode, gap_mode, thr);
+	if (tmp_heap) {
+		mem_heap_free(tmp_heap);
+	}
+
+	if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
+		err = DB_SUCCESS;
+	}
+
+	return(err);
+}
+
+/*******************************************************************//**
+Release the last lock from the transaction's autoinc locks. */
+UNIV_INLINE
+void
+lock_release_autoinc_last_lock(
+/*===========================*/
+	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
+{
+	ulint	last;
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+	ut_a(!ib_vector_is_empty(autoinc_locks));
+
+	/* The lock to be released must be the last lock acquired. */
+	last = ib_vector_size(autoinc_locks) - 1;
+	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
+
+	/* The vector should contain only AUTOINC locks. */
+	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
+	ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+	ut_a(lock->un_member.tab_lock.table != NULL);
+
+	/* This will remove the lock from the trx autoinc_locks too. */
+	lock_table_dequeue(lock);
+
+	/* Remove from the table vector too. */
+	lock_trx_table_locks_remove(lock);
+}
+
+/*******************************************************************//**
+Check if a transaction holds any autoinc locks.
+@return TRUE if the transaction holds any AUTOINC locks. */
+static
+ibool
+lock_trx_holds_autoinc_locks(
+/*=========================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	ut_a(trx->autoinc_locks != NULL);
+
+	return(!ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+static
+void
+lock_release_autoinc_locks(
+/*=======================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	ut_ad(lock_mutex_own());
+	/* If this is invoked for a running transaction by the thread
+	that is serving the transaction, then it is not necessary to
+	hold trx->mutex here. */
+
+	ut_a(trx->autoinc_locks != NULL);
+
+	/* We release the locks in reverse order. This is to
+	avoid searching the vector for the element to delete at
+	the lower level. See lock_table_remove_low() for details. */
+	while (!ib_vector_is_empty(trx->autoinc_locks)) {
+
+		/* lock_table_remove_low() will also remove the lock from
+		the transaction's autoinc_locks vector. */
+		lock_release_autoinc_last_lock(trx->autoinc_locks);
+	}
+
+	/* Should have released all locks.
*/ + ut_a(ib_vector_is_empty(trx->autoinc_locks)); +} + +/*******************************************************************//** +Gets the type of a lock. Non-inline version for using outside of the +lock module. +@return LOCK_TABLE or LOCK_REC */ +UNIV_INTERN +ulint +lock_get_type( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + return(lock_get_type_low(lock)); +} + +/*******************************************************************//** +Gets the id of the transaction owning a lock. +@return transaction id */ +UNIV_INTERN +trx_id_t +lock_get_trx_id( +/*============*/ + const lock_t* lock) /*!< in: lock */ +{ + return(lock->trx->id); +} + +/*******************************************************************//** +Gets the mode of a lock in a human readable string. +The string should not be free()'d or modified. +@return lock mode */ +UNIV_INTERN +const char* +lock_get_mode_str( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + ibool is_gap_lock; + + is_gap_lock = lock_get_type_low(lock) == LOCK_REC + && lock_rec_get_gap(lock); + + switch (lock_get_mode(lock)) { + case LOCK_S: + if (is_gap_lock) { + return("S,GAP"); + } else { + return("S"); + } + case LOCK_X: + if (is_gap_lock) { + return("X,GAP"); + } else { + return("X"); + } + case LOCK_IS: + if (is_gap_lock) { + return("IS,GAP"); + } else { + return("IS"); + } + case LOCK_IX: + if (is_gap_lock) { + return("IX,GAP"); + } else { + return("IX"); + } + case LOCK_AUTO_INC: + return("AUTO_INC"); + default: + return("UNKNOWN"); + } +} + +/*******************************************************************//** +Gets the type of a lock in a human readable string. +The string should not be free()'d or modified. +@return lock type */ +UNIV_INTERN +const char* +lock_get_type_str( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + switch (lock_get_type_low(lock)) { + case LOCK_REC: + return("RECORD"); + case LOCK_TABLE: + return("TABLE"); + default: + return("UNKNOWN"); + } +} + +/*******************************************************************//** +Gets the table on which the lock is. +@return table */ +UNIV_INLINE +dict_table_t* +lock_get_table( +/*===========*/ + const lock_t* lock) /*!< in: lock */ +{ + switch (lock_get_type_low(lock)) { + case LOCK_REC: + ut_ad(dict_index_is_clust(lock->index) + || !dict_index_is_online_ddl(lock->index)); + return(lock->index->table); + case LOCK_TABLE: + return(lock->un_member.tab_lock.table); + default: + ut_error; + return(NULL); + } +} + +/*******************************************************************//** +Gets the id of the table on which the lock is. +@return id of the table */ +UNIV_INTERN +table_id_t +lock_get_table_id( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + dict_table_t* table; + + table = lock_get_table(lock); + + return(table->id); +} + +/*******************************************************************//** +Gets the name of the table on which the lock is. +The string should not be free()'d or modified. +@return name of the table */ +UNIV_INTERN +const char* +lock_get_table_name( +/*================*/ + const lock_t* lock) /*!< in: lock */ +{ + dict_table_t* table; + + table = lock_get_table(lock); + + return(table->name); +} + +/*******************************************************************//** +For a record lock, gets the index on which the lock is. 
+@return index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(dict_index_is_clust(lock->index)
+	      || !dict_index_is_online_ddl(lock->index));
+
+	return(lock->index);
+}
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(dict_index_is_clust(lock->index)
+	      || !dict_index_is_online_ddl(lock->index));
+
+	return(lock->index->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.space);
+}
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.page_no);
+}
+
+/*********************************************************************//**
+Cancels a waiting lock request and releases any other transactions
+waiting behind it. */
+UNIV_INTERN
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+	lock_t*	lock)	/*!< in/out: waiting lock request */
+{
+	que_thr_t*	thr;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(trx_mutex_own(lock->trx));
+
+	lock->trx->lock.cancel = TRUE;
+
+	if (lock_get_type_low(lock) == LOCK_REC) {
+
+		lock_rec_dequeue_from_page(lock);
+	} else {
+		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+		if (lock->trx->autoinc_locks != NULL) {
+			/* Release the transaction's AUTOINC locks. */
+			lock_release_autoinc_locks(lock->trx);
+		}
+
+		lock_table_dequeue(lock);
+	}
+
+	/* Reset the wait flag and the back pointer to lock in trx. */
+
+	lock_reset_lock_and_trx_wait(lock);
+
+	/* The following function releases the trx from lock wait. */
+
+	thr = que_thr_end_lock_wait(lock->trx);
+
+	if (thr != NULL) {
+		lock_wait_release_thread_if_suspended(thr);
+	}
+
+	lock->trx->lock.cancel = FALSE;
+}
+
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
+void
+lock_unlock_table_autoinc(
+/*======================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	ut_ad(!lock_mutex_own());
+	ut_ad(!trx_mutex_own(trx));
+	ut_ad(!trx->lock.wait_lock);
+	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
+	but not COMMITTED transactions. */
+	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
+	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
+	/* This function is invoked for a running transaction by the
+	thread that is serving the transaction. Therefore it is not
+	necessary to hold trx->mutex here.
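+	AUTO_INC locks are statement-scoped rather than
+	transaction-scoped, which is why they are released here instead
+	of at commit. An illustrative call site (a hedged sketch; the
+	real caller sits in the MySQL handler layer at statement end):
+
+		lock_unlock_table_autoinc(trx);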
 */
+
+	if (lock_trx_holds_autoinc_locks(trx)) {
+		lock_mutex_enter();
+
+		lock_release_autoinc_locks(trx);
+
+		lock_mutex_exit();
+	}
+}
+
+/*********************************************************************//**
+Releases a transaction's locks, and releases any other transactions
+waiting because of these locks. Changes the state of the transaction to
+TRX_STATE_COMMITTED_IN_MEMORY. */
+UNIV_INTERN
+void
+lock_trx_release_locks(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	assert_trx_in_list(trx);
+
+	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
+		mutex_enter(&trx_sys->mutex);
+		ut_a(trx_sys->n_prepared_trx > 0);
+		trx_sys->n_prepared_trx--;
+		if (trx->is_recovered) {
+			ut_a(trx_sys->n_prepared_recovered_trx > 0);
+			trx_sys->n_prepared_recovered_trx--;
+		}
+		mutex_exit(&trx_sys->mutex);
+	} else {
+		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+	}
+
+	/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
+	is protected by both the lock_sys->mutex and the trx->mutex. */
+	lock_mutex_enter();
+	trx_mutex_enter(trx);
+
+	/* The following assignment makes the transaction committed in memory
+	and makes its changes to data visible to other transactions.
+	NOTE that there is a small discrepancy from the strict formal
+	visibility rules here: a human user of the database can see
+	modifications made by another transaction T even before the necessary
+	log segment has been flushed to the disk. If the database happens to
+	crash before the flush, the user has seen modifications from T which
+	will never be a committed transaction. However, any transaction T2
+	which sees the modifications of the committing transaction T, and
+	which also itself makes modifications to the database, will get an lsn
+	larger than the committing transaction T. In the case where the log
+	flush fails, and T never gets committed, also T2 will never get
+	committed. */
+
+	/*--------------------------------------*/
+	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
+	/*--------------------------------------*/
+
+	/* If the background thread trx_rollback_or_clean_recovered()
+	is still active, then there is a chance that the rollback
+	thread may see this trx as COMMITTED_IN_MEMORY and go ahead
+	to clean it up by calling trx_cleanup_at_db_startup(). This can
+	happen if we are committing a trx here that was left in the
+	PREPARED state at the crash. Note that the commit of the
+	rollback of a PREPARED trx happens in the recovery thread,
+	while the rollback of other transactions happens in the
+	background thread. To avoid this race we unconditionally unset
+	the is_recovered flag. */
+
+	trx->is_recovered = FALSE;
+
+	trx_mutex_exit(trx);
+
+	lock_release(trx);
+
+	lock_mutex_exit();
+}
+
+/*********************************************************************//**
+Checks whether the transaction has already been rolled back because it
+was selected as a deadlock victim, or, if it still has to wait, cancels
+the wait lock.
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+lock_trx_handle_wait(
+/*=================*/
+	trx_t*	trx)	/*!< in/out: trx lock state */
+{
+	dberr_t	err;
+
+	lock_mutex_enter();
+
+	trx_mutex_enter(trx);
+
+	if (trx->lock.was_chosen_as_deadlock_victim) {
+		err = DB_DEADLOCK;
+	} else if (trx->lock.wait_lock != NULL) {
+		lock_cancel_waiting_and_release(trx->lock.wait_lock);
+		err = DB_LOCK_WAIT;
+	} else {
+		/* The lock was probably granted before we got here.
*/ + err = DB_SUCCESS; + } + + lock_mutex_exit(); + trx_mutex_exit(trx); + + return(err); +} + +/*********************************************************************//** +Get the number of locks on a table. +@return number of locks */ +UNIV_INTERN +ulint +lock_table_get_n_locks( +/*===================*/ + const dict_table_t* table) /*!< in: table */ +{ + ulint n_table_locks; + + lock_mutex_enter(); + + n_table_locks = UT_LIST_GET_LEN(table->locks); + + lock_mutex_exit(); + + return(n_table_locks); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Do an exhaustive check for any locks (table or rec) against the table. +@return lock if found */ +static +const lock_t* +lock_table_locks_lookup( +/*====================*/ + const dict_table_t* table, /*!< in: check if there are + any locks held on records in + this table or on the table + itself */ + const trx_list_t* trx_list) /*!< in: trx list to check */ +{ + trx_t* trx; + + ut_a(table != NULL); + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + ut_ad(trx_list == &trx_sys->rw_trx_list + || trx_list == &trx_sys->ro_trx_list); + + for (trx = UT_LIST_GET_FIRST(*trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + const lock_t* lock; + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + ut_a(lock->trx == trx); + + if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(!dict_index_is_online_ddl(lock->index) + || dict_index_is_clust(lock->index)); + if (lock->index->table == table) { + return(lock); + } + } else if (lock->un_member.tab_lock.table == table) { + return(lock); + } + } + } + + return(NULL); +} +#endif /* UNIV_DEBUG */ + +/*******************************************************************//** +Check if there are any locks (table or rec) against table. +@return TRUE if table has either table or record locks. */ +UNIV_INTERN +ibool +lock_table_has_locks( +/*=================*/ + const dict_table_t* table) /*!< in: check if there are any locks + held on records in this table or on the + table itself */ +{ + ibool has_locks; + + lock_mutex_enter(); + + has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0; + +#ifdef UNIV_DEBUG + if (!has_locks) { + mutex_enter(&trx_sys->mutex); + + ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list)); + ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list)); + + mutex_exit(&trx_sys->mutex); + } +#endif /* UNIV_DEBUG */ + + lock_mutex_exit(); + + return(has_locks); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Check if the transaction holds any locks on the sys tables +or its records. +@return the strongest lock found on any sys table or 0 for none */ +UNIV_INTERN +const lock_t* +lock_trx_has_sys_table_locks( +/*=========================*/ + const trx_t* trx) /*!< in: transaction to check */ +{ + lint i; + const lock_t* strongest_lock = 0; + lock_mode strongest = LOCK_NONE; + + lock_mutex_enter(); + + /* Find a valid mode. Note: ib_vector_size() can be 0. 
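+	This first backward scan only seeds "strongest" with the most
+	recently acquired lock on a system table; the second loop below
+	then continues over the remaining, earlier entries and keeps
+	the strongest mode seen.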
*/ + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock != NULL + && dict_is_sys_table(lock->un_member.tab_lock.table->id)) { + + strongest = lock_get_mode(lock); + ut_ad(strongest != LOCK_NONE); + strongest_lock = lock; + break; + } + } + + if (strongest == LOCK_NONE) { + lock_mutex_exit(); + return(NULL); + } + + for (/* No op */; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + ut_ad(trx == lock->trx); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(lock->un_member.tab_lock.table != NULL); + + lock_mode mode = lock_get_mode(lock); + + if (dict_is_sys_table(lock->un_member.tab_lock.table->id) + && lock_mode_stronger_or_eq(mode, strongest)) { + + strongest = mode; + strongest_lock = lock; + } + } + + lock_mutex_exit(); + + return(strongest_lock); +} + +/*******************************************************************//** +Check if the transaction holds an exclusive lock on a record. +@return whether the locks are held */ +UNIV_INTERN +bool +lock_trx_has_rec_x_lock( +/*====================*/ + const trx_t* trx, /*!< in: transaction to check */ + const dict_table_t* table, /*!< in: table to check */ + const buf_block_t* block, /*!< in: buffer block of the record */ + ulint heap_no)/*!< in: record heap number */ +{ + ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM); + + lock_mutex_enter(); + ut_a(lock_table_has(trx, table, LOCK_IX)); + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, trx)); + lock_mutex_exit(); + return(true); +} +#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc new file mode 100644 index 00000000000..a1c35e20ead --- /dev/null +++ b/storage/innobase/lock/lock0wait.cc @@ -0,0 +1,543 @@ +/***************************************************************************** + +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0wait.cc +The transaction lock system + +Created 25/5/2010 Sunny Bains +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "srv0mon.h" +#include "que0que.h" +#include "lock0lock.h" +#include "row0mysql.h" +#include "srv0start.h" +#include "ha_prototypes.h" +#include "lock0priv.h" + +/*********************************************************************//** +Print the contents of the lock_sys_t::waiting_threads array. 
*/ +static +void +lock_wait_table_print(void) +/*=======================*/ +{ + ulint i; + const srv_slot_t* slot; + + ut_ad(lock_wait_mutex_own()); + + slot = lock_sys->waiting_threads; + + for (i = 0; i < OS_THREAD_MAX_N; i++, ++slot) { + + fprintf(stderr, + "Slot %lu: thread type %lu," + " in use %lu, susp %lu, timeout %lu, time %lu\n", + (ulong) i, + (ulong) slot->type, + (ulong) slot->in_use, + (ulong) slot->suspended, + slot->wait_timeout, + (ulong) difftime(ut_time(), slot->suspend_time)); + } +} + +/*********************************************************************//** +Release a slot in the lock_sys_t::waiting_threads. Adjust the array last pointer +if there are empty slots towards the end of the table. */ +static +void +lock_wait_table_release_slot( +/*=========================*/ + srv_slot_t* slot) /*!< in: slot to release */ +{ +#ifdef UNIV_DEBUG + srv_slot_t* upper = lock_sys->waiting_threads + OS_THREAD_MAX_N; +#endif /* UNIV_DEBUG */ + + lock_wait_mutex_enter(); + + ut_ad(slot->in_use); + ut_ad(slot->thr != NULL); + ut_ad(slot->thr->slot != NULL); + ut_ad(slot->thr->slot == slot); + + /* Must be within the array boundaries. */ + ut_ad(slot >= lock_sys->waiting_threads); + ut_ad(slot < upper); + + /* Note: When we reserve the slot we use the trx_t::mutex to update + the slot values to change the state to reserved. Here we are using the + lock mutex to change the state of the slot to free. This is by design, + because when we query the slot state we always hold both the lock and + trx_t::mutex. To reduce contention on the lock mutex when reserving the + slot we avoid acquiring the lock mutex. */ + + lock_mutex_enter(); + + slot->thr->slot = NULL; + slot->thr = NULL; + slot->in_use = FALSE; + + lock_mutex_exit(); + + /* Scan backwards and adjust the last free slot pointer. */ + for (slot = lock_sys->last_slot; + slot > lock_sys->waiting_threads && !slot->in_use; + --slot) { + /* No op */ + } + + /* Either the array is empty or the last scanned slot is in use. */ + ut_ad(slot->in_use || slot == lock_sys->waiting_threads); + + lock_sys->last_slot = slot + 1; + + /* The last slot is either outside of the array boundary or it's + on an empty slot. */ + ut_ad(lock_sys->last_slot == upper || !lock_sys->last_slot->in_use); + + ut_ad(lock_sys->last_slot >= lock_sys->waiting_threads); + ut_ad(lock_sys->last_slot <= upper); + + lock_wait_mutex_exit(); +} + +/*********************************************************************//** +Reserves a slot in the thread table for the current user OS thread. 
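+The reservation below is a linear scan over the fixed-size array, so it
+is O(OS_THREAD_MAX_N) in the worst case; the array never grows, which
+is why exhausting it is treated as a fatal error.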
+@return reserved slot */ +static +srv_slot_t* +lock_wait_table_reserve_slot( +/*=========================*/ + que_thr_t* thr, /*!< in: query thread associated + with the user OS thread */ + ulong wait_timeout) /*!< in: lock wait timeout value */ +{ + ulint i; + srv_slot_t* slot; + + ut_ad(lock_wait_mutex_own()); + ut_ad(trx_mutex_own(thr_get_trx(thr))); + + slot = lock_sys->waiting_threads; + + for (i = OS_THREAD_MAX_N; i--; ++slot) { + if (!slot->in_use) { + slot->in_use = TRUE; + slot->thr = thr; + slot->thr->slot = slot; + + if (slot->event == NULL) { + slot->event = os_event_create(); + ut_a(slot->event); + } + + os_event_reset(slot->event); + slot->suspended = TRUE; + slot->suspend_time = ut_time(); + slot->wait_timeout = wait_timeout; + + if (slot == lock_sys->last_slot) { + ++lock_sys->last_slot; + } + + ut_ad(lock_sys->last_slot + <= lock_sys->waiting_threads + OS_THREAD_MAX_N); + + return(slot); + } + } + + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: There appear to be %lu user" + " threads currently waiting\n" + "InnoDB: inside InnoDB, which is the" + " upper limit. Cannot continue operation.\n" + "InnoDB: As a last thing, we print" + " a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N); + + lock_wait_table_print(); + + ut_error; + return(NULL); +} + +/***************************************************************//** +Puts a user OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ +UNIV_INTERN +void +lock_wait_suspend_thread( +/*=====================*/ + que_thr_t* thr) /*!< in: query thread associated with the + user OS thread */ +{ + srv_slot_t* slot; + double wait_time; + trx_t* trx; + ulint had_dict_lock; + ibool was_declared_inside_innodb; + ib_int64_t start_time = 0; + ib_int64_t finish_time; + ulint sec; + ulint ms; + ulong lock_wait_timeout; + + trx = thr_get_trx(thr); + + if (trx->mysql_thd != 0) { + DEBUG_SYNC_C("lock_wait_suspend_thread_enter"); + } + + /* InnoDB system transactions (such as the purge, and + incomplete transactions that are being rolled back after crash + recovery) will use the global value of + innodb_lock_wait_timeout, because trx->mysql_thd == NULL. 
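+	For a regular client connection the per-session value applies
+	instead; e.g. SET innodb_lock_wait_timeout = 5 (an illustrative
+	statement, not taken from this file) bounds each row lock wait
+	to five seconds.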
*/ + lock_wait_timeout = trx_lock_wait_timeout_get(trx); + + lock_wait_mutex_enter(); + + trx_mutex_enter(trx); + + trx->error_state = DB_SUCCESS; + + if (thr->state == QUE_THR_RUNNING) { + + ut_ad(thr->is_active); + + /* The lock has already been released or this transaction + was chosen as a deadlock victim: no need to suspend */ + + if (trx->lock.was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->lock.was_chosen_as_deadlock_victim = FALSE; + } + + lock_wait_mutex_exit(); + trx_mutex_exit(trx); + return; + } + + ut_ad(!thr->is_active); + + slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + srv_stats.n_lock_wait_count.inc(); + srv_stats.n_lock_wait_current_count.inc(); + + if (ut_usectime(&sec, &ms) == -1) { + start_time = -1; + } else { + start_time = (ib_int64_t) sec * 1000000 + ms; + } + } + + /* Wake the lock timeout monitor thread, if it is suspended */ + + os_event_set(lock_sys->timeout_event); + + lock_wait_mutex_exit(); + trx_mutex_exit(trx); + + ulint lock_type = ULINT_UNDEFINED; + + lock_mutex_enter(); + + if (const lock_t* wait_lock = trx->lock.wait_lock) { + lock_type = lock_get_type_low(wait_lock); + } + + lock_mutex_exit(); + + had_dict_lock = trx->dict_operation_lock_mode; + + switch (had_dict_lock) { + case 0: + break; + case RW_S_LATCH: + /* Release foreign key check latch */ + row_mysql_unfreeze_data_dictionary(trx); + + DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep"); + break; + default: + /* There should never be a lock wait when the + dictionary latch is reserved in X mode. Dictionary + transactions should only acquire locks on dictionary + tables, not other tables. All access to dictionary + tables should be covered by dictionary + transactions. */ + ut_error; + } + + ut_a(trx->dict_operation_lock_mode == 0); + + /* Suspend this thread and wait for the event. */ + + was_declared_inside_innodb = trx->declared_to_be_inside_innodb; + + if (was_declared_inside_innodb) { + /* We must declare this OS thread to exit InnoDB, since a + possible other thread holding a lock which this thread waits + for must be allowed to enter, sooner or later */ + + srv_conc_force_exit_innodb(trx); + } + + /* Unknown is also treated like a record lock */ + if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) { + thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK); + } else { + ut_ad(lock_type == LOCK_TABLE); + thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK); + } + + os_event_wait(slot->event); + + thd_wait_end(trx->mysql_thd); + + /* After resuming, reacquire the data dictionary latch if + necessary. */ + + if (was_declared_inside_innodb) { + + /* Return back inside InnoDB */ + + srv_conc_force_enter_innodb(trx); + } + + if (had_dict_lock) { + + row_mysql_freeze_data_dictionary(trx); + } + + wait_time = ut_difftime(ut_time(), slot->suspend_time); + + /* Release the slot for others to use */ + + lock_wait_table_release_slot(slot); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + ulint diff_time; + + if (ut_usectime(&sec, &ms) == -1) { + finish_time = -1; + } else { + finish_time = (ib_int64_t) sec * 1000000 + ms; + } + + diff_time = (finish_time > start_time) ? + (ulint) (finish_time - start_time) : 0; + + srv_stats.n_lock_wait_current_count.dec(); + srv_stats.n_lock_wait_time.add(diff_time); + + /* Only update the variable if we successfully + retrieved the start and finish times. See Bug#36819. 
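+		A value of -1 means the corresponding ut_usectime() call
+		above failed, so diff_time would be meaningless and must
+		not feed the maximum.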
*/ + if (diff_time > lock_sys->n_lock_max_wait_time + && start_time != -1 + && finish_time != -1) { + + lock_sys->n_lock_max_wait_time = diff_time; + } + + /* Record the lock wait time for this thread */ + thd_set_lock_wait_time(trx->mysql_thd, diff_time); + + } + + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { + + trx->error_state = DB_LOCK_WAIT_TIMEOUT; + + MONITOR_INC(MONITOR_TIMEOUT); + } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } +} + +/********************************************************************//** +Releases a user OS thread waiting for a lock to be released, if the +thread is already suspended. */ +UNIV_INTERN +void +lock_wait_release_thread_if_suspended( +/*==================================*/ + que_thr_t* thr) /*!< in: query thread associated with the + user OS thread */ +{ + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(thr_get_trx(thr))); + + /* We own both the lock mutex and the trx_t::mutex but not the + lock wait mutex. This is OK because other threads will see the state + of this slot as being in use and no other thread can change the state + of the slot to free unless that thread also owns the lock mutex. */ + + if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) { + trx_t* trx = thr_get_trx(thr); + + if (trx->lock.was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->lock.was_chosen_as_deadlock_victim = FALSE; + } + + os_event_set(thr->slot->event); + } +} + +/*********************************************************************//** +Check if the thread lock wait has timed out. Release its locks if the +wait has actually timed out. */ +static +void +lock_wait_check_and_cancel( +/*=======================*/ + const srv_slot_t* slot) /*!< in: slot reserved by a user + thread when the wait started */ +{ + trx_t* trx; + double wait_time; + ib_time_t suspend_time = slot->suspend_time; + + ut_ad(lock_wait_mutex_own()); + + ut_ad(slot->in_use); + + ut_ad(slot->suspended); + + wait_time = ut_difftime(ut_time(), suspend_time); + + trx = thr_get_trx(slot->thr); + + if (trx_is_interrupted(trx) + || (slot->wait_timeout < 100000000 + && (wait_time > (double) slot->wait_timeout + || wait_time < 0))) { + + /* Timeout exceeded or a wrap-around in system + time counter: cancel the lock request queued + by the transaction and release possible + other transactions waiting behind; it is + possible that the lock has already been + granted: in that case do nothing */ + + lock_mutex_enter(); + + trx_mutex_enter(trx); + + if (trx->lock.wait_lock) { + + ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); + + lock_cancel_waiting_and_release(trx->lock.wait_lock); + } + + lock_mutex_exit(); + + trx_mutex_exit(trx); + } + +} + +/*********************************************************************//** +A thread which wakes up threads whose lock wait may have lasted too long. 
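+
+The design is a one-second poll rather than a precise per-wait timer:
+os_event_wait_time_low() returns either when a waiter signals
+lock_sys->timeout_event or when the second elapses, and each in-use
+slot is then checked against its own wait_timeout.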
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(lock_wait_timeout_thread)(
+/*=====================================*/
+	void*	arg __attribute__((unused)))
+			/* in: a dummy parameter required by
+			os_thread_create */
+{
+	ib_int64_t	sig_count = 0;
+	os_event_t	event = lock_sys->timeout_event;
+
+	ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(srv_lock_timeout_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+	lock_sys->timeout_thread_active = true;
+
+	do {
+		srv_slot_t*	slot;
+
+		/* When someone is waiting for a lock, we wake up every second
+		and check if a timeout has passed for a lock wait */
+
+		os_event_wait_time_low(event, 1000000, sig_count);
+		sig_count = os_event_reset(event);
+
+		if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+			break;
+		}
+
+		lock_wait_mutex_enter();
+
+		/* Check all slots for user threads that are waiting
+		on locks, and whether they have exceeded the time limit. */
+
+		for (slot = lock_sys->waiting_threads;
+		     slot < lock_sys->last_slot;
+		     ++slot) {
+
+			/* We are doing a read without the lock mutex
+			and/or the trx mutex. This is OK because a slot
+			can't be freed or reserved without the lock wait
+			mutex. */
+
+			if (slot->in_use) {
+				lock_wait_check_and_cancel(slot);
+			}
+		}
+
+		sig_count = os_event_reset(event);
+
+		lock_wait_mutex_exit();
+
+	} while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
+
+	lock_sys->timeout_thread_active = false;
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}