Diffstat (limited to 'storage/innobase/lock/lock0lock.cc')
-rw-r--r--	storage/innobase/lock/lock0lock.cc	| 441
1 file changed, 326 insertions(+), 115 deletions(-)
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 476b305ca70..1152152cc77 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -37,15 +37,17 @@ Created 5/7/1996 Heikki Tuuri #include "usr0sess.h" #include "trx0purge.h" #include "dict0mem.h" +#include "dict0boot.h" #include "trx0sys.h" #include "pars0pars.h" /* pars_complete_graph_for_exec() */ #include "que0que.h" /* que_node_get_parent() */ #include "row0mysql.h" /* row_mysql_handle_errors() */ -#include "row0sel.h" /* sel_node_create(), sel_node_struct */ +#include "row0sel.h" /* sel_node_create(), sel_node_t */ #include "row0types.h" /* sel_node_t */ #include "srv0mon.h" #include "ut0vec.h" #include "btr0btr.h" +#include "dict0boot.h" /* Restricts the length of search we will do in the waits-for graph of transactions */ @@ -345,10 +347,7 @@ static const byte lock_strength_matrix[5][5] = { }; /** Deadlock check context. */ -typedef struct lock_deadlock_ctx_struct lock_deadlock_ctx_t; - -/** Deadlock check context. */ -struct lock_deadlock_ctx_struct { +struct lock_deadlock_ctx_t { const trx_t* start; /*!< Joining transaction that is requesting a lock in an incompatible mode */ @@ -366,10 +365,8 @@ struct lock_deadlock_ctx_struct { was aborted */ }; -typedef struct lock_stack_struct lock_stack_t; - /** DFS visited node information used during deadlock checking. */ -struct lock_stack_struct { +struct lock_stack_t { const lock_t* lock; /*!< Current lock */ const lock_t* wait_lock; /*!< Waiting for lock */ unsigned heap_no:16; /*!< heap number if rec lock */ @@ -415,9 +412,10 @@ lock_rec_validate_page( /* The lock system */ UNIV_INTERN lock_sys_t* lock_sys = NULL; -/* We store info on the latest deadlock error to this buffer. InnoDB +/** We store info on the latest deadlock error to this buffer. InnoDB Monitor will then fetch it and print */ UNIV_INTERN ibool lock_deadlock_found = FALSE; +/** Only created if !srv_read_only_mode */ static FILE* lock_latest_err_file; /********************************************************************//** @@ -502,7 +500,7 @@ lock_check_trx_id_sanity( dict_index_t* index, /*!< in: index */ const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ { - ibool is_ok; + bool is_ok; trx_id_t max_trx_id; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -520,10 +518,10 @@ lock_check_trx_id_sanity( /*********************************************************************//** Checks that a record is seen in a consistent read. -@return TRUE if sees, or FALSE if an earlier version of the record +@return true if sees, or false if an earlier version of the record should be retrieved */ UNIV_INTERN -ibool +bool lock_clust_rec_cons_read_sees( /*==========================*/ const rec_t* rec, /*!< in: user record which should be read or @@ -550,14 +548,14 @@ lock_clust_rec_cons_read_sees( Checks that a non-clustered index record is seen in a consistent read. NOTE that a non-clustered index page contains so little information on -its modifications that also in the case FALSE, the present version of +its modifications that also in the case false, the present version of rec may be the right, but we must check this from the clustered index record. 
-@return TRUE if certainly sees, or FALSE if an earlier version of the +@return true if certainly sees, or false if an earlier version of the clustered index record might be needed */ UNIV_INTERN -ulint +bool lock_sec_rec_cons_read_sees( /*========================*/ const rec_t* rec, /*!< in: user record which @@ -574,7 +572,7 @@ lock_sec_rec_cons_read_sees( if (recv_recovery_is_on()) { - return(FALSE); + return(false); } max_trx_id = page_get_max_trx_id(page_align(rec)); @@ -593,12 +591,6 @@ lock_sys_create( { ulint lock_sys_sz; - srv_n_lock_wait_count = 0; - srv_n_lock_wait_time = 0; - srv_n_lock_max_wait_time = 0; - srv_lock_timeout_active = FALSE; - srv_n_lock_wait_current_count = 0; - lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t); @@ -618,12 +610,14 @@ lock_sys_create( mutex_create(lock_sys_wait_mutex_key, &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS); - lock_sys->rec_hash = hash_create(n_cells); + lock_sys->timeout_event = os_event_create(); - lock_latest_err_file = os_file_create_tmpfile(); - ut_a(lock_latest_err_file); + lock_sys->rec_hash = hash_create(n_cells); - srv_timeout_event = os_event_create(NULL); + if (!srv_read_only_mode) { + lock_latest_err_file = os_file_create_tmpfile(); + ut_a(lock_latest_err_file); + } } /*********************************************************************//** @@ -858,13 +852,16 @@ lock_reset_lock_and_trx_wait( /*=========================*/ lock_t* lock) /*!< in/out: record lock */ { - ut_ad(lock->trx->lock.wait_lock == lock); ut_ad(lock_get_wait(lock)); ut_ad(lock_mutex_own()); /* Reset the back pointer in trx to this waiting lock request */ - - lock->trx->lock.wait_lock = NULL; + if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) { + ut_ad(lock->trx->lock.wait_lock == lock); + lock->trx->lock.wait_lock = NULL; + } else { + ut_ad(lock_get_type_low(lock) == LOCK_REC); + } lock->type_mode &= ~LOCK_WAIT; } @@ -1476,7 +1473,7 @@ Checks if a transaction has a GRANTED explicit lock on rec stronger or equal to precise_mode. @return lock or NULL */ UNIV_INLINE -const lock_t* +lock_t* lock_rec_has_expl( /*==============*/ ulint precise_mode,/*!< in: LOCK_S or LOCK_X @@ -1489,7 +1486,7 @@ lock_rec_has_expl( ulint heap_no,/*!< in: heap number of the record */ const trx_t* trx) /*!< in: transaction */ { - const lock_t* lock; + lock_t* lock; ut_ad(lock_mutex_own()); ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S @@ -1498,14 +1495,14 @@ lock_rec_has_expl( for (lock = lock_rec_get_first(block, heap_no); lock != NULL; - lock = lock_rec_get_next_const(heap_no, lock)) { + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx + && !lock_is_wait_not_by_other(lock->type_mode) && lock_mode_stronger_or_eq( lock_get_mode(lock), static_cast<enum lock_mode>( precise_mode & LOCK_MODE_MASK)) - && !lock_get_wait(lock) && (!lock_rec_get_rec_not_gap(lock) || (precise_mode & LOCK_REC_NOT_GAP) || heap_no == PAGE_HEAP_NO_SUPREMUM) @@ -1756,6 +1753,7 @@ lock_rec_create( ut_ad(lock_mutex_own()); ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); /* Non-locking autocommit read-only transactions should not set any locks. 
*/ @@ -1813,7 +1811,7 @@ lock_rec_create( } ut_ad(trx_mutex_own(trx)); - if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + if (lock_is_wait_not_by_other(type_mode)) { lock_set_lock_and_trx_wait(lock, trx); } @@ -1838,7 +1836,7 @@ DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that there was a deadlock, but another transaction was chosen as a victim, and we got the lock immediately: no need to wait then */ static -enum db_err +dberr_t lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1853,14 +1851,16 @@ lock_rec_enqueue_waiting( const buf_block_t* block, /*!< in: buffer block containing the record */ ulint heap_no,/*!< in: heap number of the record */ + lock_t* lock, /*!< in: lock object; NULL if a new + one should be created. */ dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - lock_t* lock; trx_id_t victim_trx_id; ut_ad(lock_mutex_own()); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); trx = thr_get_trx(thr); @@ -1893,10 +1893,20 @@ lock_rec_enqueue_waiting( ut_ad(0); } - /* Enqueue the lock request that will wait to be granted, note that - we already own the trx mutex. */ - lock = lock_rec_create( - type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); + if (lock == NULL) { + /* Enqueue the lock request that will wait + to be granted, note that we already own + the trx mutex. */ + lock = lock_rec_create( + type_mode | LOCK_WAIT, block, heap_no, + index, trx, TRUE); + } else { + ut_ad(lock->type_mode & LOCK_WAIT); + ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER); + + lock->type_mode &= ~LOCK_CONV_BY_OTHER; + lock_set_lock_and_trx_wait(lock, trx); + } /* Release the mutex to obey the latching order. This is safe, because lock_deadlock_check_and_resolve() @@ -1979,6 +1989,7 @@ lock_rec_add_to_queue( ut_ad(lock_mutex_own()); ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); #ifdef UNIV_DEBUG switch (type_mode & LOCK_MODE_MASK) { case LOCK_X: @@ -2100,6 +2111,7 @@ lock_rec_lock_fast( ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); lock = lock_rec_get_first_on_page(block); @@ -2146,7 +2158,7 @@ lock, or in the case of a page supremum record, a gap type lock. @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ static -enum db_err +dberr_t lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2163,7 +2175,8 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - enum db_err err = DB_SUCCESS; + lock_t* lock; + dberr_t err = DB_SUCCESS; ut_ad(lock_mutex_own()); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2175,12 +2188,33 @@ lock_rec_lock_slow( ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); trx = thr_get_trx(thr); trx_mutex_enter(trx); - if (lock_rec_has_expl(mode, block, heap_no, trx)) { + lock = lock_rec_has_expl(mode, block, heap_no, trx); + if (lock) { + if (lock->type_mode & LOCK_CONV_BY_OTHER) { + /* This lock or lock waiting was created by the other + transaction, not by the transaction (trx) itself. 
+ So, the transaction (trx) should treat it collectly + according as whether granted or not. */ + + if (lock->type_mode & LOCK_WAIT) { + /* This lock request was not granted yet. + Should wait for granted. */ + + goto enqueue_waiting; + } else { + /* This lock request was already granted. + Just clearing the flag. */ + + lock->type_mode &= ~LOCK_CONV_BY_OTHER; + } + } + /* The trx already has a strong enough lock on rec: do nothing */ @@ -2193,8 +2227,10 @@ lock_rec_lock_slow( have a lock strong enough already granted on the record, we have to wait. */ + ut_ad(lock == NULL); +enqueue_waiting: err = lock_rec_enqueue_waiting( - mode, block, heap_no, index, thr); + mode, block, heap_no, lock, index, thr); } else if (!impl) { /* Set the requested lock on the record, note that @@ -2220,7 +2256,7 @@ of a page supremum record, a gap type lock. @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ static -enum db_err +dberr_t lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2246,6 +2282,7 @@ lock_rec_lock( ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); /* We try a simplified and faster subroutine for the most common cases */ @@ -2348,7 +2385,8 @@ lock_grant( TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait for it */ - if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + if (!(lock->type_mode & LOCK_CONV_BY_OTHER) + && lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { que_thr_t* thr; thr = que_thr_end_lock_wait(lock->trx); @@ -2375,6 +2413,7 @@ lock_rec_cancel( ut_ad(lock_mutex_own()); ut_ad(lock_get_type_low(lock) == LOCK_REC); + ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER)); /* Reset the bit (there can be only one set bit) in the lock bitmap */ lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); @@ -2541,8 +2580,12 @@ lock_rec_reset_and_release_wait( lock != NULL; lock = lock_rec_get_next(heap_no, lock)) { - if (lock_get_wait(lock)) { + if (lock_is_wait_not_by_other(lock->type_mode)) { lock_rec_cancel(lock); + } else if (lock_get_wait(lock)) { + /* just reset LOCK_WAIT */ + lock_rec_reset_nth_bit(lock, heap_no); + lock_reset_lock_and_trx_wait(lock); } else { lock_rec_reset_nth_bit(lock, heap_no); } @@ -3439,11 +3482,13 @@ lock_deadlock_start_print() /*=======================*/ { ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); rewind(lock_latest_err_file); ut_print_timestamp(lock_latest_err_file); if (srv_print_all_deadlocks) { + ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: transactions deadlock detected, " "dumping detailed information.\n"); ut_print_timestamp(stderr); @@ -3458,10 +3503,12 @@ lock_deadlock_fputs( /*================*/ const char* msg) /*!< in: message to print */ { - fputs(msg, lock_latest_err_file); + if (!srv_read_only_mode) { + fputs(msg, lock_latest_err_file); - if (srv_print_all_deadlocks) { - fputs(msg, stderr); + if (srv_print_all_deadlocks) { + fputs(msg, stderr); + } } } @@ -3475,24 +3522,21 @@ lock_deadlock_trx_print( ulint max_query_len) /*!< in: max query length to print, or 0 to use the default max length */ { - ulint n_lock_rec; - ulint n_lock_struct; - ulint heap_size; - ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); - n_lock_rec = lock_number_of_rows_locked(&trx->lock); - n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks); - heap_size = 
mem_heap_get_size(trx->lock.lock_heap); + ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock); + ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); + ulint heap_size = mem_heap_get_size(trx->lock.lock_heap); mutex_enter(&trx_sys->mutex); trx_print_low(lock_latest_err_file, trx, max_query_len, - n_lock_rec, n_lock_struct, heap_size); + n_rec_locks, n_trx_locks, heap_size); if (srv_print_all_deadlocks) { trx_print_low(stderr, trx, max_query_len, - n_lock_rec, n_lock_struct, heap_size); + n_rec_locks, n_trx_locks, heap_size); } mutex_exit(&trx_sys->mutex); @@ -3507,6 +3551,7 @@ lock_deadlock_lock_print( const lock_t* lock) /*!< in: record or table type lock */ { ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); if (lock_get_type_low(lock) == LOCK_REC) { lock_rec_print(lock_latest_err_file, lock); @@ -3629,6 +3674,7 @@ lock_deadlock_notify( deadlock */ { ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); lock_deadlock_start_print(); @@ -3648,9 +3694,15 @@ lock_deadlock_notify( lock_deadlock_lock_print(lock); - lock_deadlock_fputs("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n"); + /* It is possible that the joining transaction was granted its + lock when we rolled back some other waiting transaction. */ + + if (ctx->start->lock.wait_lock != 0) { + lock_deadlock_fputs( + "*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n"); - lock_deadlock_lock_print(ctx->start->lock.wait_lock); + lock_deadlock_lock_print(ctx->start->lock.wait_lock); + } #ifdef UNIV_DEBUG if (lock_print_waits) { @@ -3669,6 +3721,7 @@ lock_deadlock_select_victim( const lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */ { ut_ad(lock_mutex_own()); + ut_ad(ctx->start->lock.wait_lock != 0); ut_ad(ctx->wait_lock->trx != ctx->start); if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) { @@ -3694,8 +3747,10 @@ lock_deadlock_check( { ut_ad(lock_mutex_own()); - /* If it is the joining transaction wait lock. */ - if (lock == ctx->start->lock.wait_lock) { + /* If it is the joining transaction wait lock or the joining + transaction was granted its lock due to deadlock detection. */ + if (lock == ctx->start->lock.wait_lock + || ctx->start->lock.wait_lock == NULL) { ; /* Skip */ } else if (lock == ctx->wait_lock) { @@ -3776,7 +3831,8 @@ lock_deadlock_push( } /********************************************************************//** -Looks iteratively for a deadlock. +Looks iteratively for a deadlock. Note: the joining transaction may +have been granted its lock by the deadlock checks. @return 0 if no deadlock else the victim transaction id.*/ static trx_id_t @@ -3811,7 +3867,9 @@ lock_deadlock_search( /* Found a cycle. */ - lock_deadlock_notify(ctx, lock); + if (!srv_read_only_mode) { + lock_deadlock_notify(ctx, lock); + } return(lock_deadlock_select_victim(ctx)->id); @@ -3882,6 +3940,7 @@ lock_deadlock_joining_trx_print( const lock_t* lock) /*!< in: lock trx wants */ { ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); /* If the lock search exceeds the max step or the max depth, the current trx will be @@ -3968,7 +4027,9 @@ lock_deadlock_check_and_resolve( ut_a(trx == ctx.start); ut_a(victim_trx_id == trx->id); - lock_deadlock_joining_trx_print(trx, lock); + if (!srv_read_only_mode) { + lock_deadlock_joining_trx_print(trx, lock); + } MONITOR_INC(MONITOR_DEADLOCK); @@ -4017,6 +4078,7 @@ lock_table_create( ut_ad(table && trx); ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(trx)); + ut_ad(!(type_mode & LOCK_CONV_BY_OTHER)); /* Non-locking autocommit read-only transactions should not set any locks. 
*/ @@ -4203,7 +4265,7 @@ DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another transaction was chosen as a victim, and we got the lock immediately: no need to wait then */ static -ulint +dberr_t lock_table_enqueue_waiting( /*=======================*/ ulint mode, /*!< in: lock mode this transaction is @@ -4333,7 +4395,7 @@ Locks the specified database table in the mode given. If the lock cannot be granted immediately, the query thread is put to wait. @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +dberr_t lock_table( /*=======*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, @@ -4344,7 +4406,7 @@ lock_table( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; + dberr_t err; const lock_t* wait_for; ut_ad(table && thr); @@ -4570,11 +4632,38 @@ lock_release( lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) { if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_dequeue_from_page(lock); +#ifdef UNIV_DEBUG + /* Check if the transcation locked a record + in a system table in X mode. It should have set + the dict_op code correctly if it did. */ + if (lock->index->table->id < DICT_HDR_FIRST_ID + && lock_get_mode(lock) == LOCK_X) { + + ut_ad(lock_get_mode(lock) != LOCK_IX); + ut_ad(trx->dict_operation != TRX_DICT_OP_NONE); + } +#endif /* UNIV_DEBUG */ + + lock_rec_dequeue_from_page(lock); } else { + dict_table_t* table; + + table = lock->un_member.tab_lock.table; +#ifdef UNIV_DEBUG ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + /* Check if the transcation locked a system table + in IX mode. It should have set the dict_op code + correctly if it did. */ + if (table->id < DICT_HDR_FIRST_ID + && (lock_get_mode(lock) == LOCK_X + || lock_get_mode(lock) == LOCK_IX)) { + + ut_ad(trx->dict_operation != TRX_DICT_OP_NONE); + } +#endif /* UNIV_DEBUG */ + if (lock_get_mode(lock) != LOCK_IS && trx->undo_no != 0) { @@ -4582,8 +4671,7 @@ lock_release( block the use of the MySQL query cache for all currently active transactions. */ - lock->un_member.tab_lock.table - ->query_cache_inv_trx_id = max_trx_id; + table->query_cache_inv_trx_id = max_trx_id; } lock_table_dequeue(lock); @@ -5059,7 +5147,9 @@ lock_print_info_summary( "LATEST DETECTED DEADLOCK\n" "------------------------\n", file); - ut_copy_file(file, lock_latest_err_file); + if (!srv_read_only_mode) { + ut_copy_file(file, lock_latest_err_file); + } } fputs("------------\n" @@ -5085,6 +5175,10 @@ lock_print_info_summary( /* Should never be in this state while the system is running. */ ut_error; + case PURGE_STATE_DISABLED: + fprintf(file, "disabled"); + break; + case PURGE_STATE_RUN: fprintf(file, "running"); /* Check if it is waiting for more data to arrive. */ @@ -5418,6 +5512,8 @@ lock_rec_queue_validate( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); ut_ad(lock_mutex_own() == locked_lock_trx_sys); + ut_ad(!index || dict_index_is_clust(index) + || !dict_index_is_online_ddl(index)); heap_no = page_rec_get_heap_no(rec); @@ -5694,20 +5790,26 @@ lock_rec_block_validate( If the lock exists in lock_rec_validate_page() we assert !block->page.file_page_was_freed. */ + buf_block_t* block; mtr_t mtr; - mtr_start(&mtr); + /* Make sure that the tablespace is not deleted while we are + trying to access the page. 
*/ + if (!fil_inc_pending_ops(space)) { + mtr_start(&mtr); + block = buf_page_get_gen( + space, fil_space_get_zip_size(space), + page_no, RW_X_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, + __FILE__, __LINE__, &mtr); - buf_block_t* block = buf_page_get_gen( - space, fil_space_get_zip_size(space), - page_no, RW_X_LATCH, NULL, - BUF_GET_POSSIBLY_FREED, - __FILE__, __LINE__, &mtr); + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + ut_ad(lock_rec_validate_page(block)); + mtr_commit(&mtr); - ut_ad(lock_rec_validate_page(block)); - mtr_commit(&mtr); + fil_decr_pending_ops(space); + } } /*********************************************************************//** @@ -5765,7 +5867,7 @@ the query thread to the lock wait state and inserts a waiting request for a gap x-lock to the lock queue. @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +dberr_t lock_rec_insert_check_and_lock( /*===========================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is @@ -5783,10 +5885,13 @@ lock_rec_insert_check_and_lock( const rec_t* next_rec; trx_t* trx; lock_t* lock; - ulint err; + dberr_t err; ulint next_rec_heap_no; ut_ad(block->frame == page_align(rec)); + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); if (flags & BTR_NO_LOCKING_FLAG) { @@ -5803,11 +5908,9 @@ lock_rec_insert_check_and_lock( to hold trx->mutex here. */ /* When inserting a record into an index, the table must be at - least IX-locked or we must be building an index, in which case - the table must be at least S-locked. */ - ut_ad(lock_table_has(trx, index->table, LOCK_IX) - || (*index->name == TEMP_INDEX_PREFIX - && lock_table_has(trx, index->table, LOCK_S))); + least IX-locked. When we are building an index, we would pass + BTR_NO_LOCKING_FLAG and skip the locking altogether. */ + ut_ad(lock_table_has(trx, index->table, LOCK_IX)); lock = lock_rec_get_first(block, next_rec_heap_no); @@ -5850,7 +5953,7 @@ lock_rec_insert_check_and_lock( err = lock_rec_enqueue_waiting( LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, - block, next_rec_heap_no, index, thr); + block, next_rec_heap_no, NULL, index, thr); trx_mutex_exit(trx); } else { @@ -5871,6 +5974,9 @@ lock_rec_insert_check_and_lock( page_update_max_trx_id(block, buf_block_get_page_zip(block), trx->id, mtr); + default: + /* We only care about the two return values. */ + break; } #ifdef UNIV_DEBUG @@ -5920,6 +6026,7 @@ lock_rec_convert_impl_to_expl( this transaction. The transaction may have been committed a long time ago. */ } else { + ut_ad(!dict_index_is_online_ddl(index)); trx_id = lock_sec_rec_some_has_impl(rec, index, offsets); /* The transaction can be committed before the trx_is_active(trx_id, NULL) check below, because we are not @@ -5943,10 +6050,26 @@ lock_rec_convert_impl_to_expl( if (impl_trx != NULL && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, impl_trx)) { + ulint type_mode = (LOCK_REC | LOCK_X + | LOCK_REC_NOT_GAP); + + /* If the delete-marked record was locked already, + we should reserve lock waiting for impl_trx as + implicit lock. 
Because cannot lock at this moment.*/ + + if (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) + && lock_rec_other_has_conflicting( + static_cast<enum lock_mode> + (LOCK_X | LOCK_REC_NOT_GAP), block, + heap_no, impl_trx)) { + + type_mode |= (LOCK_WAIT + | LOCK_CONV_BY_OTHER); + } lock_rec_add_to_queue( - LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, index, impl_trx, FALSE); + type_mode, block, heap_no, index, + impl_trx, FALSE); } lock_mutex_exit(); @@ -5962,7 +6085,7 @@ lock wait state and inserts a waiting request for a record x-lock to the lock queue. @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +dberr_t lock_clust_rec_modify_check_and_lock( /*=================================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5974,7 +6097,7 @@ lock_clust_rec_modify_check_and_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + dberr_t err; ulint heap_no; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -6020,7 +6143,7 @@ Checks if locks of other transactions prevent an immediate modify (delete mark or delete unmark) of a secondary index record. @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +dberr_t lock_sec_rec_modify_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -6032,13 +6155,15 @@ lock_sec_rec_modify_check_and_lock( clustered index record first: see the comment below */ dict_index_t* index, /*!< in: secondary index */ - que_thr_t* thr, /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread + (can be NULL if BTR_NO_LOCKING_FLAG) */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint err; + dberr_t err; ulint heap_no; ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG)); ut_ad(block->frame == page_align(rec)); if (flags & BTR_NO_LOCKING_FLAG) { @@ -6103,7 +6228,7 @@ secondary index record. @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -enum db_err +dberr_t lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -6124,10 +6249,11 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - enum db_err err; - ulint heap_no; + dberr_t err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index)); ut_ad(block->frame == page_align(rec)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(rec_offs_validate(rec, index, offsets)); @@ -6180,7 +6306,7 @@ lock on the record. 
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -enum db_err +dberr_t lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -6201,8 +6327,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - enum db_err err; - ulint heap_no; + dberr_t err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -6230,7 +6356,8 @@ lock_clust_rec_read_check_and_lock( ut_ad(mode != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr); + err = lock_rec_lock(FALSE, mode | gap_mode, + block, heap_no, index, thr); MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); @@ -6251,7 +6378,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter "offsets". @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +dberr_t lock_clust_rec_read_check_and_lock_alt( /*===================================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -6274,7 +6401,7 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint err; + dberr_t err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, @@ -6469,6 +6596,8 @@ lock_get_table( { switch (lock_get_type_low(lock)) { case LOCK_REC: + ut_ad(dict_index_is_clust(lock->index) + || !dict_index_is_online_ddl(lock->index)); return(lock->index->table); case LOCK_TABLE: return(lock->un_member.tab_lock.table); @@ -6521,6 +6650,8 @@ lock_rec_get_index( const lock_t* lock) /*!< in: lock */ { ut_a(lock_get_type_low(lock) == LOCK_REC); + ut_ad(dict_index_is_clust(lock->index) + || !dict_index_is_online_ddl(lock->index)); return(lock->index); } @@ -6536,6 +6667,8 @@ lock_rec_get_index_name( const lock_t* lock) /*!< in: lock */ { ut_a(lock_get_type_low(lock) == LOCK_REC); + ut_ad(dict_index_is_clust(lock->index) + || !dict_index_is_online_ddl(lock->index)); return(lock->index->name); } @@ -6581,6 +6714,7 @@ lock_cancel_waiting_and_release( ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(lock->trx)); + ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER)); lock->trx->lock.cancel = TRUE; @@ -6656,10 +6790,14 @@ lock_trx_release_locks( { assert_trx_in_list(trx); - if (UNIV_UNLIKELY(trx_state_eq(trx, TRX_STATE_PREPARED))) { + if (trx_state_eq(trx, TRX_STATE_PREPARED)) { mutex_enter(&trx_sys->mutex); ut_a(trx_sys->n_prepared_trx > 0); trx_sys->n_prepared_trx--; + if (trx->is_recovered) { + ut_a(trx_sys->n_prepared_recovered_trx > 0); + trx_sys->n_prepared_recovered_trx--; + } mutex_exit(&trx_sys->mutex); } else { ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); @@ -6714,12 +6852,12 @@ was selected as a deadlock victim, or if it has to wait then cancel the wait lock. 
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ UNIV_INTERN -enum db_err +dberr_t lock_trx_handle_wait( /*=================*/ trx_t* trx) /*!< in/out: trx lock state */ { - enum db_err err; + dberr_t err; lock_mutex_enter(); @@ -6800,6 +6938,8 @@ lock_table_locks_lookup( ut_a(lock->trx == trx); if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(!dict_index_is_online_ddl(lock->index) + || dict_index_is_clust(lock->index)); if (lock->index->table == table) { return(lock); } @@ -6828,18 +6968,89 @@ lock_table_has_locks( lock_mutex_enter(); + has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0; + #ifdef UNIV_DEBUG - mutex_enter(&trx_sys->mutex); + if (!has_locks) { + mutex_enter(&trx_sys->mutex); - ut_ad(lock_table_locks_lookup(table, &trx_sys->rw_trx_list) == NULL); - ut_ad(lock_table_locks_lookup(table, &trx_sys->ro_trx_list) == NULL); + ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list)); + ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list)); - mutex_exit(&trx_sys->mutex); + mutex_exit(&trx_sys->mutex); + } #endif /* UNIV_DEBUG */ - has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0; - lock_mutex_exit(); return(has_locks); } + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Check if the transaction holds any locks on the sys tables +or its records. +@return the strongest lock found on any sys table or 0 for none */ +UNIV_INTERN +const lock_t* +lock_trx_has_sys_table_locks( +/*=========================*/ + const trx_t* trx) /*!< in: transaction to check */ +{ + lint i; + const lock_t* strongest_lock = 0; + lock_mode strongest = LOCK_NONE; + + lock_mutex_enter(); + + /* Find a valid mode. Note: ib_vector_size() can be 0. */ + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock != NULL + && dict_is_sys_table(lock->un_member.tab_lock.table->id)) { + + strongest = lock_get_mode(lock); + ut_ad(strongest != LOCK_NONE); + strongest_lock = lock; + break; + } + } + + if (strongest == LOCK_NONE) { + lock_mutex_exit(); + return(NULL); + } + + for (/* No op */; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + ut_ad(trx == lock->trx); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(lock->un_member.tab_lock.table != NULL); + + lock_mode mode = lock_get_mode(lock); + + if (dict_is_sys_table(lock->un_member.tab_lock.table->id) + && lock_mode_stronger_or_eq(mode, strongest)) { + + strongest = mode; + strongest_lock = lock; + } + } + + lock_mutex_exit(); + + return(strongest_lock); +} +#endif /* UNIV_DEBUG */ |
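
Note on the new UNIV_DEBUG helper lock_trx_has_sys_table_locks() added at the end of this patch: it scans trx->lock.table_locks and returns the strongest lock the transaction still holds on a data-dictionary (system) table, or NULL if there is none. A minimal sketch of how a debug build might consume it is shown below; the wrapper function and call site are hypothetical and not part of this patch, and it assumes the helper's declaration is visible (e.g. via lock0lock.h).

#ifdef UNIV_DEBUG
/* Hypothetical debug check: after a transaction has released its
dictionary-operation locks, assert that no locks on SYS_* tables
remain.  Assumes lock_trx_has_sys_table_locks() is declared in a
header included by the caller. */
static void
assert_no_sys_table_locks(
/*======================*/
	const trx_t*	trx)	/*!< in: transaction to check */
{
	const lock_t*	lock = lock_trx_has_sys_table_locks(trx);

	/* NULL means the transaction holds no table locks on any
	system table (table id below DICT_HDR_FIRST_ID). */
	ut_ad(lock == NULL);
}
#endif /* UNIV_DEBUG */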