54 files changed, 2296 insertions, 989 deletions
diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c
index ed97fc5bd58..a3e57d632a0 100644
--- a/storage/xtradb/btr/btr0btr.c
+++ b/storage/xtradb/btr/btr0btr.c
@@ -1641,7 +1641,7 @@ btr_page_reorganize_low(
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	btr_assert_not_corrupted(block, index);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	data_size1 = page_get_data_size(page);
 	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
@@ -1760,7 +1760,7 @@ btr_page_reorganize_low(
 
 func_exit:
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 #ifndef UNIV_HOTBACKUP
 	buf_block_free(temp_block);
@@ -1835,7 +1835,7 @@ btr_page_empty(
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(page_zip == buf_block_get_page_zip(block));
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	btr_search_drop_page_hash_index(block);
@@ -1892,10 +1892,10 @@ btr_root_raise_and_insert(
 	root_block = btr_cur_get_block(cursor);
 	root_page_zip = buf_block_get_page_zip(root_block);
 	ut_ad(page_get_n_recs(root) > 0);
+	index = btr_cur_get_index(cursor);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
+	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
 #endif /* UNIV_ZIP_DEBUG */
-	index = btr_cur_get_index(cursor);
 #ifdef UNIV_BTR_DEBUG
 	if (!dict_index_is_ibuf(index)) {
 		ulint	space = dict_index_get_space(index);
@@ -2825,8 +2825,8 @@ insert_empty:
 
 #ifdef UNIV_ZIP_DEBUG
 	if (UNIV_LIKELY_NULL(page_zip)) {
-		ut_a(page_zip_validate(page_zip, page));
-		ut_a(page_zip_validate(new_page_zip, new_page));
+		ut_a(page_zip_validate(page_zip, page, cursor->index));
+		ut_a(page_zip_validate(new_page_zip, new_page, cursor->index));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -2860,7 +2860,8 @@ insert_empty:
 			= buf_block_get_page_zip(insert_block);
 
 		ut_a(!insert_page_zip
-		     || page_zip_validate(insert_page_zip, insert_page));
+		     || page_zip_validate(insert_page_zip, insert_page,
+					  cursor->index));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -3140,6 +3141,8 @@ btr_lift_page_up(
 	buf_block_t*	blocks[BTR_MAX_LEVELS];
 	ulint		n_blocks;	/*!< last used index in blocks[] */
 	ulint		i;
+	ibool		lift_father_up	= FALSE;
+	buf_block_t*	block_orig	= block;
 
 	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
 	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3150,11 +3153,13 @@ btr_lift_page_up(
 
 	{
 		btr_cur_t	cursor;
-		mem_heap_t*	heap	= mem_heap_create(100);
-		ulint*		offsets;
+		ulint*		offsets	= NULL;
+		mem_heap_t*	heap	= mem_heap_create(
+			sizeof(*offsets)
+			* (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
 		buf_block_t*	b;
 
-		offsets = btr_page_get_father_block(NULL, heap, index,
+		offsets = btr_page_get_father_block(offsets, heap, index,
 						    block, mtr, &cursor);
 		father_block = btr_cur_get_block(&cursor);
 		father_page_zip = buf_block_get_page_zip(father_block);
@@ -3178,6 +3183,29 @@ btr_lift_page_up(
 			blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
 		}
 
+		if (n_blocks && page_level == 0) {
+			/* The father page should also be the only page on its
+			level (and not the root). Lift up the father page
+			first, because a leaf page should be lifted up only
+			into the root page. Page freeing chooses the segment
+			based on page_level (==0 or !=0). If page_level were
+			changed from !=0 to ==0, a later free of the page
+			would not find the page allocation to be freed. */
+
+			lift_father_up = TRUE;
+			block = father_block;
+			page = buf_block_get_frame(block);
+			page_level = btr_page_get_level(page, mtr);
+
+			ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+			ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+			ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+			father_block = blocks[0];
+			father_page_zip = buf_block_get_page_zip(father_block);
+			father_page = buf_block_get_frame(father_block);
+		}
+
 		mem_heap_free(heap);
 	}
 
@@ -3185,6 +3213,7 @@ btr_lift_page_up(
 
 	/* Make the father empty */
 	btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+	page_level++;
 
 	/* Copy the records to the father page one by one. */
 	if (0
@@ -3217,7 +3246,7 @@ btr_lift_page_up(
 	lock_update_copy_and_discard(father_block, block);
 
 	/* Go upward to root page, decrementing levels by one. */
-	for (i = 0; i < n_blocks; i++, page_level++) {
+	for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
 		page_t*		page	= buf_block_get_frame(blocks[i]);
 		page_zip_des_t*	page_zip= buf_block_get_page_zip(blocks[i]);
 
@@ -3225,7 +3254,7 @@ btr_lift_page_up(
 
 		btr_page_set_level(page, page_zip, page_level, mtr);
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
@@ -3239,7 +3268,7 @@ btr_lift_page_up(
 	ut_ad(page_validate(father_page, index));
 	ut_ad(btr_check_node_ptr(index, father_block, mtr));
 
-	return(father_block);
+	return(lift_father_up ? block_orig : father_block);
 }
 
 /*************************************************************//**
@@ -3310,6 +3339,7 @@ btr_compress(
 
 	if (adjust) {
 		nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+		ut_ad(nth_rec > 0);
 	}
 
 	/* Decide the page to which we try to merge and which will inherit
@@ -3400,8 +3430,8 @@ err_exit:
 		const page_zip_des_t*	page_zip
 			= buf_block_get_page_zip(block);
 		ut_a(page_zip);
-		ut_a(page_zip_validate(merge_page_zip, merge_page));
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(merge_page_zip, merge_page, index));
+		ut_a(page_zip_validate(page_zip, page, index));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -3534,7 +3564,8 @@ err_exit:
 
 	ut_ad(page_validate(merge_page, index));
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
+	ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
+						  index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	/* Free the file page */
@@ -3545,6 +3576,7 @@ func_exit:
 	mem_heap_free(heap);
 
 	if (adjust) {
+		ut_ad(nth_rec > 0);
 		btr_cur_position(
 			index,
 			page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3716,7 +3748,7 @@ btr_discard_page(
 		page_zip_des_t*	merge_page_zip
 			= buf_block_get_page_zip(merge_block);
 		ut_a(!merge_page_zip
-		     || page_zip_validate(merge_page_zip, merge_page));
+		     || page_zip_validate(merge_page_zip, merge_page, index));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -4058,8 +4090,22 @@ btr_index_page_validate(
 {
 	page_cur_t	cur;
 	ibool		ret	= TRUE;
+#ifndef DBUG_OFF
+	ulint		nth	= 1;
+#endif /* !DBUG_OFF */
 
 	page_cur_set_before_first(block, &cur);
+
+	/* Directory slot 0 should only contain the infimum record. */
+	DBUG_EXECUTE_IF("check_table_rec_next",
+			ut_a(page_rec_get_nth_const(
+				     page_cur_get_page(&cur), 0)
+			     == cur.rec);
+			ut_a(page_dir_slot_get_n_owned(
+				     page_dir_get_nth_slot(
+					     page_cur_get_page(&cur), 0))
+			     == 1););
+
 	page_cur_move_to_next(&cur);
 
 	for (;;) {
@@ -4073,6 +4119,16 @@ btr_index_page_validate(
 			return(FALSE);
 		}
 
+		/* Verify that page_rec_get_nth_const() is correctly
+		retrieving each record. */
+		DBUG_EXECUTE_IF("check_table_rec_next",
+				ut_a(cur.rec == page_rec_get_nth_const(
+					     page_cur_get_page(&cur),
+					     page_rec_get_n_recs_before(
+						     cur.rec)));
+				ut_a(nth++ == page_rec_get_n_recs_before(
+					     cur.rec)););
+
 		page_cur_move_to_next(&cur);
 	}
 
@@ -4170,7 +4226,7 @@ btr_validate_level(
 		ut_a(space == page_get_space_id(page));
 #ifdef UNIV_ZIP_DEBUG
 		page_zip = buf_block_get_page_zip(block);
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 		ut_a(!page_is_leaf(page));
 
@@ -4198,7 +4254,7 @@ loop:
 
 #ifdef UNIV_ZIP_DEBUG
 	page_zip = buf_block_get_page_zip(block);
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	/* Check ordering etc. of records */
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c
index 687853a422e..d089fb5ad22 100644
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@@ -97,6 +97,11 @@ srv_refresh_innodb_monitor_stats().  Referenced by
 srv_printf_innodb_monitor(). */
 UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;
 
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+UNIV_INTERN uint	btr_cur_limit_optimistic_insert_debug = 0;
+#endif /* UNIV_DEBUG */
+
 /** In the optimistic insert, if the insert does not fit, but this much space
 can be released by page reorganize, then it is reorganized */
 #define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)
@@ -748,7 +753,7 @@ retry_page_get:
 #ifdef UNIV_ZIP_DEBUG
 		const page_zip_des_t*	page_zip
 			= buf_block_get_page_zip(block);
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 		buf_block_dbg_add_level(
@@ -1378,6 +1383,9 @@ btr_cur_optimistic_insert(
 		}
 	}
 
+	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
+				      goto fail);
+
 	/* If there have been many consecutive inserts, and we are on the leaf
 	level, check if we have to split the page to reserve enough free space
 	for future updates of records. */
@@ -2189,7 +2197,7 @@ any_extern:
 
 	page_zip = buf_block_get_page_zip(block);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	if (page_zip
@@ -2406,7 +2414,7 @@ btr_cur_pessimistic_update(
 				MTR_MEMO_X_LOCK));
 	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	/* The insert buffer tree should never be updated in place. */
 	ut_ad(!dict_index_is_ibuf(index));
@@ -2561,7 +2569,7 @@ make_external:
 	btr_search_update_hash_on_delete(cursor);
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	page_cursor = btr_cur_get_page_cur(cursor);
 
@@ -2668,7 +2676,7 @@ make_external:
 		buf_block_t*	rec_block = btr_cur_get_block(cursor);
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 		page = buf_block_get_frame(rec_block);
 #endif /* UNIV_ZIP_DEBUG */
 		page_zip = buf_block_get_page_zip(rec_block);
@@ -2694,7 +2702,7 @@ make_external:
 
 return_after_reservations:
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	if (n_extents > 0) {
@@ -3066,7 +3074,7 @@ btr_cur_set_deleted_flag_for_ibuf(
 					when the tablespace is
 					uncompressed */
 	ibool		val,		/*!< in: value to set */
-	mtr_t*		mtr)		/*!< in: mtr */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	/* We do not need to reserve btr_search_latch, as the page
 	has just been read to the buffer pool and there cannot be
@@ -3171,12 +3179,14 @@ btr_cur_optimistic_delete(
 				page, 1);
 		}
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip
+		     || page_zip_validate(page_zip, page, cursor->index));
 #endif /* UNIV_ZIP_DEBUG */
 		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
 				    cursor->index, offsets, mtr);
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip
+		     || page_zip_validate(page_zip, page, cursor->index));
 #endif /* UNIV_ZIP_DEBUG */
 
 		if (dict_index_is_clust(cursor->index)
@@ -3273,7 +3283,7 @@ btr_cur_pessimistic_delete(
 	rec = btr_cur_get_rec(cursor);
 	page_zip = buf_block_get_page_zip(block);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
@@ -3283,7 +3293,7 @@ btr_cur_pessimistic_delete(
 						      rec, offsets, page_zip,
 						      rb_ctx, mtr);
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
@@ -3344,7 +3354,7 @@ btr_cur_pessimistic_delete(
 
 	page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	ut_ad(btr_check_node_ptr(index, block, mtr));
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index bbc1042ca78..5bcfb0f51b9 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -66,9 +66,7 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
 	byte            block_hash_offset;
 
 	ut_ad(block);
-
-	if (!innobase_get_slow_log() || !trx || !trx->take_stats)
-		return;
+	ut_ad(trx && trx->take_stats);
 
 	if (!trx->distinct_page_access_hash) {
 		trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
@@ -279,7 +277,7 @@ the read requests for the whole area.
 
 #ifndef UNIV_HOTBACKUP
 /** Value in microseconds */
-static const int WAIT_FOR_READ	= 5000;
+static const int WAIT_FOR_READ	= 100;
 /** Number of attemtps made to read in a page in the buffer pool */
 static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
 
@@ -412,6 +410,33 @@ buf_get_total_list_len(
 }
 
 /********************************************************************//**
+Sums the LRU, unzip_LRU and flush list sizes in bytes over all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+	buf_pools_list_size_t*	buf_pools_list_size)	/*!< out: list sizes
+							in all buffer pools */
+{
+	ulint			i;
+	ut_ad(buf_pools_list_size);
+	memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
+
+	for (i = 0; i < srv_buf_pool_instances; i++) {
+		buf_pool_t*	buf_pool;
+
+		buf_pool = buf_pool_from_array(i);
+		/* We don't need mutex protection since this is
+		for statistics purposes */
+		buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
+		buf_pools_list_size->unzip_LRU_bytes +=
+			UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
+		buf_pools_list_size->flush_list_bytes +=
+			buf_pool->stat.flush_list_bytes;
+	}
+}
+
+/********************************************************************//**
 Get total buffer pool statistics. */
 UNIV_INTERN
 void
@@ -1807,40 +1832,24 @@ buf_page_make_young(
 }
 
 /********************************************************************//**
-Sets the time of the first access of a page and moves a page to the
-start of the buffer pool LRU list if it is too old.  This high-level
-function can be used to prevent an important page from slipping
-out of the buffer pool. */
+Moves a page to the start of the buffer pool LRU list if it is too old.
+This high-level function can be used to prevent an important page from
+slipping out of the buffer pool. */
 static
 void
-buf_page_set_accessed_make_young(
-/*=============================*/
-	buf_page_t*	bpage,		/*!< in/out: buffer block of a
+buf_page_make_young_if_needed(
+/*==========================*/
+	buf_page_t*	bpage)		/*!< in/out: buffer block of a
 					file page */
-	unsigned	access_time)	/*!< in: bpage->access_time
-					read under mutex protection,
-					or 0 if unknown */
 {
+#ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-
 	ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
 	ut_a(buf_page_in_file(bpage));
 
 	if (buf_page_peek_if_too_old(bpage)) {
-		//buf_pool_mutex_enter(buf_pool);
-		mutex_enter(&buf_pool->LRU_list_mutex);
-		buf_LRU_make_block_young(bpage);
-		//buf_pool_mutex_exit(buf_pool);
-		mutex_exit(&buf_pool->LRU_list_mutex);
-	} else if (!access_time) {
-		ulint	time_ms = ut_time_ms();
-		mutex_t*	block_mutex = buf_page_get_mutex_enter(bpage);
-		//buf_pool_mutex_enter(buf_pool);
-		if (block_mutex) {
-		buf_page_set_accessed(bpage, time_ms);
-		mutex_exit(block_mutex);
-		}
-		//buf_pool_mutex_exit(buf_pool);
+		buf_page_make_young(bpage);
 	}
 }
 
@@ -1959,7 +1968,6 @@ buf_page_get_zip(
 	buf_page_t*	bpage;
 	mutex_t*	block_mutex;
 	ibool		must_read;
-	unsigned	access_time;
 	trx_t*		trx = NULL;
 	ulint		sec;
 	ulint		ms;
@@ -1967,7 +1975,7 @@ buf_page_get_zip(
 	ib_uint64_t	finish_time;
 	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
 
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
 		trx = innobase_get_trx();
 	}
 	buf_pool->stat.n_page_gets++;
@@ -2089,13 +2097,14 @@ err_exit:
 
 got_block:
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
-	access_time = buf_page_is_accessed(bpage);
 
 	//buf_pool_mutex_exit(buf_pool);
 
+	buf_page_set_accessed(bpage);
+
 	mutex_exit(block_mutex);
 
-	buf_page_set_accessed_make_young(bpage, access_time);
+	buf_page_make_young_if_needed(bpage);
 
 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
 	ut_a(!bpage->file_page_was_freed);
@@ -2111,7 +2120,7 @@ got_block:
 		/* Let us wait until the read operation
 		completes */
 
-		if (innobase_get_slow_log() && trx && trx->take_stats)
+		if (UNIV_UNLIKELY(trx && trx->take_stats))
 		{
 			ut_usectime(&sec, &ms);
 			start_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -2132,7 +2141,7 @@ got_block:
 				break;
 			}
 		}
-		if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
+		if (UNIV_UNLIKELY(start_time != 0))
 		{
 			ut_usectime(&sec, &ms);
 			finish_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -2487,7 +2496,7 @@ buf_page_get_gen(
 	      || ibuf_page_low(space, zip_size, offset,
 			       FALSE, file, line, NULL));
 #endif
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
 		trx = innobase_get_trx();
 	}
 	buf_pool->stat.n_page_gets++;
@@ -2774,6 +2783,8 @@ wait_until_unfixed:
 
 		UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
+		access_time = buf_page_is_accessed(&block->page);
+
 		mutex_exit(block_mutex);
 		mutex_exit(&buf_pool->zip_mutex);
 
@@ -2781,18 +2792,22 @@ wait_until_unfixed:
 		buf_pool->n_pend_unzip++;
 		buf_pool_mutex_exit(buf_pool);
 
-		//buf_pool_mutex_exit(buf_pool);
-
 		buf_page_free_descriptor(bpage);
 
-		/* Decompress the page and apply buffered operations
-		while not holding buf_pool->mutex or block->mutex. */
+		/* Decompress the page while not holding
+		buf_pool->mutex or block->mutex. */
 		success = buf_zip_decompress(block, srv_use_checksums);
 		ut_a(success);
 
 		if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
-			ibuf_merge_or_delete_for_page(block, space, offset,
-						      zip_size, TRUE);
+			if (access_time) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+				ut_a(ibuf_count_get(space, offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+			} else {
+				ibuf_merge_or_delete_for_page(
+					block, space, offset, zip_size, TRUE);
+			}
 		}
 
 		/* Unfix and unlatch the block. */
@@ -2888,17 +2903,16 @@ wait_until_unfixed:
 	ut_a(mode == BUF_GET_POSSIBLY_FREED
 	     || !block->page.file_page_was_freed);
 #endif
-	//mutex_exit(&block->mutex);
 
 	/* Check if this is the first access to the page */
-
 	access_time = buf_page_is_accessed(&block->page);
 
-	//buf_pool_mutex_exit(buf_pool);
-	mutex_exit(block_mutex);
+	buf_page_set_accessed(&block->page);
+
+	mutex_exit(&block->mutex);
 
-	if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
-		buf_page_set_accessed_make_young(&block->page, access_time);
+	if (mode != BUF_PEEK_IF_IN_POOL) {
+		buf_page_make_young_if_needed(&block->page);
 	}
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2913,7 +2927,7 @@ wait_until_unfixed:
 			/* Let us wait until the read operation
 			completes */
 
-			if (innobase_get_slow_log() && trx && trx->take_stats)
+			if (UNIV_UNLIKELY(trx && trx->take_stats))
 			{
 				ut_usectime(&sec, &ms);
 				start_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -2928,13 +2942,14 @@ wait_until_unfixed:
 				mutex_exit(&block->mutex);
 
 				if (io_fix == BUF_IO_READ) {
-
-					os_thread_sleep(WAIT_FOR_READ);
+					/* wait by a temporary s-latch */
+					rw_lock_s_lock(&(block->lock));
+					rw_lock_s_unlock(&(block->lock));
 				} else {
 					break;
 				}
 			}
-			if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
+			if (UNIV_UNLIKELY(start_time != 0))
 			{
 				ut_usectime(&sec, &ms);
 				finish_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -2961,7 +2976,7 @@ wait_until_unfixed:
 
 	mtr_memo_push(mtr, block, fix_type);
 
-	if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
+	if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
 
@@ -2973,7 +2988,7 @@ wait_until_unfixed:
 	ut_a(ibuf_count_get(buf_block_get_space(block),
 			    buf_block_get_page_no(block)) == 0);
 #endif
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(trx && trx->take_stats)) {
 		_increment_page_get_statistics(block, trx);
 	}
 
@@ -3018,15 +3033,13 @@ buf_page_optimistic_get(
 
 	buf_block_buf_fix_inc(block, file, line);
 
-	mutex_exit(&block->mutex);
+	access_time = buf_page_is_accessed(&block->page);
 
-	/* Check if this is the first access to the page.
-	We do a dirty read on purpose, to avoid mutex contention.
-	This field is only used for heuristic purposes; it does not
-	affect correctness. */
+	buf_page_set_accessed(&block->page);
 
-	access_time = buf_page_is_accessed(&block->page);
-	buf_page_set_accessed_make_young(&block->page, access_time);
+	mutex_exit(&block->mutex);
+
+	buf_page_make_young_if_needed(&block->page);
 
 	ut_ad(!ibuf_inside(mtr)
 	      || ibuf_page(buf_block_get_space(block),
@@ -3078,11 +3091,11 @@ buf_page_optimistic_get(
 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
 	ut_a(block->page.file_page_was_freed == FALSE);
 #endif
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
 		trx = innobase_get_trx();
 	}
 
-	if (UNIV_UNLIKELY(!access_time)) {
+	if (!access_time) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
 
@@ -3099,7 +3112,7 @@ buf_page_optimistic_get(
 	buf_pool = buf_pool_from_block(block);
 	buf_pool->stat.n_page_gets++;
 
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(trx && trx->take_stats)) {
 		_increment_page_get_statistics(block, trx);
 	}
 	return(TRUE);
@@ -3149,28 +3162,14 @@ buf_page_get_known_nowait(
 
 	buf_block_buf_fix_inc(block, file, line);
 
+	buf_page_set_accessed(&block->page);
+
 	mutex_exit(&block->mutex);
 
 	buf_pool = buf_pool_from_block(block);
 
-	if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
-		//buf_pool_mutex_enter(buf_pool);
-		mutex_enter(&buf_pool->LRU_list_mutex);
-		buf_LRU_make_block_young(&block->page);
-		//buf_pool_mutex_exit(buf_pool);
-		mutex_exit(&buf_pool->LRU_list_mutex);
-	} else if (!buf_page_is_accessed(&block->page)) {
-		/* Above, we do a dirty read on purpose, to avoid
-		mutex contention.  The field buf_page_t::access_time
-		is only used for heuristic purposes.  Writes to the
-		field must be protected by mutex, however. */
-		ulint	time_ms = ut_time_ms();
-
-		//buf_pool_mutex_enter(buf_pool);
-		mutex_enter(&block->mutex);
-		buf_page_set_accessed(&block->page, time_ms);
-		//buf_pool_mutex_exit(buf_pool);
-		mutex_exit(&block->mutex);
+	if (mode == BUF_MAKE_YOUNG) {
+		buf_page_make_young_if_needed(&block->page);
 	}
 
 	ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
@@ -3211,9 +3210,13 @@ buf_page_get_known_nowait(
 #endif
 	buf_pool->stat.n_page_gets++;
 
-	if (innobase_get_slow_log()) {
+	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
+
 		trx = innobase_get_trx();
-		_increment_page_get_statistics(block, trx);
+		if (trx != NULL && trx->take_stats) {
+
+			_increment_page_get_statistics(block, trx);
+		}
 	}
 
 	return(TRUE);
@@ -3342,6 +3345,7 @@ buf_page_init(
 	ulint		offset,	/*!< in: offset of the page within space
 				in units of a page */
 	ulint		fold,	/*!< in: buf_page_address_fold(space,offset) */
+	ulint		zip_size,/*!< in: compressed page size, or 0 */
 	buf_block_t*	block)	/*!< in/out: block to init */
 {
 	buf_page_t*	hash_page;
@@ -3411,6 +3415,9 @@ buf_page_init(
 	ut_d(block->page.in_page_hash = TRUE);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 		    fold, &block->page);
+	if (zip_size) {
+		page_zip_set_size(&block->page.zip, zip_size);
+	}
 }
 
 /********************************************************************//**
@@ -3537,7 +3544,7 @@ err_exit:
 
 		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 
-		buf_page_init(buf_pool, space, offset, fold, block);
+		buf_page_init(buf_pool, space, offset, fold, zip_size, block);
 
 		rw_lock_x_unlock(&buf_pool->page_hash_latch);
 
@@ -3557,8 +3564,6 @@ err_exit:
 		buf_page_set_io_fix(bpage, BUF_IO_READ);
 
 		if (UNIV_UNLIKELY(zip_size)) {
-			page_zip_set_size(&block->page.zip, zip_size);
-
 			/* buf_pool->mutex may be released and
 			reacquired by buf_buddy_alloc().  Thus, we
 			must release block->mutex in order not to
@@ -3658,7 +3663,8 @@ err_exit:
 
 		rw_lock_x_unlock(&buf_pool->page_hash_latch);
 
-		/* The block must be put to the LRU list, to the old blocks */
+		/* The block must be put to the LRU list, to the old blocks.
+		The zip_size is already set into the page zip */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		buf_LRU_insert_zip_clean(bpage);
@@ -3706,7 +3712,6 @@ buf_page_create(
 	buf_block_t*	block;
 	ulint		fold;
 	buf_block_t*	free_block	= NULL;
-	ulint		time_ms		= ut_time_ms();
 	buf_pool_t*	buf_pool 	= buf_pool_get(space, offset);
 
 	ut_ad(mtr);
@@ -3774,7 +3779,7 @@ retry:
 
 	mutex_enter(&block->mutex);
 
-	buf_page_init(buf_pool, space, offset, fold, block);
+	buf_page_init(buf_pool, space, offset, fold, zip_size,block);
 	rw_lock_x_unlock(&buf_pool->page_hash_latch);
 
 	/* The block must be put to the LRU list */
@@ -3793,8 +3798,6 @@ retry:
 
 		buf_page_set_io_fix(&block->page, BUF_IO_READ);
 		rw_lock_x_lock(&block->lock);
-
-		page_zip_set_size(&block->page.zip, zip_size);
 		mutex_exit(&block->mutex);
 		/* buf_pool->mutex may be released and reacquired by
 		buf_buddy_alloc().  Thus, we must release block->mutex
@@ -3818,13 +3821,12 @@ retry:
 		rw_lock_x_unlock(&block->lock);
 	}
 
-	buf_page_set_accessed(&block->page, time_ms);
-
-	//buf_pool_mutex_exit(buf_pool);
 	mutex_exit(&buf_pool->LRU_list_mutex);
 
 	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
 
+	buf_page_set_accessed(&block->page);
+
 	mutex_exit(&block->mutex);
 
 	/* Delete possible entries for the page from the insert buffer:
@@ -3937,7 +3939,7 @@ buf_page_io_complete(
 	ensures that this is the only thread that handles the i/o for this
 	block. */
 
-	io_type = buf_page_get_io_fix(bpage);
+	io_type = buf_page_get_io_fix_unlocked(bpage);
 	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
 
 	if (io_type == BUF_IO_READ) {
diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c
index 39351cd3678..663e2d8f537 100644
--- a/storage/xtradb/buf/buf0flu.c
+++ b/storage/xtradb/buf/buf0flu.c
@@ -79,6 +79,23 @@ static buf_flush_stat_t	buf_flush_stat_sum;
 
 /* @} */
 
+/******************************************************************//**
+Increases the flush_list size in bytes by zip_size for a compressed page,
+or by UNIV_PAGE_SIZE for an uncompressed page. */
+static inline
+void
+incr_flush_list_size_in_bytes(
+/*==========================*/
+	buf_block_t*	block,		/*!< in: control block */
+	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
+{
+	ulint		zip_size;
+	ut_ad(buf_flush_list_mutex_own(buf_pool));
+	zip_size = page_zip_get_size(&block->page.zip);
+	buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+	ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
+}
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /******************************************************************//**
 Validates the flush list.
@@ -308,6 +325,7 @@ buf_flush_insert_into_flush_list(
 	ut_d(block->page.in_flush_list = TRUE);
 	block->page.oldest_modification = lsn;
 	UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+	incr_flush_list_size_in_bytes(block, buf_pool);
 
 #ifdef UNIV_DEBUG_VALGRIND
 	{
@@ -412,6 +430,8 @@ buf_flush_insert_sorted_into_flush_list(
 				     prev_b, &block->page);
 	}
 
+	incr_flush_list_size_in_bytes(block, buf_pool);
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -514,6 +534,7 @@ buf_flush_remove(
 	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
 {
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ulint		zip_size;
 
 	//ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -552,6 +573,9 @@ buf_flush_remove(
 	because we assert on in_flush_list in comparison function. */
 	ut_d(bpage->in_flush_list = FALSE);
 
+	zip_size = page_zip_get_size(&bpage->zip);
+	buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
 	bpage->oldest_modification = 0;
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -915,7 +939,7 @@ flush:
 				"InnoDB: Page buf fix count %lu,"
 				" io fix %lu, state %lu\n",
 				(ulong)block->page.buf_fix_count,
-				(ulong)buf_block_get_io_fix(block),
+				(ulong)buf_block_get_io_fix_unlocked(block),
 				(ulong)buf_block_get_state(block));
 		}
 
@@ -1115,7 +1139,7 @@ buf_flush_write_block_low(
 	ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
 	ut_ad(!buf_flush_list_mutex_own(buf_pool));
 	ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
-	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+	ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
 	ut_ad(bpage->oldest_modification != 0);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
@@ -1181,10 +1205,10 @@ buf_flush_write_block_low(
 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 /********************************************************************//**
 Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: buf_pool->mutex and block->mutex must be held upon entering this
-function, and they will be released by this function after flushing.
+NOTE: block->mutex must be held upon entering this function, and it will be
+released by this function after flushing.
 This is loosely based on buf_flush_batch() and buf_flush_page().
-@return TRUE if the page was flushed and the mutexes released */
+@return TRUE if the page was flushed and the mutex released */
 UNIV_INTERN
 ibool
 buf_flush_page_try(
@@ -1553,16 +1577,14 @@ scan:
 Check if the block is modified and ready for flushing. If the the block
 is ready to flush then flush the page and try o flush its neighbors.
 
-@return	TRUE if buf_pool mutex was not released during this function.
+@return	TRUE if LRU list mutex was not released during this function.
 This does not guarantee that some pages were written as well.
 Number of pages written are incremented to the count. */
 static
 ibool
 buf_flush_page_and_try_neighbors(
 /*=============================*/
-	buf_page_t*	bpage,		/*!< in: buffer control block,
-					must be
-					buf_page_in_file(bpage) */
+	buf_page_t*	bpage,		/*!< in: buffer control block */
 	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU
 					or BUF_FLUSH_LIST */
 	ulint		n_to_flush,	/*!< in: number of pages to
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index 14b5c65132c..cd99d3e4e13 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -153,6 +153,23 @@ buf_LRU_block_free_hashed_page(
 	ibool		have_page_hash_mutex);
 
 /******************************************************************//**
+Increases the LRU size in bytes: by zip_size for a compressed page,
+or by UNIV_PAGE_SIZE for an uncompressed page. */
+static inline
+void
+incr_LRU_size_in_bytes(
+/*===================*/
+	buf_page_t*	bpage,		/*!< in: control block */
+	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
+{
+	ulint		zip_size;
+	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+	zip_size = page_zip_get_size(&bpage->zip);
+	buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+	ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
+}
+
+/******************************************************************//**
 Determines if the unzip_LRU list should be used for evicting a victim
 instead of the general LRU list.
 @return	TRUE if should use unzip_LRU */
@@ -393,18 +410,18 @@ buf_flush_yield(
 {
 	mutex_t*	block_mutex;
 
+	block_mutex = buf_page_get_mutex(bpage);
+
+	ut_ad(mutex_own(block_mutex));
 	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
 	ut_ad(buf_page_in_file(bpage));
 
-	block_mutex = buf_page_get_mutex(bpage);
-
-	mutex_enter(block_mutex);
 	/* "Fix" the block so that the position cannot be
 	changed after we release the buffer pool and
 	block mutexes. */
 	buf_page_set_sticky(bpage);
 
-	/* Now it is safe to release the buf_pool->mutex. */
+	/* Now it is safe to release the LRU list mutex. */
 	mutex_exit(&buf_pool->LRU_list_mutex);
 
 	mutex_exit(block_mutex);
@@ -415,7 +432,7 @@ buf_flush_yield(
 
 	mutex_enter(block_mutex);
 	/* "Unfix" the block now that we have both the
-	buffer pool and block mutex again. */
+	LRU list and block mutex again. */
 	buf_page_unset_sticky(bpage);
 	mutex_exit(block_mutex);
 }
@@ -431,7 +448,9 @@ buf_flush_try_yield(
 /*================*/
 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
 	buf_page_t*	bpage,		/*!< in/out: bpage to remove */
-	ulint		processed)	/*!< in: number of pages processed */
+	ulint		processed,	/*!< in: number of pages processed */
+	ibool*		must_restart)	/*!< in/out: if TRUE, we have to
+					restart the flush list scan */
 {
 	/* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
 	loop we release buf_pool->mutex to let other threads
@@ -441,10 +460,40 @@ buf_flush_try_yield(
 
 	if (bpage != NULL
 	    && processed >= BUF_LRU_DROP_SEARCH_SIZE
-	    && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+	    && buf_page_get_io_fix_unlocked(bpage) == BUF_IO_NONE) {
+
+		mutex_t*	block_mutex;
 
 		buf_flush_list_mutex_exit(buf_pool);
 
+		/* We don't have to worry about bpage becoming a dangling
+		pointer by a compressed page flush list relocation because
+		buf_page_get_gen() won't be called for pages from this
+		tablespace.  */
+
+		block_mutex = buf_page_get_mutex_enter(bpage);
+		if (UNIV_UNLIKELY(block_mutex == NULL)) {
+
+			buf_flush_list_mutex_enter(buf_pool);
+
+			*must_restart = TRUE;
+			return FALSE;
+		}
+
+		/* Recheck the I/O fix and the flush list presence now that we
+		hold the right mutex */
+		if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE
+				  || bpage->oldest_modification == 0)) {
+
+			mutex_exit(block_mutex);
+			buf_flush_list_mutex_enter(buf_pool);
+
+			*must_restart = TRUE;
+			return FALSE;
+		}
+
+		*must_restart = FALSE;
+
 		/* Release the LRU list and block mutex
 		to give the other threads a go. */
 
@@ -473,7 +522,9 @@ ibool
 buf_flush_or_remove_page(
 /*=====================*/
 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
-	buf_page_t*	bpage)		/*!< in/out: bpage to remove */
+	buf_page_t*	bpage,		/*!< in/out: bpage to remove */
+	ibool*		must_restart)	/*!< in/out: if TRUE, must restart the
+					flush list scan */
 {
 	mutex_t*	block_mutex;
 	ibool		processed = FALSE;
@@ -487,7 +538,8 @@ buf_flush_or_remove_page(
 	buf_pool->mutex and block_mutex. It is safe to check
 	them while holding buf_pool->mutex only. */
 
-	if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+	if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage)
+			  != BUF_IO_NONE)) {
 
 		/* We cannot remove this page during this scan
 		yet; maybe the system is currently reading it
@@ -496,21 +548,38 @@ buf_flush_or_remove_page(
 	} else {
 
 		/* We have to release the flush_list_mutex to obey the
-		latching order. We are however guaranteed that the page
-		will stay in the flush_list because buf_flush_remove()
-		needs buf_pool->mutex as well (for the non-flush case). */
+		latching order. However, we are not guaranteed that the page
+		will stay in the flush_list. */
 
 		buf_flush_list_mutex_exit(buf_pool);
 
+		/* We don't have to worry about bpage becoming a dangling
+		pointer by a compressed page flush list relocation because
+		buf_page_get_gen() won't be called for pages from this
+		tablespace.  */
+
 		mutex_enter(block_mutex);
 
-		ut_ad(bpage->oldest_modification != 0);
+		/* Recheck the page I/O fix and the flush list presence now
+		that we hold the right mutex. */
+		if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE
+				  || bpage->oldest_modification == 0)) {
 
-		if (bpage->buf_fix_count == 0) {
+			/* The page became I/O-fixed or is not on the flush
+			list anymore, this invalidates any flush-list-page
+			pointers we have. */
+			*must_restart = TRUE;
 
-			buf_flush_remove(bpage);
+		} else {
+
+			ut_ad(bpage->oldest_modification != 0);
+
+			if (bpage->buf_fix_count == 0) {
+
+				buf_flush_remove(bpage);
 
-			processed = TRUE;
+				processed = TRUE;
+			}
 		}
 
 		mutex_exit(block_mutex);
@@ -541,11 +610,12 @@ buf_flush_or_remove_pages(
 	buf_page_t*	bpage;
 	ulint		processed = 0;
 	ibool		all_freed = TRUE;
+	ibool		must_restart = FALSE;
 
 	buf_flush_list_mutex_enter(buf_pool);
 
 	for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-	     bpage != NULL;
+	     !must_restart && bpage != NULL;
 	     bpage = prev) {
 
 		ut_a(buf_page_in_file(bpage));
@@ -561,22 +631,31 @@ buf_flush_or_remove_pages(
 			/* Skip this block, as it does not belong to
 			the target space. */
 
-		} else if (!buf_flush_or_remove_page(buf_pool, bpage)) {
+		} else if (!buf_flush_or_remove_page(buf_pool, bpage,
+						     &must_restart)) {
 
 			/* Remove was unsuccessful, we have to try again
 			by scanning the entire list from the end. */
 
 			all_freed = FALSE;
 		}
+		if (UNIV_UNLIKELY(must_restart)) {
+			ut_ad(!all_freed);
+			break;
+		}
 
 		++processed;
 
 		/* Yield if we have hogged the CPU and mutexes for too long. */
-		if (buf_flush_try_yield(buf_pool, prev, processed)) {
+		if (buf_flush_try_yield(buf_pool, prev, processed,
+					&must_restart)) {
 
+			ut_ad(!must_restart);
 			/* Reset the batch size counter if we had to yield. */
 
 			processed = 0;
+		} else if (UNIV_UNLIKELY(must_restart)) {
+			all_freed = FALSE;
 		}
 
 	}
@@ -641,41 +720,39 @@ scan_again:
 	     /* No op */) {
 
 		buf_page_t*	prev_bpage;
-		mutex_t*	block_mutex = NULL;
+		mutex_t*	block_mutex;
 
 		ut_a(buf_page_in_file(bpage));
 		ut_ad(bpage->in_LRU_list);
 
 		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
-		/* bpage->space and bpage->io_fix are protected by
-		buf_pool->mutex and the block_mutex. It is safe to check
-		them while holding buf_pool->mutex only. */
+		block_mutex = buf_page_get_mutex_enter(bpage);
+
+		if (!block_mutex) {
+			/* This should be an impossible case...
+			   Something is wrong, so we will scan_again */
+
+			all_freed = FALSE;
+			goto next_page;
+		}
 
 		if (buf_page_get_space(bpage) != id) {
 			/* Skip this block, as it does not belong to
 			the space that is being invalidated. */
+
+			mutex_exit(block_mutex);
 			goto next_page;
 		} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 			/* We cannot remove this page during this scan
 			yet; maybe the system is currently reading it
 			in, or flushing the modifications to the file */
 
+			mutex_exit(block_mutex);
 			all_freed = FALSE;
 			goto next_page;
 		} else {
 
-			block_mutex = buf_page_get_mutex_enter(bpage);
-
-			if (!block_mutex) {
-				/* It may be impossible case...
-				   Something wrong, so will be scan_again */
-
-				all_freed = FALSE;
-				goto next_page;
-			}
-
-
 			if (bpage->buf_fix_count > 0) {
 
 				mutex_exit(block_mutex);
@@ -1491,6 +1568,7 @@ buf_LRU_remove_block(
 	buf_page_t*	bpage)	/*!< in: control block */
 {
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ulint		zip_size;
 
 	ut_ad(buf_pool);
 	ut_ad(bpage);
@@ -1527,6 +1605,9 @@ buf_LRU_remove_block(
 	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
 	bpage->in_LRU_list = FALSE;
 
+	zip_size = page_zip_get_size(&bpage->zip);
+	buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
 	buf_unzip_LRU_remove_block_if_needed(bpage);
 
 	/* If the LRU list is so short that LRU_old is not defined,
@@ -1588,7 +1669,10 @@ buf_unzip_LRU_add_block(
 }
 
 /******************************************************************//**
-Adds a block to the LRU list end. */
+Adds a block to the LRU list end. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
 UNIV_INLINE
 void
 buf_LRU_add_block_to_end_low(
@@ -1608,6 +1692,8 @@ buf_LRU_add_block_to_end_low(
 	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
 	bpage->in_LRU_list = TRUE;
 
+	incr_LRU_size_in_bytes(bpage, buf_pool);
+
 	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
 
 		ut_ad(buf_pool->LRU_old);
@@ -1636,7 +1722,10 @@ buf_LRU_add_block_to_end_low(
 }
 
 /******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
 UNIV_INLINE
 void
 buf_LRU_add_block_low(
@@ -1679,6 +1768,8 @@ buf_LRU_add_block_low(
 
 	bpage->in_LRU_list = TRUE;
 
+	incr_LRU_size_in_bytes(bpage, buf_pool);
+
 	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
 
 		ut_ad(buf_pool->LRU_old);
@@ -1706,7 +1797,10 @@ buf_LRU_add_block_low(
 }
 
 /******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
 UNIV_INTERN
 void
 buf_LRU_add_block(
@@ -1853,7 +1947,7 @@ alloc:
 	    || !buf_page_can_relocate(bpage)) {
 not_freed:
 		if (b) {
-			buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
+			buf_page_free_descriptor(b);
 		}
 		if (!have_LRU_mutex)
 			mutex_exit(&buf_pool->LRU_list_mutex);
@@ -1935,6 +2029,8 @@ not_freed:
 				UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
 						     prev_b, b);
 
+				incr_LRU_size_in_bytes(b, buf_pool);
+
 				if (buf_page_is_old(b)) {
 					buf_pool->LRU_old_len++;
 					if (UNIV_UNLIKELY
@@ -2196,7 +2292,9 @@ buf_LRU_block_remove_hashed_page(
 				break;
 			case FIL_PAGE_INDEX:
 #ifdef UNIV_ZIP_DEBUG
-				ut_a(page_zip_validate(&bpage->zip, page));
+				ut_a(page_zip_validate(
+					     &bpage->zip, page,
+					     ((buf_block_t*) bpage)->index));
 #endif /* UNIV_ZIP_DEBUG */
 				break;
 			default:
diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c
index 6d76a488af7..cf0a029df92 100644
--- a/storage/xtradb/buf/buf0rea.c
+++ b/storage/xtradb/buf/buf0rea.c
@@ -235,7 +235,8 @@ not_to_recover:
 			      sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
 			      ((buf_block_t*) bpage)->frame, bpage, trx);
 	}
-	if(sync) {
+
+	if (sync) {
 		thd_wait_end(NULL);
 	}
 
diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c
index 516b6e927e0..29063f028f1 100644
--- a/storage/xtradb/dict/dict0dict.c
+++ b/storage/xtradb/dict/dict0dict.c
@@ -525,6 +525,20 @@ dict_index_get_nth_col_or_prefix_pos(
 	return(ULINT_UNDEFINED);
 }
 
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
+{
+	return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE));
+}
+
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Returns TRUE if the index contains a column or a prefix of that column.
@@ -2088,7 +2102,6 @@ dict_index_build_internal_clust(
 {
 	dict_index_t*	new_index;
 	dict_field_t*	field;
-	ulint		fixed_size;
 	ulint		trx_id_pos;
 	ulint		i;
 	ibool*		indexed;
@@ -2165,7 +2178,7 @@ dict_index_build_internal_clust(
 
 		for (i = 0; i < trx_id_pos; i++) {
 
-			fixed_size = dict_col_get_fixed_size(
+			ulint fixed_size = dict_col_get_fixed_size(
 				dict_index_get_nth_col(new_index, i),
 				dict_table_is_comp(table));
 
@@ -2182,7 +2195,20 @@ dict_index_build_internal_clust(
 				break;
 			}
 
-			new_index->trx_id_offset += (unsigned int) fixed_size;
+			/* Add fixed_size to new_index->trx_id_offset.
+			Because the latter is a bit-field, an overflow
+			can theoretically occur. Check for it. */
+			fixed_size += new_index->trx_id_offset;
+
+			new_index->trx_id_offset = fixed_size;
+
+			if (new_index->trx_id_offset != fixed_size) {
+				/* Overflow. Pretend that this is a
+				variable-length PRIMARY KEY. */
+				ut_ad(0);
+				new_index->trx_id_offset = 0;
+				break;
+                        }
 		}
 
 	}
diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c
index 0ef74ea8b7c..cdef0d1b270 100644
--- a/storage/xtradb/dict/dict0load.c
+++ b/storage/xtradb/dict/dict0load.c
@@ -2399,7 +2399,8 @@ dict_load_foreigns(
 	ibool		check_charsets)	/*!< in: TRUE=check charset
 					compatibility */
 {
-	char		tuple_buf[DTUPLE_EST_ALLOC(1)];
+	ulint		tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
+				/ sizeof(ulint)];
 	btr_pcur_t	pcur;
 	dtuple_t*	tuple;
 	dfield_t*	dfield;
diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c
index 4ab2c16d41f..397c4de4b6e 100644
--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@@ -195,14 +195,16 @@ struct fil_space_struct {
 				requests on the file */
 	ibool		stop_new_ops;
 				/*!< we set this TRUE when we start
-				deleting a single-table tablespace */
-	ibool		is_being_deleted;
-				/*!< this is set to TRUE when we start
-				deleting a single-table tablespace and its
-				file; when this flag is set no further i/o
-				or flush requests can be placed on this space,
-				though there may be such requests still being
-				processed on this space */
+				deleting a single-table tablespace.
+				When this is set following new ops
+				are not allowed:
+				* read IO request
+				* ibuf merge
+				* file flush
+				Note that we can still possibly have
+				new write operations because we don't
+				check this flag when doing flush
+				batches. */
 	ulint		purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
 				FIL_ARCH_LOG */
 	UT_LIST_BASE_NODE_T(fil_node_t) chain;
@@ -865,7 +867,7 @@ fil_node_close_file(
 	ut_ad(node && system);
 	ut_ad(mutex_own(&(system->mutex)));
 	ut_a(node->open);
-	ut_a(node->n_pending == 0 || node->space->is_being_deleted);
+	ut_a(node->n_pending == 0 || node->space->stop_new_ops);
 	ut_a(node->n_pending_flushes == 0);
 #ifndef UNIV_HOTBACKUP
 	ut_a(node->modification_counter == node->flush_counter
@@ -1099,7 +1101,7 @@ fil_node_free(
 	ut_ad(node && system && space);
 	ut_ad(mutex_own(&(system->mutex)));
 	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
-	ut_a(node->n_pending == 0 || space->is_being_deleted);
+	ut_a(node->n_pending == 0 || space->stop_new_ops);
 
 	if (node->open) {
 		/* We fool the assertion in fil_node_close_file() to think
@@ -1297,7 +1299,6 @@ try_again:
 
 	space->stop_ios = FALSE;
 	space->stop_new_ops = FALSE;
-	space->is_being_deleted = FALSE;
 	space->purpose = purpose;
 	space->size = 0;
 	space->flags = flags;
@@ -1478,7 +1479,7 @@ fil_space_get_size(
 
 	ut_ad(fil_system);
 
-	fil_mutex_enter_and_prepare_for_io(id);
+	mutex_enter(&fil_system->mutex);
 
 	space = fil_space_get_by_id(id);
 
@@ -1493,6 +1494,23 @@ fil_space_get_size(
 
 		ut_a(1 == UT_LIST_GET_LEN(space->chain));
 
+		mutex_exit(&fil_system->mutex);
+
+		/* It is possible that the space gets evicted at this point
+		before the fil_mutex_enter_and_prepare_for_io() acquires
+		the fil_system->mutex. Check for this after completing the
+		call to fil_mutex_enter_and_prepare_for_io(). */
+		fil_mutex_enter_and_prepare_for_io(id);
+
+		/* We are still holding the fil_system->mutex. Check if
+		the space is still in memory cache. */
+		space = fil_space_get_by_id(id);
+
+		if (space == NULL) {
+			mutex_exit(&fil_system->mutex);
+			return(0);
+		}
+
 		node = UT_LIST_GET_FIRST(space->chain);
 
 		/* It must be a single-table tablespace and we have not opened
@@ -1530,7 +1548,7 @@ fil_space_get_flags(
 		return(0);
 	}
 
-	fil_mutex_enter_and_prepare_for_io(id);
+	mutex_enter(&fil_system->mutex);
 
 	space = fil_space_get_by_id(id);
 
@@ -1545,6 +1563,23 @@ fil_space_get_flags(
 
 		ut_a(1 == UT_LIST_GET_LEN(space->chain));
 
+		mutex_exit(&fil_system->mutex);
+
+		/* It is possible that the space gets evicted at this point
+		before the fil_mutex_enter_and_prepare_for_io() acquires
+		the fil_system->mutex. Check for this after completing the
+		call to fil_mutex_enter_and_prepare_for_io(). */
+		fil_mutex_enter_and_prepare_for_io(id);
+
+		/* We are still holding the fil_system->mutex. Check if
+		the space is still in memory cache. */
+		space = fil_space_get_by_id(id);
+
+		if (space == NULL) {
+			mutex_exit(&fil_system->mutex);
+			return(0);
+		}
+
 		node = UT_LIST_GET_FIRST(space->chain);
 
 		/* It must be a single-table tablespace and we have not opened
@@ -2325,11 +2360,9 @@ try_again:
 		return(FALSE);
 	}
 
-	ut_a(space);
+	ut_a(space->stop_new_ops);
 	ut_a(space->n_pending_ops == 0);
 
-	space->is_being_deleted = TRUE;
-
 	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
 	node = UT_LIST_GET_FIRST(space->chain);
 
@@ -2372,12 +2405,26 @@ try_again:
 	rw_lock_x_lock(&space->latch);
 
 #ifndef UNIV_HOTBACKUP
-	/* Invalidate in the buffer pool all pages belonging to the
-	tablespace. Since we have set space->is_being_deleted = TRUE, readahead
-	or ibuf merge can no longer read more pages of this tablespace to the
-	buffer pool. Thus we can clean the tablespace out of the buffer pool
-	completely and permanently. The flag is_being_deleted also prevents
-	fil_flush() from being applied to this tablespace. */
+	/* IMPORTANT: Because we have set space::stop_new_ops there
+	can't be any new ibuf merges, reads or flushes. We are here
+	because node::n_pending was zero above. However, it is still
+	possible to have pending read and write requests:
+
+	A read request can happen because the reader thread has
+	gone through the ::stop_new_ops check in buf_page_init_for_read()
+	before the flag was set and has not yet incremented ::n_pending
+	when we checked it above.
+
+	A write request can be issued any time because we don't check
+	the ::stop_new_ops flag when queueing a block for write.
+
+	We deal with pending write requests in the following function
+	where we'd minimally evict all dirty pages belonging to this
+	space from the flush_list. Note that if a block is IO-fixed
+	we'll wait for IO to complete.
+
+	We deal with potential read requests by checking the
+	::stop_new_ops flag in fil_io(). */
 
 	if (srv_lazy_drop_table) {
 		buf_LRU_mark_space_was_deleted(id);
@@ -2393,6 +2440,15 @@ try_again:
 
 	mutex_enter(&fil_system->mutex);
 
+	/* Double check the sanity of pending ops after reacquiring
+	the fil_system::mutex. */
+	if (fil_space_get_by_id(id)) {
+		ut_a(space->n_pending_ops == 0);
+		ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+		node = UT_LIST_GET_FIRST(space->chain);
+		ut_a(node->n_pending == 0);
+	}
+
 	success = fil_space_free(id, TRUE);
 
 	mutex_exit(&fil_system->mutex);
@@ -2450,7 +2506,7 @@ fil_tablespace_is_being_deleted(
 
 	ut_a(space != NULL);
 
-	is_being_deleted = space->is_being_deleted;
+	is_being_deleted = space->stop_new_ops;
 
 	mutex_exit(&fil_system->mutex);
 
@@ -2710,7 +2766,7 @@ retry:
 	mutex_exit(&fil_system->mutex);
 
 #ifndef UNIV_HOTBACKUP
-	if (success) {
+	if (success && !recv_recovery_on) {
 		mtr_t		mtr;
 
 		mtr_start(&mtr);
@@ -4531,7 +4587,7 @@ fil_tablespace_deleted_or_being_deleted_in_mem(
 
 	space = fil_space_get_by_id(id);
 
-	if (space == NULL || space->is_being_deleted) {
+	if (space == NULL || space->stop_new_ops) {
 		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
@@ -4809,6 +4865,24 @@ fil_extend_space_to_desired_size(
 	start_page_no = space->size;
 	file_start_page_no = space->size - node->size;
 
+#ifdef HAVE_POSIX_FALLOCATE
+	if (srv_use_posix_fallocate) {
+		offset_high = size_after_extend * page_size / (4ULL*1024*1024*1024);
+		offset_low = size_after_extend * page_size % (4ULL*1024*1024*1024);
+
+		mutex_exit(&fil_system->mutex);
+		success = os_file_set_size(node->name, node->handle,
+				offset_low, offset_high);
+		mutex_enter(&fil_system->mutex);
+		if (success) {
+			node->size += (size_after_extend - start_page_no);
+			space->size += (size_after_extend - start_page_no);
+			os_has_said_disk_full = FALSE;
+		}
+		goto complete_io;
+	}
+#endif
+
 	/* Extend at most 64 pages at a time */
 	buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
 	buf2 = mem_alloc(buf_size + page_size);
@@ -4865,6 +4939,10 @@ fil_extend_space_to_desired_size(
 
 	mem_free(buf2);
 
+#ifdef HAVE_POSIX_FALLOCATE
+complete_io:
+#endif
+
 	fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
 
 	*actual_size = space->size;
@@ -5271,7 +5349,9 @@ _fil_io(
 
 	space = fil_space_get_by_id(space_id);
 
-	if (!space) {
+	/* If we are deleting a tablespace we don't allow any read
+	operations on that. However, we do allow write operations. */
+	if (!space || (type == OS_FILE_READ && space->stop_new_ops)) {
 		mutex_exit(&fil_system->mutex);
 
 		ut_print_timestamp(stderr);
@@ -5362,8 +5442,8 @@ _fil_io(
 
 	/* Do aio */
 
-	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
-	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
+	ut_a(byte_offset % OS_MIN_LOG_BLOCK_SIZE == 0);
+	ut_a((len % OS_MIN_LOG_BLOCK_SIZE) == 0);
 
 	if (srv_pass_corrupt_table == 1 && space->is_corrupt) {
 		/* should ignore i/o for the crashed space */
@@ -5551,7 +5631,7 @@ fil_aio_wait(
 	    && ((buf_page_t*)message)->space_was_being_deleted) {
 
 		/* intended not to be uncompress read page */
-		ut_a(buf_page_get_io_fix(message) == BUF_IO_WRITE
+		ut_a(buf_page_get_io_fix_unlocked(message) == BUF_IO_WRITE
 		     || !buf_page_get_zip_size(message)
 		     || buf_page_get_state(message) != BUF_BLOCK_FILE_PAGE);
 
@@ -5612,7 +5692,7 @@ fil_flush(
 
 	space = fil_space_get_by_id(space_id);
 
-	if (!space || space->is_being_deleted) {
+	if (!space || space->stop_new_ops) {
 		mutex_exit(&fil_system->mutex);
 
 		return;
@@ -5743,7 +5823,7 @@ fil_flush_file_spaces(
 	     space;
 	     space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
 
-		if (space->purpose == purpose && !space->is_being_deleted) {
+		if (space->purpose == purpose && !space->stop_new_ops) {
 
 			space_ids[n_space_ids++] = space->id;
 		}
@@ -5982,3 +6062,26 @@ fil_space_set_corrupt(
 	mutex_exit(&fil_system->mutex);
 }
 
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+	ulint		old_space_id,	/*!< in: tablespace id of the old
+					table. */
+	const char*	old_name,	/*!< in: old table name */
+	ulint		new_space_id,	/*!< in: tablespace id of the new
+					table */
+	const char*	new_name,	/*!< in: new table name */
+	const char*	tmp_name)	/*!< in: temp table name used while
+					swapping */
+{
+	mtr_t           mtr;
+	mtr_start(&mtr);
+	fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+			 0, 0, old_name, tmp_name, &mtr);
+	fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+			 0, 0, new_name, old_name, &mtr);
+	mtr_commit(&mtr);
+}
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 12f6f5134d2..8b824bc994d 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -85,6 +85,7 @@ extern "C" {
 #include "row0sel.h"
 #include "row0upd.h"
 #include "log0log.h"
+#include "log0online.h"
 #include "lock0lock.h"
 #include "dict0crea.h"
 #include "btr0cur.h"
@@ -185,6 +186,8 @@ static my_bool	innobase_file_format_check		= TRUE;
 static my_bool	innobase_log_archive			= FALSE;
 static char*	innobase_log_arch_dir			= NULL;
 #endif /* UNIV_LOG_ARCHIVE */
+static my_bool	innobase_use_atomic_writes		= FALSE;
+static my_bool	innobase_use_fallocate			= TRUE;
 static my_bool	innobase_use_doublewrite		= TRUE;
 static my_bool	innobase_use_checksums			= TRUE;
 static my_bool	innobase_fast_checksum			= FALSE;
@@ -297,6 +300,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
 	{&ibuf_pessimistic_insert_mutex_key,
 		 "ibuf_pessimistic_insert_mutex", 0},
 	{&kernel_mutex_key, "kernel_mutex", 0},
+	{&log_bmp_sys_mutex_key, "log_bmp_sys_mutex", 0},
 	{&log_sys_mutex_key, "log_sys_mutex", 0},
 #  ifdef UNIV_MEM_DEBUG
 	{&mem_hash_mutex_key, "mem_hash_mutex", 0},
@@ -437,6 +441,25 @@ uint
 innobase_alter_table_flags(
 /*=======================*/
 	uint	flags);
+/************************************************************//**
+Synchronously read and parse the redo log up to the last
+checkpoint to write the changed page bitmap.
+@return 0 to indicate success.  Current implementation cannot fail. */
+static
+my_bool
+innobase_flush_changed_page_bitmaps() __attribute__((unused));
+/*==================================*/
+/************************************************************//**
+Delete all the bitmap files for data less than the specified LSN.
+If called with lsn == 0 (i.e. set by RESET request) or
+IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise
+continue it.
+@return 0 to indicate success, 1 for failure. */
+static
+my_bool
+innobase_purge_changed_page_bitmaps(
+/*================================*/
+	ulonglong lsn) __attribute__((unused));	/*!< in: LSN to purge files up to */
 
 static const char innobase_hton_name[]= "InnoDB";
 
@@ -688,8 +711,12 @@ static SHOW_VAR innodb_status_variables[]= {
   (char*) &export_vars.innodb_background_log_sync,	  SHOW_LONG},
   {"buffer_pool_pages_data",
   (char*) &export_vars.innodb_buffer_pool_pages_data,	  SHOW_LONG},
+  {"buffer_pool_bytes_data",
+  (char*) &export_vars.innodb_buffer_pool_bytes_data,	  SHOW_LONG},
   {"buffer_pool_pages_dirty",
   (char*) &export_vars.innodb_buffer_pool_pages_dirty,	  SHOW_LONG},
+  {"buffer_pool_bytes_dirty",
+  (char*) &export_vars.innodb_buffer_pool_bytes_dirty,	  SHOW_LONG},
   {"buffer_pool_pages_flushed",
   (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
   {"buffer_pool_pages_LRU_flushed",
@@ -870,6 +897,12 @@ static SHOW_VAR innodb_status_variables[]= {
   (char*) &export_vars.innodb_x_lock_spin_rounds,	  SHOW_LONGLONG},
   {"x_lock_spin_waits",
   (char*) &export_vars.innodb_x_lock_spin_waits,	  SHOW_LONGLONG},
+#ifdef UNIV_DEBUG
+  {"purge_trx_id_age",
+  (char*) &export_vars.innodb_purge_trx_id_age,		  SHOW_LONG},
+  {"purge_view_trx_id_age",
+  (char*) &export_vars.innodb_purge_view_trx_id_age,	  SHOW_LONG},
+#endif /* UNIV_DEBUG */
   {NullS, NullS, SHOW_LONG}
 };
 
@@ -1219,11 +1252,23 @@ convert_error_code_to_mysql(
 	case DB_TABLE_NOT_FOUND:
 		return(HA_ERR_NO_SUCH_TABLE);
 
-	case DB_TOO_BIG_RECORD:
-		my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
-			 page_get_free_space_of_empty(flags
-						      & DICT_TF_COMPACT) / 2);
+	case DB_TOO_BIG_RECORD: {
+		/* If prefix is true then a 768-byte prefix is stored
+		locally for BLOB fields. Refer to dict_table_get_format() */
+		bool prefix = ((flags & DICT_TF_FORMAT_MASK)
+		 	       >> DICT_TF_FORMAT_SHIFT) < UNIV_FORMAT_B;
+		my_printf_error(ER_TOO_BIG_ROWSIZE,
+			"Row size too large (> %lu). Changing some columns "
+			"to TEXT or BLOB %smay help. In current row "
+			"format, BLOB prefix of %d bytes is stored inline.",
+			MYF(0),
+			page_get_free_space_of_empty(flags &
+				DICT_TF_COMPACT) / 2,
+			prefix ? "or using ROW_FORMAT=DYNAMIC "
+			"or ROW_FORMAT=COMPRESSED ": "",
+			prefix ? DICT_MAX_FIXED_COL_LEN : 0);
 		return(HA_ERR_TO_BIG_ROW);
+	}
 
 	case DB_TOO_BIG_INDEX_COL:
 		my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
@@ -1262,6 +1307,8 @@ convert_error_code_to_mysql(
 		return(HA_ERR_INDEX_CORRUPT);
 	case DB_UNDO_RECORD_TOO_BIG:
 		return(HA_ERR_UNDO_REC_TOO_BIG);
+	case DB_OUT_OF_MEMORY:
+		return(HA_ERR_OUT_OF_MEM);
 	}
 }
 
@@ -1439,16 +1486,6 @@ innobase_get_lower_case_table_names(void)
 	return(lower_case_table_names);
 }
 
-#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
-extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
-/*******************************************************************//**
-Map an OS error to an errno value. The OS error number is stored in
-_doserrno and the mapped value is stored in errno) */
-extern "C"
-void __cdecl
-_dosmaperr(
-	unsigned long);	/*!< in: OS error value */
-
 /*********************************************************************//**
 Creates a temporary file.
 @return	temporary file descriptor, or < 0 on error */
@@ -1457,92 +1494,16 @@ int
 innobase_mysql_tmpfile(void)
 /*========================*/
 {
-	int	fd;				/* handle of opened file */
-	HANDLE	osfh;				/* OS handle of opened file */
-	char*	tmpdir;				/* point to the directory
-						where to create file */
-	TCHAR	path_buf[MAX_PATH - 14];	/* buffer for tmp file path.
-						The length cannot be longer
-						than MAX_PATH - 14, or
-						GetTempFileName will fail. */
-	char	filename[MAX_PATH];		/* name of the tmpfile */
-	DWORD	fileaccess = GENERIC_READ	/* OS file access */
-			     | GENERIC_WRITE
-			     | DELETE;
-	DWORD	fileshare = FILE_SHARE_READ	/* OS file sharing mode */
-			    | FILE_SHARE_WRITE
-			    | FILE_SHARE_DELETE;
-	DWORD	filecreate = CREATE_ALWAYS;	/* OS method of open/create */
-	DWORD	fileattrib =			/* OS file attribute flags */
-			     FILE_ATTRIBUTE_NORMAL
-			     | FILE_FLAG_DELETE_ON_CLOSE
-			     | FILE_ATTRIBUTE_TEMPORARY
-			     | FILE_FLAG_SEQUENTIAL_SCAN;
-
-	DBUG_ENTER("innobase_mysql_tmpfile");
-
-	tmpdir = my_tmpdir(&mysql_tmpdir_list);
-
-	/* The tmpdir parameter can not be NULL for GetTempFileName. */
-	if (!tmpdir) {
-		uint	ret;
-
-		/* Use GetTempPath to determine path for temporary files. */
-		ret = GetTempPath(sizeof(path_buf), path_buf);
-		if (ret > sizeof(path_buf) || (ret == 0)) {
-
-			_dosmaperr(GetLastError());	/* map error */
-			DBUG_RETURN(-1);
-		}
-
-		tmpdir = path_buf;
-	}
-
-	/* Use GetTempFileName to generate a unique filename. */
-	if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
-
-		_dosmaperr(GetLastError());	/* map error */
-		DBUG_RETURN(-1);
-	}
-
-	DBUG_PRINT("info", ("filename: %s", filename));
-
-	/* Open/Create the file. */
-	osfh = CreateFile(filename, fileaccess, fileshare, NULL,
-			  filecreate, fileattrib, NULL);
-	if (osfh == INVALID_HANDLE_VALUE) {
-
-		/* open/create file failed! */
-		_dosmaperr(GetLastError());	/* map error */
-		DBUG_RETURN(-1);
-	}
-
-	do {
-		/* Associates a CRT file descriptor with the OS file handle. */
-		fd = _open_osfhandle((intptr_t) osfh, 0);
-	} while (fd == -1 && errno == EINTR);
+	int	fd2 = -1;
+	File	fd;
 
-	if (fd == -1) {
-		/* Open failed, close the file handle. */
+	DBUG_EXECUTE_IF(
+		"innobase_tmpfile_creation_failure",
+		return(-1);
+	);
 
-		_dosmaperr(GetLastError());	/* map error */
-		CloseHandle(osfh);		/* no need to check if
-						CloseHandle fails */
-	}
+	fd = mysql_tmpfile("ib");
 
-	DBUG_RETURN(fd);
-}
-#else
-/*********************************************************************//**
-Creates a temporary file.
-@return	temporary file descriptor, or < 0 on error */
-extern "C" UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
-	int	fd2 = -1;
-	File	fd = mysql_tmpfile("ib");
 	if (fd >= 0) {
 		/* Copy the file descriptor, so that the additional resources
 		allocated by create_temp_file() can be freed by invoking
@@ -1586,7 +1547,6 @@ innobase_mysql_tmpfile(void)
 	}
 	return(fd2);
 }
-#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
 
 /*********************************************************************//**
 Wrapper around MySQL's copy_and_convert function.
@@ -1702,10 +1662,13 @@ innobase_next_autoinc(
 		offset = 0;
 	}
 
-	/* Check for overflow. */
+	/* Check for overflow. Current can be > max_value if the value is
+	in reality a negative value. The Visual Studio compilers convert
+	large double values automatically into the unsigned long long
+	datatype maximum value. */
 	if (block >= max_value
 	    || offset > max_value
-	    || current == max_value
+	    || current >= max_value
 	    || max_value - offset <= offset) {
 
 		next_value = max_value;
@@ -1778,7 +1741,7 @@ innobase_trx_init(
 	trx->fake_changes = THDVAR(thd, fake_changes);
 
 #ifdef EXTENDED_SLOWLOG
-	if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) {
+	if (thd_log_slow_verbosity(thd) & (1ULL << SLOG_V_INNODB)) {
 		trx->take_stats = TRUE;
 	} else {
 		trx->take_stats = FALSE;
@@ -2599,6 +2562,13 @@ skip_overwrite:
 }
 
 
+/****************************************************************//**
+Gives the file extension of an InnoDB single-table tablespace. */
+static const char* ha_innobase_exts[] = {
+  ".ibd",
+  NullS
+};
+
 /*********************************************************************//**
 Opens an InnoDB database.
 @return	0 on success, error code on failure */
@@ -2648,6 +2618,9 @@ innobase_init(
 	innobase_hton->alter_table_flags = innobase_alter_table_flags;
         innobase_hton->kill_query = innobase_kill_query;
 
+        if (srv_file_per_table)
+          innobase_hton->tablefile_extensions = ha_innobase_exts;
+
 	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
 
 #ifndef DBUG_OFF
@@ -2717,6 +2690,7 @@ innobase_init(
 	} else {
 		srv_log_block_size = 512;
 	}
+	ut_ad (srv_log_block_size >= OS_MIN_LOG_BLOCK_SIZE);
 
 	if (!srv_log_block_size) {
 		fprintf(stderr,
@@ -3107,6 +3081,38 @@ innobase_change_buffering_inited_ok:
 	srv_kill_idle_transaction = 0;
 #endif
 
+#ifdef HAVE_POSIX_FALLOCATE
+	srv_use_posix_fallocate = (ibool) innobase_use_fallocate;
+#endif
+	srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
+	if (innobase_use_atomic_writes) {
+		fprintf(stderr, "InnoDB: using atomic writes.\n");
+
+		/* Force doublewrite buffer off, atomic writes replace it. */
+		if (srv_use_doublewrite_buf) {
+			fprintf(stderr, "InnoDB: Switching off doublewrite buffer "
+				"because of atomic writes.\n");
+				innobase_use_doublewrite = srv_use_doublewrite_buf = FALSE;
+		}
+
+		/* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/
+#ifndef _WIN32
+		if(!innobase_file_flush_method ||
+			!strstr(innobase_file_flush_method, "O_DIRECT")) {
+			innobase_file_flush_method = 
+				srv_file_flush_method_str = (char*)"O_DIRECT";
+			fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
+		}
+#endif
+#ifdef HAVE_POSIX_FALLOCATE
+		/* Due to a bug in directFS, using atomic writes requires
+		 * posix_fallocate() to extend the file, because
+		 * pwrite() past the end of the file won't work.
+		 */
+		srv_use_posix_fallocate = TRUE;
+#endif
+	}
+
 #ifdef HAVE_PSI_INTERFACE
 	/* Register keys with MySQL performance schema */
 	if (PSI_server) {
@@ -3267,6 +3273,36 @@ innobase_alter_table_flags(
 		| HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
 }
 
+/************************************************************//**
+Synchronously read and parse the redo log up to the last checkpoint to
+write the changed page bitmap; a no-op unless srv_track_changed_pages is set.
+@return 0 to indicate success.  Current implementation cannot fail. */
+static
+my_bool
+innobase_flush_changed_page_bitmaps()
+/*=================================*/
+{
+	if (srv_track_changed_pages) {
+		os_event_reset(srv_checkpoint_completed_event);
+		log_online_follow_redo_log();
+	}
+	return FALSE;
+}
+
+/************************************************************//**
+Delete all the bitmap files for data less than the specified LSN.
+If called with lsn == IB_ULONGLONG_MAX (i.e. set by RESET request),
+restart the bitmap file sequence, otherwise continue it.
+@return 0 for success, 1 for failure (the callee's result as my_bool). */
+static
+my_bool
+innobase_purge_changed_page_bitmaps(
+/*================================*/
+	ulonglong lsn)	/*!< in: LSN to purge files up to */
+{
+	return (my_bool)log_online_purge_changed_page_bitmaps(lsn);
+}
+
 /****************************************************************//**
 Copy the current replication position from MySQL to a transaction. */
 static
@@ -3330,7 +3366,7 @@ innobase_commit_low(
 		header for undo purposes, see the comment at corresponding call
 		at innobase_xa_prepare(). */
 
-		innobase_copy_repl_coords_to_trx(current_thd, trx);
+		innobase_copy_repl_coords_to_trx((THD *) trx->mysql_thd, trx);
 
 		trx_commit_for_mysql(trx);
 	}
@@ -4041,13 +4077,6 @@ ha_innobase::table_flags() const
 }
 
 /****************************************************************//**
-Gives the file extension of an InnoDB single-table tablespace. */
-static const char* ha_innobase_exts[] = {
-  ".ibd",
-  NullS
-};
-
-/****************************************************************//**
 Returns the index type. */
 UNIV_INTERN
 const char*
@@ -4060,17 +4089,6 @@ ha_innobase::index_type(
 }
 
 /****************************************************************//**
-Returns the table file name extension.
-@return	file extension string */
-UNIV_INTERN
-const char**
-ha_innobase::bas_ext() const
-/*========================*/
-{
-	return(ha_innobase_exts);
-}
-
-/****************************************************************//**
 Returns the operations supported for indexes.
 @return	flags of supported operations */
 UNIV_INTERN
@@ -10646,23 +10664,26 @@ ha_innobase::external_lock(
 
 	if (trx->n_mysql_tables_in_use == 0) {
 #ifdef EXTENDED_SLOWLOG
-		increment_thd_innodb_stats(thd,
-					(unsigned long long) trx->id,
-					trx->io_reads,
-					trx->io_read,
-					trx->io_reads_wait_timer,
-					trx->lock_que_wait_timer,
-					trx->innodb_que_wait_timer,
-					trx->distinct_page_access);
-
-		trx->io_reads = 0;
-		trx->io_read = 0;
-		trx->io_reads_wait_timer = 0;
-		trx->lock_que_wait_timer = 0;
-		trx->innodb_que_wait_timer = 0;
-		trx->distinct_page_access = 0;
-		if (trx->distinct_page_access_hash)
-			memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
+		if (UNIV_UNLIKELY(trx->take_stats)) {
+			increment_thd_innodb_stats(thd,
+						   (unsigned long long) trx->id,
+						   trx->io_reads,
+						   trx->io_read,
+						   trx->io_reads_wait_timer,
+						   trx->lock_que_wait_timer,
+						   trx->innodb_que_wait_timer,
+						   trx->distinct_page_access);
+
+			trx->io_reads = 0;
+			trx->io_read = 0;
+			trx->io_reads_wait_timer = 0;
+			trx->lock_que_wait_timer = 0;
+			trx->innodb_que_wait_timer = 0;
+			trx->distinct_page_access = 0;
+			if (trx->distinct_page_access_hash)
+				memset(trx->distinct_page_access_hash, 0,
+				       DPAH_SIZE);
+		}
 #endif
 
 		trx->mysql_n_tables_locked = 0;
@@ -12740,7 +12761,8 @@ static MYSQL_SYSVAR_ULONG(page_size, innobase_page_size,
 static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!",
-  NULL, NULL, (1 << 9)/*512*/, (1 << 9)/*512*/, (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
+  NULL, NULL, (1 << 9)/*512*/, OS_MIN_LOG_BLOCK_SIZE,
+  (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
 
 static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
   PLUGIN_VAR_READONLY,
@@ -12764,6 +12786,20 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
   "Disable with --skip-innodb-doublewrite.",
   NULL, NULL, TRUE);
 
+static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Prevent partial page writes, via atomic writes. "
+  "The option is used to prevent partial writes in case of a crash/poweroff, "
+  "as faster alternative to doublewrite buffer. "
+  "Currently this option works only "
+  "on Linux only with FusionIO device, and directFS filesystem.",
+  NULL, NULL, FALSE); /* adjacent literals concatenate: keep trailing spaces */
+
+static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.",
+  NULL, NULL, TRUE);
+
 static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
   PLUGIN_VAR_RQCMDARG,
   "Number of IOPs the server can do. Tunes the background IO rate",
@@ -13163,7 +13199,7 @@ static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size,
     "The maximum size of changed page bitmap files",
     NULL, NULL, 100*1024*1024ULL, 4096ULL, ULONGLONG_MAX, 0);
 
-static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit,
+static MYSQL_SYSVAR_ULONGLONG(max_changed_pages, srv_max_changed_pages,
   PLUGIN_VAR_RQCMDARG,
   "The maximum number of rows for "
   "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table, "
@@ -13173,8 +13209,8 @@ static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit,
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
   PLUGIN_VAR_RQCMDARG,
-  "Debug flags for InnoDB change buffering (0=none)",
-  NULL, NULL, 0, 0, 1, 0);
+  "Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
+  NULL, NULL, 0, 0, 2, 0);
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
 static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
@@ -13188,11 +13224,23 @@ static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
   "trigger a readahead.",
   NULL, NULL, 56, 0, 64, 0);
 
-#ifdef UNIV_DEBUG_never
+#ifdef UNIV_DEBUG
 static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
-  PLUGIN_VAR_RQCMDARG,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
   "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
   NULL, NULL, 0, 0, 1024, 0);
+
+static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
+  btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
+  "Artificially limit the number of records per B-tree page (0=unlimited).",
+  NULL, NULL, 0, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
+  srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
+  "Pause actual purging any delete-marked records, but merely update the purge view. "
+  "It is to create artificially the situation the purge view have been updated "
+  "but the each purges were not done yet.",
+  NULL, NULL, FALSE);
 #endif /* UNIV_DEBUG */
 
 static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
@@ -13372,6 +13420,11 @@ static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
   "not take any locks at all.",
   NULL, NULL, TRUE);
 
+static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
+  PLUGIN_VAR_OPCMDARG,
+  "Print all deadlocks to MySQL error log (off by default)",
+  NULL, NULL, FALSE);
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(page_size),
   MYSQL_SYSVAR(log_block_size),
@@ -13391,6 +13444,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(doublewrite_file),
   MYSQL_SYSVAR(data_home_dir),
   MYSQL_SYSVAR(doublewrite),
+  MYSQL_SYSVAR(use_atomic_writes),
+  MYSQL_SYSVAR(use_fallocate),
   MYSQL_SYSVAR(recovery_stats),
   MYSQL_SYSVAR(fast_shutdown),
   MYSQL_SYSVAR(file_io_threads),
@@ -13464,7 +13519,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(change_buffering),
   MYSQL_SYSVAR(track_changed_pages),
   MYSQL_SYSVAR(max_bitmap_file_size),
-  MYSQL_SYSVAR(changed_pages_limit),
+  MYSQL_SYSVAR(max_changed_pages),
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
   MYSQL_SYSVAR(change_buffering_debug),
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -13476,14 +13531,17 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(purge_threads),
   MYSQL_SYSVAR(purge_batch_size),
   MYSQL_SYSVAR(rollback_segments),
-#ifdef UNIV_DEBUG_never /* disable this flag. --innodb-trx becomes ambiguous */
+#ifdef UNIV_DEBUG
   MYSQL_SYSVAR(trx_rseg_n_slots_debug),
+  MYSQL_SYSVAR(limit_optimistic_insert_debug),
+  MYSQL_SYSVAR(trx_purge_view_update_only_debug),
 #endif /* UNIV_DEBUG */
   MYSQL_SYSVAR(corrupt_table_action),
   MYSQL_SYSVAR(lazy_drop_table),
   MYSQL_SYSVAR(fake_changes),
   MYSQL_SYSVAR(locking_fake_changes),
   MYSQL_SYSVAR(merge_sort_block_size),
+  MYSQL_SYSVAR(print_all_deadlocks),
   NULL
 };
 
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 359d0b95367..439be10fddb 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -81,12 +81,13 @@ class ha_innobase: public handler
 
 	uchar*		upd_buf;	/*!< buffer used in updates */
 	ulint		upd_buf_size;	/*!< the size of upd_buf in bytes */
-	uchar		srch_key_val1[REC_VERSION_56_MAX_INDEX_COL_LEN + 2];
-	uchar		srch_key_val2[REC_VERSION_56_MAX_INDEX_COL_LEN + 2];
+	uchar		srch_key_val1[MAX_KEY_LENGTH + MAX_REF_PARTS*2];
+	uchar		srch_key_val2[MAX_KEY_LENGTH + MAX_REF_PARTS*2];
 					/*!< buffers used in converting
 					search key values from MySQL format
-					to InnoDB format. "+ 2" for the two
-					bytes where the length is stored */
+					to InnoDB format. For each column
+					2 bytes are used to store length,
+					hence MAX_REF_PARTS*2. */
 	Table_flags	int_table_flags;
 	uint		primary_key;
 	ulong		start_of_scan;	/*!< this is set to 1 when we are
@@ -124,7 +125,6 @@ class ha_innobase: public handler
 	enum row_type get_row_type() const;
 
 	const char* index_type(uint key_number);
-	const char** bas_ext() const;
 	Table_flags table_flags() const;
 	ulong index_flags(uint idx, uint part, bool all_parts) const;
 	uint max_supported_keys() const;
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index 0496cb98080..9886e8f6bd9 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -102,8 +102,6 @@ innobase_col_to_mysql(
 		ut_ad(flen >= len);
 		ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
 		      >= DATA_MBMINLEN(col->mbminmaxlen));
-		ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
-		      > DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
 		memcpy(dest, data, len);
 		break;
 
@@ -113,13 +111,17 @@ innobase_col_to_mysql(
 		/* These column types should never be shipped to MySQL. */
 		ut_ad(0);
 
-	case DATA_CHAR:
 	case DATA_FIXBINARY:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
 	case DATA_DECIMAL:
 		/* Above are the valid column types for MySQL data. */
 		ut_ad(flen == len);
+		/* fall through */
+	case DATA_CHAR:
+		/* We may have flen > len when there is a shorter
+		prefix on a CHAR column. */
+		ut_ad(flen >= len);
 #else /* UNIV_DEBUG */
 	default:
 #endif /* UNIV_DEBUG */
@@ -152,7 +154,7 @@ innobase_rec_to_mysql(
 
 		field->reset();
 
-		ipos = dict_index_get_nth_col_pos(index, i);
+		ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
 
 		if (UNIV_UNLIKELY(ipos == ULINT_UNDEFINED)) {
 null_field:
@@ -309,7 +311,7 @@ innobase_check_index_keys(
 					}
 				}
 
-				my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+				my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
 					 field->field_name);
 				return(ER_WRONG_KEY_COLUMN);
 			}
@@ -323,7 +325,7 @@ innobase_check_index_keys(
 					continue;
 				}
 
-				my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+				my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
 					 key_part1.field->field_name);
 				return(ER_WRONG_KEY_COLUMN);
 			}
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 4b33d6a780c..d64a95a969e 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -169,7 +169,8 @@ do {									\
 	}								\
 } while (0)
 
-#if !defined __STRICT_ANSI__ && defined __GNUC__ && (__GNUC__) > 2 && !defined __INTEL_COMPILER
+#if !defined __STRICT_ANSI__ && defined __GNUC__ && (__GNUC__) > 2 && 	\
+	!defined __INTEL_COMPILER && !defined __clang__
 #define STRUCT_FLD(name, value)	name: value
 #else
 #define STRUCT_FLD(name, value)	value
@@ -1209,7 +1210,7 @@ trx_i_s_common_fill_table(
 	DBUG_ENTER("trx_i_s_common_fill_table");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+	if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -1369,7 +1370,7 @@ i_s_cmp_fill_low(
 	DBUG_ENTER("i_s_cmp_fill_low");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+	if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -1641,7 +1642,7 @@ i_s_cmpmem_fill_low(
 	DBUG_ENTER("i_s_cmpmem_fill_low");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -2274,7 +2275,7 @@ i_s_innodb_buffer_stats_fill_table(
 	DBUG_ENTER("i_s_innodb_buffer_fill_general");
 
 	/* Only allow the PROCESS privilege holder to access the stats */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -2967,7 +2968,7 @@ i_s_innodb_buffer_page_fill_table(
 	DBUG_ENTER("i_s_innodb_buffer_page_fill_table");
 
 	/* deny access to user without PROCESS privilege */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -3512,7 +3513,7 @@ i_s_innodb_buf_page_lru_fill_table(
 	DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table");
 
 	/* deny access to any users that do not hold PROCESS_ACL */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -3746,7 +3747,7 @@ i_s_sys_tables_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -4049,7 +4050,7 @@ i_s_sys_tables_fill_table_stats(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -4293,7 +4294,7 @@ i_s_sys_indexes_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -4530,7 +4531,7 @@ i_s_sys_columns_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -4732,7 +4733,7 @@ i_s_sys_fields_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -4961,7 +4962,7 @@ i_s_sys_foreign_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
                 DBUG_RETURN(0);
 	}
@@ -5172,7 +5173,7 @@ i_s_sys_foreign_cols_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
                 DBUG_RETURN(0);
 	}
 
@@ -5387,7 +5388,7 @@ i_s_sys_stats_fill_table(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
                 DBUG_RETURN(0);
 	}
 
@@ -5573,7 +5574,7 @@ i_s_innodb_rseg_fill(
 	DBUG_ENTER("i_s_innodb_rseg_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -5797,7 +5798,7 @@ i_s_innodb_table_stats_fill(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -5862,7 +5863,7 @@ i_s_innodb_index_stats_fill(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -6054,7 +6055,7 @@ i_s_innodb_admin_command_fill(
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -6431,7 +6432,7 @@ i_s_innodb_buffer_pool_pages_fill(
 	DBUG_ENTER("i_s_innodb_buffer_pool_pages_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -6536,7 +6537,7 @@ i_s_innodb_buffer_pool_pages_index_fill(
 	DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -6605,7 +6606,7 @@ i_s_innodb_buffer_pool_pages_blob_fill(
 	DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -7009,7 +7010,7 @@ i_s_innodb_undo_logs_fill(
 	DBUG_ENTER("i_s_innodb_undo_logs_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 		DBUG_RETURN(0);
 	}
 
@@ -7335,7 +7336,7 @@ i_s_innodb_changed_pages_fill(
 	DBUG_ENTER("i_s_innodb_changed_pages_fill");
 
 	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
+        if (check_global_access(thd, PROCESS_ACL, true)) {
 
 		DBUG_RETURN(0);
 	}
@@ -7356,8 +7357,8 @@ i_s_innodb_changed_pages_fill(
 	}
 
 	while(log_online_bitmap_iterator_next(&i) &&
-	      (!srv_changed_pages_limit ||
-	       output_rows_num < srv_changed_pages_limit) &&
+	      (!srv_max_changed_pages ||
+	       output_rows_num < srv_max_changed_pages) &&
 	      /*
 		There is no need to compare both start LSN and end LSN fields
 		with maximum value. It's enough to compare only start LSN.
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c
index 77305e42fb1..96c264b32b4 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.c
+++ b/storage/xtradb/ibuf/ibuf0ibuf.c
@@ -2912,6 +2912,14 @@ ibuf_get_volume_buffered_count_func(
 	ut_a(len == 1);
 	ut_ad(trx_sys_multiple_tablespace_format);
 
+	if (rec_get_deleted_flag(rec, 0)) {
+		/* This record has been merged already,
+		but apparently the system crashed before
+		the change was discarded from the buffer.
+		Pretend that the record does not exist. */
+		return(0);
+	}
+
 	types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
 
 	switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
@@ -4224,11 +4232,11 @@ ibuf_delete(
 					page, 1);
 		}
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 		page_cur_delete_rec(&page_cur, index, offsets, mtr);
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 		if (page_zip) {
@@ -4333,6 +4341,22 @@ ibuf_delete_rec(
 	ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
 	ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
 
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+	if (ibuf_debug == 2) {
+		/* Inject a fault (crash). We do this before trying
+		optimistic delete, because a pessimistic delete in the
+		change buffer would require a larger test case. */
+
+		/* Flag the buffered record as processed, to avoid
+		an assertion failure after crash recovery. */
+		btr_cur_set_deleted_flag_for_ibuf(
+			btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+		ibuf_mtr_commit(mtr);
+		log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+		DBUG_SUICIDE();
+	}
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
 	success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
 
 	if (success) {
@@ -4367,7 +4391,13 @@ ibuf_delete_rec(
 	ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
 	ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
 
-	/* We have to resort to a pessimistic delete from ibuf */
+	/* We have to resort to a pessimistic delete from ibuf.
+	Delete-mark the record so that it will not be applied again,
+	in case the server crashes before the pessimistic delete is
+	made persistent. */
+	btr_cur_set_deleted_flag_for_ibuf(
+		btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+
 	btr_pcur_store_position(pcur, mtr);
 	ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
 
@@ -4448,7 +4478,7 @@ ibuf_merge_or_delete_for_page(
 	ut_ad(!block || buf_block_get_space(block) == space);
 	ut_ad(!block || buf_block_get_page_no(block) == page_no);
 	ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
-	ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ);
+	ut_ad(!block || buf_block_get_io_fix_unlocked(block) == BUF_IO_READ);
 
 	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
 	    || trx_sys_hdr_page(space, page_no)) {
@@ -4648,7 +4678,7 @@ loop:
 			fputs("InnoDB: Discarding record\n ", stderr);
 			rec_print_old(stderr, rec);
 			fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
-		} else if (block) {
+		} else if (block && !rec_get_deleted_flag(rec, 0)) {
 			/* Now we have at pcur a record which should be
 			applied on the index page; NOTE that the call below
 			copies pointers to fields in rec, and we must
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index cb44129aeb5..97929d44159 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -636,7 +636,7 @@ btr_cur_set_deleted_flag_for_ibuf(
 					when the tablespace is
 					uncompressed */
 	ibool		val,		/*!< in: value to set */
-	mtr_t*		mtr);		/*!< in: mtr */
+	mtr_t*		mtr);		/*!< in/out: mini-transaction */
 /*######################################################################*/
 
 /** In the pessimistic delete, if the page data size drops below this
@@ -806,6 +806,11 @@ srv_printf_innodb_monitor(). */
 extern ulint	btr_cur_n_sea_old;
 #endif /* !UNIV_HOTBACKUP */
 
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+extern uint	btr_cur_limit_optimistic_insert_debug;
+#endif /* UNIV_DEBUG */
+
 #ifndef UNIV_NONINL
 #include "btr0cur.ic"
 #endif
diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic
index e31f77c77eb..5fc4651ca13 100644
--- a/storage/xtradb/include/btr0cur.ic
+++ b/storage/xtradb/include/btr0cur.ic
@@ -27,6 +27,16 @@ Created 10/16/1994 Heikki Tuuri
 #include "btr0btr.h"
 
 #ifdef UNIV_DEBUG
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
+if (btr_cur_limit_optimistic_insert_debug\
+    && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+        CODE;\
+}
+#else
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
 /*********************************************************//**
 Returns the page cursor component of a tree cursor.
 @return	pointer to page cursor component */
@@ -146,6 +156,9 @@ btr_cur_compress_recommendation(
 
 	page = btr_cur_get_page(cursor);
 
+	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+				      return(FALSE));
+
 	if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
 	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
 		&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index d48c7f0212f..e0d7a974fc3 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -199,6 +199,15 @@ struct buf_pool_info_struct{
 
 typedef struct buf_pool_info_struct	buf_pool_info_t;
 
+/** The occupied bytes of lists in all buffer pools */
+struct buf_pools_list_size_struct {
+	ulint	LRU_bytes;		/*!< LRU size in bytes */
+	ulint	unzip_LRU_bytes;	/*!< unzip_LRU size in bytes */
+	ulint	flush_list_bytes;	/*!< flush_list size in bytes */
+};
+
+typedef struct buf_pools_list_size_struct	buf_pools_list_size_t;
+
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Acquire mutex on all buffer pool instances */
@@ -958,7 +967,7 @@ buf_block_set_file_page(
 	ulint			space,	/*!< in: tablespace id */
 	ulint			page_no);/*!< in: page number */
 /*********************************************************************//**
-Gets the io_fix state of a block.
+Gets the io_fix state of a block.  Requires that the block mutex is held.
 @return	io_fix state */
 UNIV_INLINE
 enum buf_io_fix
@@ -967,7 +976,17 @@ buf_page_get_io_fix(
 	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
 	__attribute__((pure));
 /*********************************************************************//**
-Gets the io_fix state of a block.
+Gets the io_fix state of a block.  Does not assert that the block mutex is
+held, to be used in the cases where it is safe not to hold it.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix_unlocked(
+/*=========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the io_fix state of a block.  Requires that the block mutex is held.
 @return	io_fix state */
 UNIV_INLINE
 enum buf_io_fix
@@ -976,6 +995,16 @@ buf_block_get_io_fix(
 	const buf_block_t*	block)	/*!< in: pointer to the control block */
 	__attribute__((pure));
 /*********************************************************************//**
+Gets the io_fix state of a block.  Does not assert that the block mutex is
+held, to be used in the cases where it is safe not to hold it.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix_unlocked(
+/*==========================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
 Sets the io_fix state of a block. */
 UNIV_INLINE
 void
@@ -1054,8 +1083,7 @@ UNIV_INLINE
 void
 buf_page_set_accessed(
 /*==================*/
-	buf_page_t*	bpage,		/*!< in/out: control block */
-	ulint		time_ms)	/*!< in: ut_time_ms() */
+	buf_page_t*	bpage)		/*!< in/out: control block */
 	__attribute__((nonnull));
 /*********************************************************************//**
 Gets the buf_block_t handle of a buffered file block if an uncompressed
@@ -1374,6 +1402,14 @@ buf_get_total_list_len(
 	ulint*		free_len,	/*!< out: length of all free lists */
 	ulint*		flush_list_len);/*!< out: length of all flush lists */
 /********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+	buf_pools_list_size_t*	buf_pools_list_size);	/*!< out: list sizes
+							in all buffer pools */
+/********************************************************************//**
 Get total buffer pool statistics. */
 UNIV_INTERN
 void
@@ -1548,10 +1584,11 @@ struct buf_page_struct{
 					to read this for heuristic
 					purposes without holding any
 					mutex or latch */
-	unsigned	access_time:32;	/*!< time of first access, or
-					0 if the block was never accessed
-					in the buffer pool */
 	/* @} */
+	unsigned	access_time;	/*!< time of first access, or
+					0 if the block was never accessed
+					in the buffer pool. Protected by
+					block mutex */
 	ibool		space_was_being_deleted;
 	ibool		is_corrupt;
 # if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
@@ -1741,6 +1778,8 @@ struct buf_pool_stat_struct{
 				young because the first access
 				was not long enough ago, in
 				buf_page_peek_if_too_old() */
+	ulint	LRU_bytes;	/*!< LRU size in bytes */
+	ulint	flush_list_bytes;/*!< flush_list size in bytes */
 };
 
 /** Statistics of buddy blocks of a given size. */
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index 221f86d9d62..8d5c3edeef8 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -434,7 +434,7 @@ buf_block_set_file_page(
 }
 
 /*********************************************************************//**
-Gets the io_fix state of a block.
+Gets the io_fix state of a block.  Requires that the block mutex is held.
 @return	io_fix state */
 UNIV_INLINE
 enum buf_io_fix
@@ -442,6 +442,20 @@ buf_page_get_io_fix(
 /*================*/
 	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
 {
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	return buf_page_get_io_fix_unlocked(bpage);
+}
+
+/*********************************************************************//**
+Gets the io_fix state of a block.  Does not assert that the block mutex is
+held, to be used in the cases where it is safe not to hold it.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix_unlocked(
+/*=========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
 	enum buf_io_fix	io_fix = (enum buf_io_fix) bpage->io_fix;
 #ifdef UNIV_DEBUG
 	switch (io_fix) {
@@ -457,7 +471,7 @@ buf_page_get_io_fix(
 }
 
 /*********************************************************************//**
-Gets the io_fix state of a block.
+Gets the io_fix state of a block.  Requires that the block mutex is held.
 @return	io_fix state */
 UNIV_INLINE
 enum buf_io_fix
@@ -469,6 +483,19 @@ buf_block_get_io_fix(
 }
 
 /*********************************************************************//**
+Gets the io_fix state of a block.  Does not assert that the block mutex is
+held, to be used in the cases where it is safe not to hold it.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix_unlocked(
+/*==========================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_io_fix_unlocked(&block->page));
+}
+
+/*********************************************************************//**
 Sets the io_fix state of a block. */
 UNIV_INLINE
 void
@@ -638,19 +665,18 @@ UNIV_INLINE
 void
 buf_page_set_accessed(
 /*==================*/
-	buf_page_t*	bpage,		/*!< in/out: control block */
-	ulint		time_ms)	/*!< in: ut_time_ms() */
+	buf_page_t*	bpage)		/*!< in/out: control block */
 {
 #ifdef UNIV_DEBUG
-	//buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-	//ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(!buf_pool_mutex_own(buf_pool));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+#endif
 	ut_a(buf_page_in_file(bpage));
 
 	if (!bpage->access_time) {
 		/* Make this the time of the first access. */
-		bpage->access_time = time_ms;
+		bpage->access_time = ut_time_ms();
 	}
 }
 
diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h
index efaa758f27a..2ea4f9b1ecf 100644
--- a/storage/xtradb/include/buf0lru.h
+++ b/storage/xtradb/include/buf0lru.h
@@ -158,7 +158,10 @@ buf_LRU_block_free_non_file_page(
 	buf_block_t*	block,	/*!< in: block, must not contain a file page */
 	ibool		have_page_hash_mutex);
 /******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Make sure that the zip_size is already
+set in the page zip when invoking this function, so that the correct
+zip_size can be obtained from the buffer page when adding a block to
+the LRU list. */
 UNIV_INTERN
 void
 buf_LRU_add_block(
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
index 757dd815c5e..7ec2cb6cf36 100644
--- a/storage/xtradb/include/data0type.ic
+++ b/storage/xtradb/include/data0type.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -459,36 +459,18 @@ dtype_get_fixed_size_low(
 		} else if (!comp) {
 			return(len);
 		} else {
-			/* We play it safe here and ask MySQL for
-			mbminlen and mbmaxlen.	Although
-			mbminlen and mbmaxlen are
-			initialized if and only if prtype
-			is (in one of the 3 functions in this file),
-			it could be that none of these functions
-			has been called. */
-
+#ifdef UNIV_DEBUG
 			ulint	i_mbminlen, i_mbmaxlen;
 
 			innobase_get_cset_width(
 				dtype_get_charset_coll(prtype),
 				&i_mbminlen, &i_mbmaxlen);
 
-			if (UNIV_UNLIKELY
-			    (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
-			     != mbminmaxlen)) {
-
-				ut_print_timestamp(stderr);
-				fprintf(stderr, "  InnoDB: "
-					"mbminlen=%lu, "
-					"mbmaxlen=%lu, "
-					"type->mbminlen=%lu, "
-					"type->mbmaxlen=%lu\n",
-					(ulong) i_mbminlen,
-					(ulong) i_mbmaxlen,
-					(ulong) DATA_MBMINLEN(mbminmaxlen),
-					(ulong) DATA_MBMAXLEN(mbminmaxlen));
-			}
-			if (i_mbminlen == i_mbmaxlen) {
+			ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+			      == mbminmaxlen);
+#endif /* UNIV_DEBUG */
+			if (DATA_MBMINLEN(mbminmaxlen)
+			    == DATA_MBMAXLEN(mbminmaxlen)) {
 				return(len);
 			}
 		}
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index de3605b1dfb..1dd0b3f5082 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -903,7 +903,7 @@ dict_index_get_nth_col_no(
 Looks for column n in an index.
 @return position in internal representation of the index;
 ULINT_UNDEFINED if not contained */
-UNIV_INLINE
+UNIV_INTERN
 ulint
 dict_index_get_nth_col_pos(
 /*=======================*/
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
index 02eafcc5d9c..eeb916fe181 100644
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@@ -697,20 +697,6 @@ dict_index_get_nth_col_no(
 	return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
 }
 
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
-	const dict_index_t*	index,	/*!< in: index */
-	ulint			n)	/*!< in: column number */
-{
-	return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE));
-}
-
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Returns the minimum data size of an index record.
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index 54593a0b9c7..630942ae2ac 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -377,10 +377,15 @@ struct dict_index_struct{
 	unsigned	type:DICT_IT_BITS;
 				/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
 				DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
-	unsigned	trx_id_offset:10;/*!< position of the trx id column
+#define MAX_KEY_LENGTH_BITS 12
+	unsigned	trx_id_offset:MAX_KEY_LENGTH_BITS;
+				/*!< position of the trx id column
 				in a clustered index record, if the fields
 				before it are known to be of a fixed size,
 				0 otherwise */
+#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#endif
 	unsigned	n_user_defined_cols:10;
 				/*!< number of columns the user defined to
 				be in the index: in the internal
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 7da62e68e56..2149d0aadca 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -776,6 +776,21 @@ fil_space_set_corrupt(
 /*==================*/
 	ulint	space_id);
 
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+	ulint		old_space_id,	/*!< in: tablespace id of the old
+					table. */
+	const char*	old_name,	/*!< in: old table name */
+	ulint		new_space_id,	/*!< in: tablespace id of the new
+					table */
+	const char*	new_name,	/*!< in: new table name */
+	const char*	tmp_name);	/*!< in: temp table name used while
+					swapping */
+
 typedef	struct fil_space_struct	fil_space_t;
 
 #endif
diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h
index ea636f985b4..2b659ab417e 100644
--- a/storage/xtradb/include/lock0lock.h
+++ b/storage/xtradb/include/lock0lock.h
@@ -798,14 +798,22 @@ lock_rec_get_page_no(
 				remains set when the waiting lock is granted,
 				or if the lock is inherited to a neighboring
 				record */
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+#define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock is created
+				by another transaction */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_MODE_MASK
 # error
 #endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK
 # error
 #endif
 /* @} */
 
+/** Checks if this is a waiting lock created by lock->trx itself.
+@param type_mode lock->type_mode
+@return whether it is a waiting lock belonging to lock->trx */
+#define lock_is_wait_not_by_other(type_mode) \
+	(((type_mode) & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
+
 /** Lock operation struct */
 typedef struct lock_op_struct	lock_op_t;
 /** Lock operation struct */
diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h
index e7c3f301e45..999a317780e 100644
--- a/storage/xtradb/include/log0online.h
+++ b/storage/xtradb/include/log0online.h
@@ -41,23 +41,38 @@ typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
 Initializes the online log following subsytem. */
 UNIV_INTERN
 void
-log_online_read_init();
-/*===================*/
+log_online_read_init(void);
+/*=======================*/
 
 /*********************************************************************//**
 Shuts down the online log following subsystem. */
 UNIV_INTERN
 void
-log_online_read_shutdown();
-/*=======================*/
+log_online_read_shutdown(void);
+/*===========================*/
 
 /*********************************************************************//**
 Reads and parses the redo log up to last checkpoint LSN to build the changed
-page bitmap which is then written to disk.  */
+page bitmap which is then written to disk.
+
+@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */
 UNIV_INTERN
-void
-log_online_follow_redo_log();
-/*=========================*/
+ibool
+log_online_follow_redo_log(void);
+/*=============================*/
+
+/************************************************************//**
+Delete all the bitmap files for data less than the specified LSN.
+If called with lsn == 0 (i.e. set by RESET request) or
+IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise
+continue it.
+
+@return FALSE to indicate success, TRUE for failure. */
+UNIV_INTERN
+ibool
+log_online_purge_changed_page_bitmaps(
+/*==================================*/
+	ib_uint64_t lsn);	/*!<in: LSN to purge files up to */
 
 #define LOG_BITMAP_ITERATOR_START_LSN(i) \
 	((i).start_lsn)
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 4c795d93141..05403a8e752 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -190,6 +190,8 @@ extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
 extern ulint	os_n_fsyncs;
 
+#define OS_MIN_LOG_BLOCK_SIZE 512
+
 extern ulint	srv_log_block_size;
 
 #ifdef UNIV_PFS_IO
diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h
index fe3d2e52e0b..23a2cac618b 100644
--- a/storage/xtradb/include/page0zip.h
+++ b/storage/xtradb/include/page0zip.h
@@ -156,9 +156,10 @@ page_zip_validate_low(
 /*==================*/
 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
 	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index,	/*!< in: index of the page, if known */
 	ibool			sloppy)	/*!< in: FALSE=strict,
 					TRUE=ignore the MIN_REC_FLAG */
-	__attribute__((nonnull));
+	__attribute__((nonnull(1,2)));
 /**********************************************************************//**
 Check that the compressed and decompressed pages match. */
 UNIV_INTERN
@@ -166,8 +167,9 @@ ibool
 page_zip_validate(
 /*==============*/
 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	const page_t*		page)	/*!< in: uncompressed page */
-	__attribute__((nonnull));
+	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index)	/*!< in: index of the page, if known */
+	__attribute__((nonnull(1,2)));
 #endif /* UNIV_ZIP_DEBUG */
 
 /**********************************************************************//**
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
index 10b74d18c13..98bf889b996 100644
--- a/storage/xtradb/include/rem0rec.h
+++ b/storage/xtradb/include/rem0rec.h
@@ -362,24 +362,6 @@ rec_get_offsets_func(
 	rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
 
 /******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT.  This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
-	const rec_t*		rec,	/*!< in: physical record in
-					ROW_FORMAT=COMPACT */
-	ulint			extra,	/*!< in: number of bytes to reserve
-					between the record header and
-					the data payload
-					(usually REC_N_NEW_EXTRA_BYTES) */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint*			offsets);/*!< in/out: array of offsets;
-					in: n=rec_offs_n_fields(offsets) */
-
-/******************************************************//**
 The following function determines the offsets to each field
 in the record.  It can reuse a previously allocated array. */
 UNIV_INTERN
@@ -644,8 +626,48 @@ rec_copy(
 /*=====*/
 	void*		buf,	/*!< in: buffer */
 	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull));
 #ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+	__attribute__((warn_unused_result, nonnull));
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+	const rec_t*		rec,	/*!< in: temporary file record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
+	__attribute__((nonnull));
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+	rec_t*			rec,		/*!< out: record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	const dfield_t*		fields,		/*!< in: array of data fields */
+	ulint			n_fields)	/*!< in: number of fields */
+	__attribute__((nonnull));
+
 /**************************************************************//**
 Copies the first n fields of a physical record to a new physical record in
 a buffer.
@@ -680,21 +702,6 @@ rec_fold(
 	__attribute__((pure));
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
-	rec_t*			rec,	/*!< in: origin of record */
-	ulint			extra,	/*!< in: number of bytes to
-					reserve between the record
-					header and the data payload
-					(normally REC_N_NEW_EXTRA_BYTES) */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint			status,	/*!< in: status bits of the record */
-	const dfield_t*		fields,	/*!< in: array of data fields */
-	ulint			n_fields);/*!< in: number of data fields */
-/*********************************************************//**
 Builds a physical record out of a data tuple and
 stores it into the given buffer.
 @return	pointer to the origin of physical record */
@@ -727,10 +734,7 @@ UNIV_INTERN
 ulint
 rec_get_converted_size_comp_prefix(
 /*===============================*/
-	const dict_index_t*	index,	/*!< in: record descriptor;
-					dict_table_is_comp() is
-					assumed to hold, even if
-					it does not */
+	const dict_index_t*	index,	/*!< in: record descriptor */
 	const dfield_t*		fields,	/*!< in: array of data fields */
 	ulint			n_fields,/*!< in: number of data fields */
 	ulint*			extra);	/*!< out: extra size */
diff --git a/storage/xtradb/include/row0undo.h b/storage/xtradb/include/row0undo.h
index 6eb4ca448b3..9420d022e3b 100644
--- a/storage/xtradb/include/row0undo.h
+++ b/storage/xtradb/include/row0undo.h
@@ -87,10 +87,6 @@ that index record. */
 enum undo_exec {
 	UNDO_NODE_FETCH_NEXT = 1,	/*!< we should fetch the next
 					undo log record */
-	UNDO_NODE_PREV_VERS,		/*!< the roll ptr to previous
-					version of a row is stored in
-					node, and undo should be done
-					based on it */
 	UNDO_NODE_INSERT,		/*!< undo a fresh insert of a
 					row to a table */
 	UNDO_NODE_MODIFY		/*!< undo a modify operation
@@ -108,9 +104,6 @@ struct undo_node_struct{
 	undo_no_t	undo_no;/*!< undo number of the record */
 	ulint		rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
 				... */
-	roll_ptr_t	new_roll_ptr;
-				/*!< roll ptr to restore to clustered index
-				record */
 	trx_id_t	new_trx_id; /*!< trx id to restore to clustered index
 				record */
 	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic
index 10646241125..6706c9f8c69 100644
--- a/storage/xtradb/include/row0upd.ic
+++ b/storage/xtradb/include/row0upd.ic
@@ -28,6 +28,7 @@ Created 12/27/1996 Heikki Tuuri
 # include "trx0trx.h"
 # include "trx0undo.h"
 # include "row0row.h"
+# include "lock0lock.h"
 #endif /* !UNIV_HOTBACKUP */
 #include "page0zip.h"
 
@@ -171,6 +172,8 @@ row_upd_rec_sys_fields(
 #if DATA_TRX_ID + 1 != DATA_ROLL_PTR
 # error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
 #endif
+		ut_ad(lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
+					       rec, index, offsets, FALSE));
 		trx_write_trx_id(rec + offset, trx->id);
 		trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
 	}
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 6c5b61487f2..586c1e73879 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -148,7 +148,7 @@ extern my_bool		srv_track_changed_pages;
 extern ib_uint64_t	srv_max_bitmap_file_size;
 
 extern
-ulonglong       srv_changed_pages_limit;
+ulonglong       srv_max_changed_pages;
 
 extern ibool	srv_auto_extend_last_data_file;
 extern ulint	srv_last_file_size_max;
@@ -249,6 +249,11 @@ extern ulong	srv_sys_stats_root_page;
 #endif
 
 extern ibool	srv_use_doublewrite_buf;
+extern ibool	srv_use_atomic_writes;
+#ifdef HAVE_POSIX_FALLOCATE
+extern ibool	srv_use_posix_fallocate;
+#endif
+
 extern ibool	srv_use_checksums;
 extern ibool	srv_fast_checksum;
 
@@ -320,6 +325,10 @@ extern ulint	srv_fatal_semaphore_wait_threshold;
 extern ulint	srv_dml_needed_delay;
 extern long long	srv_kill_idle_transaction;
 
+#ifdef UNIV_DEBUG
+extern my_bool	srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
 extern mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
 				query threads, and lock table: we allocate
 				it from dynamic memory to get it to the
@@ -400,6 +409,9 @@ extern ibool srv_blocking_lru_restore;
 When FALSE, row locks are not taken at all. */
 extern my_bool srv_fake_changes_locks;
 
+/** Print all user-level transaction deadlocks to mysqld stderr */
+extern my_bool srv_print_all_deadlocks;
+
 /** Status variables to be passed to MySQL */
 typedef struct export_var_struct export_struc;
 
@@ -794,7 +806,9 @@ struct export_var_struct{
 	ulint innodb_dict_tables;
 	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
 	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
+	ulint innodb_buffer_pool_bytes_data;	/*!< File bytes used */
 	ulint innodb_buffer_pool_pages_dirty;	/*!< Dirty data pages */
+	ulint innodb_buffer_pool_bytes_dirty;	/*!< File bytes modified */
 	ulint innodb_buffer_pool_pages_misc;	/*!< Miscellanous pages */
 	ulint innodb_buffer_pool_pages_free;	/*!< Free pages */
 #ifdef UNIV_DEBUG
@@ -880,6 +894,11 @@ struct export_var_struct{
 	ib_int64_t innodb_x_lock_os_waits;
 	ib_int64_t innodb_x_lock_spin_rounds;
 	ib_int64_t innodb_x_lock_spin_waits;
+#ifdef UNIV_DEBUG
+	ulint innodb_purge_trx_id_age;		/*!< max_trx_id - purged trx_id */
+	ulint innodb_purge_view_trx_id_age;	/*!< rw_max_trx_id
+						- purged view's min trx_id */
+#endif /* UNIV_DEBUG */
 };
 
 /** Thread slot in the thread table */
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index 4a2f55d90ff..b3b99b10630 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -89,6 +89,7 @@ extern mysql_pfs_key_t	hash_table_mutex_key;
 extern mysql_pfs_key_t	ibuf_bitmap_mutex_key;
 extern mysql_pfs_key_t	ibuf_mutex_key;
 extern mysql_pfs_key_t	ibuf_pessimistic_insert_mutex_key;
+extern mysql_pfs_key_t	log_bmp_sys_mutex_key;
 extern mysql_pfs_key_t	log_sys_mutex_key;
 extern mysql_pfs_key_t	log_flush_order_mutex_key;
 extern mysql_pfs_key_t	kernel_mutex_key;
@@ -672,6 +673,7 @@ or row lock! */
 #define	SYNC_TRX_LOCK_HEAP	298
 #define SYNC_TRX_SYS_HEADER	290
 #define	SYNC_PURGE_QUEUE	200
+#define SYNC_LOG_ONLINE		175
 #define SYNC_LOG		170
 #define SYNC_LOG_FLUSH_ORDER	156
 #define SYNC_RECV		168
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 77acf54d8dc..60c5cc79852 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -49,13 +49,10 @@ Created 1/20/1994 Heikki Tuuri
 #define _IB_TO_STR(s)	#s
 #define IB_TO_STR(s)	_IB_TO_STR(s)
 
-#define INNODB_VERSION_MAJOR	1
-#define INNODB_VERSION_MINOR	1
-#define INNODB_VERSION_BUGFIX	8
+#include <mysql_version.h>
 
-#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 29.3
-#endif
+#define INNODB_VERSION_MAJOR	MYSQL_MAJOR_VERSION
+#define INNODB_VERSION_MINOR	MYSQL_MINOR_VERSION
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -66,11 +63,11 @@ component, i.e. we show M.N.P as M.N */
 #define INNODB_VERSION_SHORT	\
 	(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
 
-#define INNODB_VERSION_STR			\
-	IB_TO_STR(INNODB_VERSION_MAJOR) "."	\
-	IB_TO_STR(INNODB_VERSION_MINOR) "."	\
-	IB_TO_STR(INNODB_VERSION_BUGFIX) "-"	\
-	IB_TO_STR(PERCONA_INNODB_VERSION)
+#ifndef PERCONA_INNODB_VERSION
+#define PERCONA_INNODB_VERSION 30.1
+#endif
+
+#define INNODB_VERSION_STR	MYSQL_SERVER_VERSION "-" IB_TO_STR(PERCONA_INNODB_VERSION)
 
 #define REFMAN "http://dev.mysql.com/doc/refman/"	\
 	IB_TO_STR(MYSQL_MAJOR_VERSION) "."		\
@@ -300,6 +297,24 @@ management to ensure correct alignment for doubles etc. */
 			========================
 */
 
+/** There are currently two InnoDB file formats which are used to group
+features with similar restrictions and dependencies. Using an enum allows
+switch statements to give a compiler warning when a new one is introduced. */
+enum innodb_file_formats_enum {
+	/** Antelope File Format: InnoDB/MySQL up to 5.1.
+	This format includes REDUNDANT and COMPACT row formats */
+	UNIV_FORMAT_A		= 0,
+
+	/** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
+	This format includes COMPRESSED and DYNAMIC row formats.  It
+	includes the ability to create secondary indexes from data that
+	is not on the clustered index page and the ability to store more
+	data off the clustered index page. */
+	UNIV_FORMAT_B		= 1
+};
+
+typedef enum innodb_file_formats_enum innodb_file_formats_t;
+
 /* The 2-logarithm of UNIV_PAGE_SIZE: */
 /* #define UNIV_PAGE_SIZE_SHIFT	14 */
 #define UNIV_PAGE_SIZE_SHIFT_MAX	14
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index f172ad6695b..47d082ed49f 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -790,12 +790,16 @@ lock_reset_lock_and_trx_wait(
 /*=========================*/
 	lock_t*	lock)	/*!< in: record lock */
 {
-	ut_ad((lock->trx)->wait_lock == lock);
 	ut_ad(lock_get_wait(lock));
 
 	/* Reset the back pointer in trx to this waiting lock request */
 
-	(lock->trx)->wait_lock = NULL;
+	if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) {
+		ut_ad((lock->trx)->wait_lock == lock);
+		(lock->trx)->wait_lock = NULL;
+	} else {
+		ut_ad(lock_get_type_low(lock) == LOCK_REC);
+	}
 	lock->type_mode &= ~LOCK_WAIT;
 }
 
@@ -1431,9 +1435,9 @@ lock_rec_has_expl(
 
 	while (lock) {
 		if (lock->trx == trx
+		    && !lock_is_wait_not_by_other(lock->type_mode)
 		    && lock_mode_stronger_or_eq(lock_get_mode(lock),
 						precise_mode & LOCK_MODE_MASK)
-		    && !lock_get_wait(lock)
 		    && (!lock_rec_get_rec_not_gap(lock)
 			|| (precise_mode & LOCK_REC_NOT_GAP)
 			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
@@ -1731,9 +1735,9 @@ lock_rec_create(
 
 	HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
 		    lock_rec_fold(space, page_no), lock);
-	lock_sys->rec_num++;
-	if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 
+	lock_sys->rec_num++;
+	if (lock_is_wait_not_by_other(type_mode)) {
 		lock_set_lock_and_trx_wait(lock, trx);
 	}
 
@@ -1763,10 +1767,11 @@ lock_rec_enqueue_waiting(
 	const buf_block_t*	block,	/*!< in: buffer block containing
 					the record */
 	ulint			heap_no,/*!< in: heap number of the record */
+	lock_t*			lock,	/*!< in: lock object; NULL if a new
+					one should be created. */
 	dict_index_t*		index,	/*!< in: index of record */
 	que_thr_t*		thr)	/*!< in: query thread */
 {
-	lock_t*	lock;
 	trx_t*	trx;
 	ulint   sec;
 	ulint   ms;
@@ -1803,9 +1808,17 @@ lock_rec_enqueue_waiting(
 		ut_ad(0);
 	}
 
-	/* Enqueue the lock request that will wait to be granted */
-	lock = lock_rec_create(type_mode | LOCK_WAIT,
-			       block, heap_no, index, trx);
+	if (lock == NULL) {
+		/* Enqueue the lock request that will wait to be granted */
+		lock = lock_rec_create(type_mode | LOCK_WAIT,
+				       block, heap_no, index, trx);
+	} else {
+		ut_ad(lock->type_mode & LOCK_WAIT);
+		ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER);
+
+		lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+		lock_set_lock_and_trx_wait(lock, trx);
+	}
 
 	/* Check if a deadlock occurs: if yes, remove the lock request and
 	return an error code */
@@ -1829,7 +1842,7 @@ lock_rec_enqueue_waiting(
 	trx->que_state = TRX_QUE_LOCK_WAIT;
 	trx->was_chosen_as_deadlock_victim = FALSE;
 	trx->wait_started = time(NULL);
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		ut_usectime(&sec, &ms);
 		trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
 	}
@@ -2054,6 +2067,7 @@ lock_rec_lock_slow(
 	que_thr_t*		thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
+	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@@ -2068,7 +2082,27 @@ lock_rec_lock_slow(
 
 	trx = thr_get_trx(thr);
 
-	if (lock_rec_has_expl(mode, block, heap_no, trx)) {
+	lock = lock_rec_has_expl(mode, block, heap_no, trx);
+	if (lock) {
+		if (lock->type_mode & LOCK_CONV_BY_OTHER) {
+			/* This lock or lock wait was created by another
+			transaction, not by the transaction (trx) itself.
+			So the transaction (trx) must handle it correctly
+			depending on whether it has been granted or not. */
+
+			if (lock->type_mode & LOCK_WAIT) {
+				/* This lock request has not been granted yet.
+				We must wait until it is granted. */
+
+				goto enqueue_waiting;
+			} else {
+				/* This lock request has already been granted.
+				Just clear the flag. */
+
+				lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+			}
+		}
+
 		/* The trx already has a strong enough lock on rec: do
 		nothing */
 
@@ -2078,8 +2112,10 @@ lock_rec_lock_slow(
 		the queue, as this transaction does not have a lock strong
 		enough already granted on the record, we have to wait. */
 
+		ut_ad(lock == NULL);
+enqueue_waiting:
 		return(lock_rec_enqueue_waiting(mode, block, heap_no,
-						index, thr));
+						lock, index, thr));
 	} else if (!impl) {
 		/* Set the requested lock on the record */
 
@@ -2221,7 +2257,8 @@ lock_grant(
 	TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
 	for it */
 
-	if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+	if (!(lock->type_mode & LOCK_CONV_BY_OTHER)
+	    && lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
 		trx_end_lock_wait(lock->trx);
 	}
 }
@@ -2238,6 +2275,7 @@ lock_rec_cancel(
 {
 	ut_ad(mutex_own(&kernel_mutex));
 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
 
 	/* Reset the bit (there can be only one set bit) in the lock bitmap */
 	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2382,8 +2420,12 @@ lock_rec_reset_and_release_wait(
 	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock != NULL) {
-		if (lock_get_wait(lock)) {
+		if (lock_is_wait_not_by_other(lock->type_mode)) {
 			lock_rec_cancel(lock);
+		} else if (lock_get_wait(lock)) {
+			/* just reset LOCK_WAIT */
+			lock_rec_reset_nth_bit(lock, heap_no);
+			lock_reset_lock_and_trx_wait(lock);
 		} else {
 			lock_rec_reset_nth_bit(lock, heap_no);
 		}
@@ -3271,6 +3313,80 @@ lock_rec_restore_from_page_infimum(
 
 /*=========== DEADLOCK CHECKING ======================================*/
 
+/*********************************************************************//**
+Rewind the file that stores the latest detected deadlock and timestamp it.
+If printing of all deadlocks to stderr is enabled, also print a heading
+message and a timestamp to stderr. */
+UNIV_INLINE
+void
+lock_deadlock_start_print(void)
+/*===========================*/
+{
+	rewind(lock_latest_err_file);
+	ut_print_timestamp(lock_latest_err_file);
+
+	if (srv_print_all_deadlocks) {
+		fprintf(stderr, "InnoDB: transactions deadlock detected, "
+			"dumping detailed information.\n");
+		ut_print_timestamp(stderr);
+	}
+}
+
+/*********************************************************************//**
+Print a message to the deadlock file and possibly to stderr. */
+UNIV_INLINE
+void
+lock_deadlock_fputs(
+/*================*/
+	const char*	msg)	/*!< in: message to print */
+{
+	fputs(msg, lock_latest_err_file);
+
+	if (srv_print_all_deadlocks) {
+		fputs(msg, stderr);
+	}
+}
+
+/*********************************************************************//**
+Print transaction data to the deadlock file and possibly to stderr. */
+UNIV_INLINE
+void
+lock_deadlock_trx_print(
+/*====================*/
+	trx_t*	trx,		/*!< in: transaction */
+	ulint	max_query_len)	/*!< in: max query length to print, or 0 to
+				use the default max length */
+{
+	trx_print(lock_latest_err_file, trx, max_query_len);
+
+	if (srv_print_all_deadlocks) {
+		trx_print(stderr, trx, max_query_len);
+	}
+}
+
+/*********************************************************************//**
+Print lock data to the deadlock file and possibly to stderr. */
+UNIV_INLINE
+void
+lock_deadlock_lock_print(
+/*=====================*/
+	const lock_t*	lock)	/*!< in: record or table type lock */
+{
+	if (lock_get_type_low(lock) == LOCK_REC) {
+		lock_rec_print(lock_latest_err_file, lock);
+
+		if (srv_print_all_deadlocks) {
+			lock_rec_print(stderr, lock);
+		}
+	} else {
+		lock_table_print(lock_latest_err_file, lock);
+
+		if (srv_print_all_deadlocks) {
+			lock_table_print(stderr, lock);
+		}
+	}
+}
+
 /********************************************************************//**
 Checks if a lock request results in a deadlock.
 @return TRUE if a deadlock was detected and we chose trx as a victim;
@@ -3314,31 +3430,26 @@ retry:
 		/* If the lock search exceeds the max step
 		or the max depth, the current trx will be
 		the victim. Print its information. */
-		rewind(lock_latest_err_file);
-		ut_print_timestamp(lock_latest_err_file);
+		lock_deadlock_start_print();
 
-		fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
-		      " WAITS-FOR GRAPH, WE WILL ROLL BACK"
-		      " FOLLOWING TRANSACTION \n",
-		      lock_latest_err_file);
+		lock_deadlock_fputs(
+			"TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+			" WAITS-FOR GRAPH, WE WILL ROLL BACK"
+			" FOLLOWING TRANSACTION \n\n"
+			"*** TRANSACTION:\n");
 
-		fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
-		      trx_print(lock_latest_err_file, trx, 3000);
+		lock_deadlock_trx_print(trx, 3000);
 
-		fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
-		      lock_latest_err_file);
+		lock_deadlock_fputs(
+			"*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+		lock_deadlock_lock_print(lock);
 
-		if (lock_get_type(lock) == LOCK_REC) {
-			lock_rec_print(lock_latest_err_file, lock);
-		} else {
-			lock_table_print(lock_latest_err_file, lock);
-		}
 		break;
 
 	case LOCK_VICTIM_IS_START:
 		srv_n_lock_deadlock_count++;
-		fputs("*** WE ROLL BACK TRANSACTION (2)\n",
-		      lock_latest_err_file);
+		lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
 		break;
 
 	default:
@@ -3453,45 +3564,33 @@ lock_deadlock_recursive(
 				point: a deadlock detected; or we have
 				searched the waits-for graph too long */
 
-				FILE*	ef = lock_latest_err_file;
+				lock_deadlock_start_print();
 
-				rewind(ef);
-				ut_print_timestamp(ef);
+				lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
 
-				fputs("\n*** (1) TRANSACTION:\n", ef);
+				lock_deadlock_trx_print(wait_lock->trx, 3000);
 
-				trx_print(ef, wait_lock->trx, 3000);
+				lock_deadlock_fputs(
+					"*** (1) WAITING FOR THIS LOCK"
+					" TO BE GRANTED:\n");
 
-				fputs("*** (1) WAITING FOR THIS LOCK"
-				      " TO BE GRANTED:\n", ef);
+				lock_deadlock_lock_print(wait_lock);
 
-				if (lock_get_type_low(wait_lock) == LOCK_REC) {
-					lock_rec_print(ef, wait_lock);
-				} else {
-					lock_table_print(ef, wait_lock);
-				}
+				lock_deadlock_fputs("*** (2) TRANSACTION:\n");
 
-				fputs("*** (2) TRANSACTION:\n", ef);
+				lock_deadlock_trx_print(lock->trx, 3000);
 
-				trx_print(ef, lock->trx, 3000);
+				lock_deadlock_fputs(
+					"*** (2) HOLDS THE LOCK(S):\n");
 
-				fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
+				lock_deadlock_lock_print(lock);
 
-				if (lock_get_type_low(lock) == LOCK_REC) {
-					lock_rec_print(ef, lock);
-				} else {
-					lock_table_print(ef, lock);
-				}
+				lock_deadlock_fputs(
+					"*** (2) WAITING FOR THIS LOCK"
+					" TO BE GRANTED:\n");
 
-				fputs("*** (2) WAITING FOR THIS LOCK"
-				      " TO BE GRANTED:\n", ef);
+				lock_deadlock_lock_print(start->wait_lock);
 
-				if (lock_get_type_low(start->wait_lock)
-				    == LOCK_REC) {
-					lock_rec_print(ef, start->wait_lock);
-				} else {
-					lock_table_print(ef, start->wait_lock);
-				}
 #ifdef UNIV_DEBUG
 				if (lock_print_waits) {
 					fputs("Deadlock detected\n",
@@ -3514,8 +3613,8 @@ lock_deadlock_recursive(
 				as a victim to try to avoid deadlocking our
 				recursion starting point transaction */
 
-				fputs("*** WE ROLL BACK TRANSACTION (1)\n",
-				      ef);
+				lock_deadlock_fputs(
+					"*** WE ROLL BACK TRANSACTION (1)\n");
 
 				wait_lock->trx->was_chosen_as_deadlock_victim
 					= TRUE;
@@ -3600,6 +3699,7 @@ lock_table_create(
 
 	ut_ad(table && trx);
 	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(!(type_mode & LOCK_CONV_BY_OTHER));
 
 	if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
 		++table->n_waiting_or_granted_auto_inc_locks;
@@ -3837,7 +3937,7 @@ lock_table_enqueue_waiting(
 		return(DB_SUCCESS);
 	}
 
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		ut_usectime(&sec, &ms);
 		trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
 	}
@@ -4163,6 +4263,7 @@ lock_cancel_waiting_and_release(
 	lock_t*	lock)	/*!< in: waiting lock request */
 {
 	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
 
 	if (lock_get_type_low(lock) == LOCK_REC) {
 
@@ -4658,12 +4759,16 @@ loop:
 
 			lock_mutex_exit_kernel();
 
-			mtr_start(&mtr);
+			if (srv_show_verbose_locks) {
+				mtr_start(&mtr);
 
-			buf_page_get_with_no_latch(space, zip_size,
-						   page_no, &mtr);
+				buf_page_get_gen(space, zip_size, page_no,
+						 RW_NO_LATCH, NULL,
+						 BUF_GET_POSSIBLY_FREED,
+						 __FILE__, __LINE__, &mtr);
 
-			mtr_commit(&mtr);
+				mtr_commit(&mtr);
+			}
 
 			load_page_first = FALSE;
 
@@ -5211,7 +5316,7 @@ lock_rec_insert_check_and_lock(
 		err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
 					       | LOCK_INSERT_INTENTION,
 					       block, next_rec_heap_no,
-					       index, thr);
+					       NULL, index, thr);
 	} else {
 		err = DB_SUCCESS;
 	}
@@ -5287,10 +5392,23 @@ lock_rec_convert_impl_to_expl(
 
 		if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
 				       heap_no, impl_trx)) {
+			ulint	type_mode = (LOCK_REC | LOCK_X
+					     | LOCK_REC_NOT_GAP);
+
+			/* If the delete-marked record already has a
+			conflicting lock, the lock cannot be granted now:
+			enqueue a lock wait on behalf of impl_trx instead. */
+
+			if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
+			    && lock_rec_other_has_conflicting(
+					LOCK_X | LOCK_REC_NOT_GAP, block,
+					heap_no, impl_trx)) {
+
+				type_mode |= (LOCK_WAIT | LOCK_CONV_BY_OTHER);
+			}
 
 			lock_rec_add_to_queue(
-				LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
-				block, heap_no, index, impl_trx);
+				type_mode, block, heap_no, index, impl_trx);
 		}
 	}
 }
diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c
index 55eb9d17c46..be0a9708b8c 100644
--- a/storage/xtradb/log/log0online.c
+++ b/storage/xtradb/log/log0online.c
@@ -36,6 +36,11 @@ Online database log parsing for changed page tracking
 
 enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) };
 
+#ifdef UNIV_PFS_MUTEX
+/* Key to register log_bmp_sys->mutex with PFS */
+UNIV_INTERN mysql_pfs_key_t	log_bmp_sys_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
 /** Log parsing and bitmap output data structure */
 struct log_bitmap_struct {
 	byte		read_buf[FOLLOW_SCAN_SIZE];
@@ -69,6 +74,7 @@ struct log_bitmap_struct {
 					both the correct type and the tree does
 					not mind its overwrite during
 					rbt_next() tree traversal. */
+	mutex_t		mutex;		/*!< mutex protecting all the fields.*/
 };
 
 /* The log parsing and bitmap output struct instance */
@@ -172,6 +178,8 @@ log_online_set_page_bit(
 	byte		search_page[MODIFIED_PAGE_BLOCK_SIZE];
 	byte		*page_ptr;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	ut_a(space != ULINT_UNDEFINED);
 	ut_a(page_no != ULINT_UNDEFINED);
 
@@ -312,8 +320,8 @@ its name is correct and use it for (re-)tracking start.
 @return the last fully tracked LSN */
 static
 ib_uint64_t
-log_online_read_last_tracked_lsn()
-/*==============================*/
+log_online_read_last_tracked_lsn(void)
+/*==================================*/
 {
 	byte		page[MODIFIED_PAGE_BLOCK_SIZE];
 	ibool		is_last_page	= FALSE;
@@ -405,8 +413,10 @@ log_online_can_track_missing(
 
 	if (last_tracked_lsn > tracking_start_lsn) {
 		fprintf(stderr,
-			"InnoDB: Error: last tracked LSN is in future.  This "
-			"can be caused by mismatched bitmap files.\n");
+			"InnoDB: Error: last tracked LSN %llu is ahead of "
+			"tracking start LSN %llu.  This can be caused by "
+			"mismatched bitmap files.\n", last_tracked_lsn,
+			tracking_start_lsn);
 		exit(1);
 	}
 
@@ -431,10 +441,10 @@ log_online_track_missing_on_startup(
 {
 	ut_ad(last_tracked_lsn != tracking_start_lsn);
 
-	fprintf(stderr, "InnoDB: last tracked LSN is %llu, but the last "
-		"checkpoint LSN is %llu.  This might be due to a server "
-		"crash or a very fast shutdown.  ", last_tracked_lsn,
-		tracking_start_lsn);
+	fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but the "
+		"last checkpoint LSN is %llu.  This might be due to a server "
+		"crash or a very fast shutdown.  ", log_bmp_sys->out.name,
+		last_tracked_lsn, tracking_start_lsn);
 
 	/* See if we can fully recover the missing interval */
 	if (log_online_can_track_missing(last_tracked_lsn,
@@ -446,7 +456,9 @@ log_online_track_missing_on_startup(
 		log_bmp_sys->start_lsn = ut_max_uint64(last_tracked_lsn,
 						       MIN_TRACKED_LSN);
 		log_set_tracked_lsn(log_bmp_sys->start_lsn);
-		log_online_follow_redo_log();
+		if (!log_online_follow_redo_log()) {
+			exit(1);
+		}
 		ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn);
 
 		fprintf(stderr,
@@ -484,18 +496,47 @@ log_online_make_bitmap_name(
 }
 
 /*********************************************************************//**
-Create a new empty bitmap output file.  */
+Check if an old file that has the name of a new bitmap file we are about to
+create should be overwritten.  */
 static
-void
-log_online_start_bitmap_file()
-/*==========================*/
+ibool
+log_online_should_overwrite(
+/*========================*/
+	const char	*path)	/*!< in: path to file */
 {
-	ibool	success;
+	ibool		success;
+	os_file_stat_t	file_info;
 
-	log_bmp_sys->out.file
-		= os_file_create(innodb_file_bmp_key, log_bmp_sys->out.name,
-				 OS_FILE_OVERWRITE, OS_FILE_NORMAL,
-				 OS_DATA_FILE, &success);
+	/* Currently, it's OK to overwrite 0-sized files only */
+	success = os_file_get_status(path, &file_info);
+	return success && file_info.size == 0LL;
+}
+
+/*********************************************************************//**
+Create a new empty bitmap output file.
+
+@return TRUE if operation succeeded, FALSE if I/O error */
+static
+ibool
+log_online_start_bitmap_file(void)
+/*==============================*/
+{
+	ibool	success	= TRUE;
+
+	/* Check for an old file that should be deleted first */
+	if (log_online_should_overwrite(log_bmp_sys->out.name)) {
+		success = os_file_delete(log_bmp_sys->out.name);
+	}
+
+	if (UNIV_LIKELY(success)) {
+		log_bmp_sys->out.file
+			= os_file_create_simple_no_error_handling(
+							innodb_file_bmp_key,
+							log_bmp_sys->out.name,
+							OS_FILE_CREATE,
+							OS_FILE_READ_WRITE,
+							&success);
+	}
 	if (UNIV_UNLIKELY(!success)) {
 
 		/* The following call prints an error message */
@@ -503,25 +544,32 @@ log_online_start_bitmap_file()
 		fprintf(stderr,
 			"InnoDB: Error: Cannot create \'%s\'\n",
 			log_bmp_sys->out.name);
-		exit(1);
+		log_bmp_sys->out.file = -1;
+		return FALSE;
 	}
 
 	log_bmp_sys->out.offset = 0;
+	return TRUE;
 }
 
 /*********************************************************************//**
-Close the current bitmap output file and create the next one.  */
+Close the current bitmap output file and create the next one.
+
+@return TRUE if operation succeeded, FALSE if I/O error */
 static
-void
+ibool
 log_online_rotate_bitmap_file(
 /*===========================*/
 	ib_uint64_t	next_file_start_lsn)	/*!<in: the start LSN name
 						part */
 {
-	os_file_close(log_bmp_sys->out.file);
+	if (log_bmp_sys->out.file != -1) {
+		os_file_close(log_bmp_sys->out.file);
+		log_bmp_sys->out.file = -1;
+	}
 	log_bmp_sys->out_seq_num++;
 	log_online_make_bitmap_name(next_file_start_lsn);
-	log_online_start_bitmap_file();
+	return log_online_start_bitmap_file();
 }
 
 /*********************************************************************//**
@@ -556,8 +604,8 @@ log_online_is_bitmap_file(
 Initialize the online log following subsytem. */
 UNIV_INTERN
 void
-log_online_read_init()
-/*==================*/
+log_online_read_init(void)
+/*======================*/
 {
 	ibool		success;
 	ib_uint64_t	tracking_start_lsn
@@ -566,13 +614,16 @@ log_online_read_init()
 	os_file_stat_t	bitmap_dir_file_info;
 	ib_uint64_t	last_file_start_lsn	= MIN_TRACKED_LSN;
 
-	/* Assert (could be compile-time assert) that bitmap data start and end
-	in a bitmap block is 8-byte aligned */
-	ut_a(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
-	ut_a(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);
+	/* Bitmap data start and end in a bitmap block must be 8-byte
+	aligned. */
+	compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
+	compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);
 
 	log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys));
 
+	mutex_create(log_bmp_sys_mutex_key, &log_bmp_sys->mutex,
+		     SYNC_LOG_ONLINE);
+
 	/* Enumerate existing bitmap files to either open the last one to get
 	the last tracked LSN either to find that there are none and start
 	tracking from scratch.  */
@@ -629,7 +680,9 @@ log_online_read_init()
 	if (!success) {
 
 		/* New file, tracking from scratch */
-		log_online_start_bitmap_file();
+		if (!log_online_start_bitmap_file()) {
+			exit(1);
+		}
 	}
 	else {
 
@@ -637,6 +690,7 @@ log_online_read_init()
 		ulint		size_low;
 		ulint		size_high;
 		ib_uint64_t	last_tracked_lsn;
+		ib_uint64_t	file_start_lsn;
 
 		success = os_file_get_size(log_bmp_sys->out.file, &size_low,
 					   &size_high);
@@ -667,10 +721,12 @@ log_online_read_init()
 		if we can retrack any missing data. */
 		if (log_online_can_track_missing(last_tracked_lsn,
 						 tracking_start_lsn)) {
-			log_online_rotate_bitmap_file(last_tracked_lsn);
+			file_start_lsn = last_tracked_lsn;
+		} else {
+			file_start_lsn = tracking_start_lsn;
 		}
-		else {
-			log_online_rotate_bitmap_file(tracking_start_lsn);
+		if (!log_online_rotate_bitmap_file(file_start_lsn)) {
+			exit(1);
 		}
 
 		if (last_tracked_lsn < tracking_start_lsn) {
@@ -701,12 +757,15 @@ log_online_read_init()
 Shut down the online log following subsystem. */
 UNIV_INTERN
 void
-log_online_read_shutdown()
-/*======================*/
+log_online_read_shutdown(void)
+/*==========================*/
 {
 	ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list;
 
-	os_file_close(log_bmp_sys->out.file);
+	if (log_bmp_sys->out.file != -1) {
+		os_file_close(log_bmp_sys->out.file);
+		log_bmp_sys->out.file = -1;
+	}
 
 	rbt_free(log_bmp_sys->modified_pages);
 
@@ -716,6 +775,8 @@ log_online_read_shutdown()
 		free_list_node = next;
 	}
 
+	mutex_free(&log_bmp_sys->mutex);
+
 	ut_free(log_bmp_sys);
 }
 
@@ -759,14 +820,16 @@ from the buffer.  If an incomplete record is found, moves it to the end of the
 buffer. */
 static
 void
-log_online_parse_redo_log()
-/*=======================*/
+log_online_parse_redo_log(void)
+/*===========================*/
 {
 	byte *ptr = log_bmp_sys->parse_buf;
 	byte *end = log_bmp_sys->parse_buf_end;
 
 	ulint len = 0;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	while (ptr != end
 	       && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) {
 
@@ -857,6 +920,8 @@ log_online_add_to_parse_buf(
 	ulint actual_data_len = (end_offset >= start_offset)
 		? end_offset - start_offset : 0;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset,
 		  actual_data_len);
 
@@ -881,6 +946,8 @@ log_online_parse_redo_log_block(
 {
 	ulint block_data_len;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	block_data_len = log_block_get_data_len(log_block);
 
 	ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0
@@ -907,6 +974,8 @@ log_online_follow_log_seg(
 	byte* log_block_end = log_bmp_sys->read_buf
 		+ (block_end_lsn - block_start_lsn);
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	mutex_enter(&log_sys->mutex);
 	log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf,
 			       group, block_start_lsn, block_end_lsn);
@@ -969,6 +1038,8 @@ log_online_follow_log_group(
 	ib_uint64_t block_start_lsn = contiguous_lsn;
 	ib_uint64_t block_end_lsn;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
 	log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn;
 	log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf;
 
@@ -996,19 +1067,26 @@ log_online_follow_log_group(
 
 /*********************************************************************//**
 Write, flush one bitmap block to disk and advance the output position if
-successful. */
+successful.
+
+@return TRUE if page written OK, FALSE if I/O error */
 static
-void
+ibool
 log_online_write_bitmap_page(
 /*=========================*/
 	const byte *block)	/*!< in: block to write */
 {
 	ibool	success;
 
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
+
+	/* Simulate a write error */
+	DBUG_EXECUTE_IF("bitmap_page_write_error", return FALSE;);
+
 	success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
 				block,
 				(ulint)(log_bmp_sys->out.offset & 0xFFFFFFFF),
-				(ulint)(log_bmp_sys->out.offset << 32),
+				(ulint)(log_bmp_sys->out.offset >> 32),
 				MODIFIED_PAGE_BLOCK_SIZE);
 	if (UNIV_UNLIKELY(!success)) {
 
@@ -1016,7 +1094,7 @@ log_online_write_bitmap_page(
 		os_file_get_last_error(TRUE);
 		fprintf(stderr, "InnoDB: Error: failed writing changed page "
 			"bitmap file \'%s\'\n", log_bmp_sys->out.name);
-		return;
+		return FALSE;
 	}
 
 	success = os_file_flush(log_bmp_sys->out.file, FALSE);
@@ -1027,25 +1105,38 @@ log_online_write_bitmap_page(
 		fprintf(stderr, "InnoDB: Error: failed flushing "
 			"changed page bitmap file \'%s\'\n",
 			log_bmp_sys->out.name);
-		return;
+		return FALSE;
 	}
 
+#ifdef UNIV_LINUX
+	posix_fadvise(log_bmp_sys->out.file, log_bmp_sys->out.offset,
+		      MODIFIED_PAGE_BLOCK_SIZE, POSIX_FADV_DONTNEED);
+#endif
+
 	log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE;
+	return TRUE;
 }
 
 /*********************************************************************//**
 Append the current changed page bitmap to the bitmap file.  Clears the
-bitmap tree and recycles its nodes to the free list. */
+bitmap tree and recycles its nodes to the free list.
+
+@return TRUE if bitmap written OK, FALSE if I/O error*/
 static
-void
-log_online_write_bitmap()
-/*=====================*/
+ibool
+log_online_write_bitmap(void)
+/*=========================*/
 {
 	ib_rbt_node_t		*bmp_tree_node;
 	const ib_rbt_node_t	*last_bmp_tree_node;
+	ibool			success = TRUE;
+
+	ut_ad(mutex_own(&log_bmp_sys->mutex));
 
 	if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) {
-		log_online_rotate_bitmap_file(log_bmp_sys->start_lsn);
+		if (!log_online_rotate_bitmap_file(log_bmp_sys->start_lsn)) {
+			return FALSE;
+		}
 	}
 
 	bmp_tree_node = (ib_rbt_node_t *)
@@ -1056,18 +1147,25 @@ log_online_write_bitmap()
 
 		byte *page = rbt_value(byte, bmp_tree_node);
 
-		if (bmp_tree_node == last_bmp_tree_node) {
-			mach_write_to_4(page + MODIFIED_PAGE_IS_LAST_BLOCK, 1);
-		}
+		/* In case of a bitmap page write error keep on looping over
+		the tree to reclaim its memory through the free list instead of
+		returning immediately. */
+		if (UNIV_LIKELY(success)) {
+			if (bmp_tree_node == last_bmp_tree_node) {
+				mach_write_to_4(page
+						+ MODIFIED_PAGE_IS_LAST_BLOCK,
+						1);
+			}
 
-		mach_write_to_8(page + MODIFIED_PAGE_START_LSN,
-				log_bmp_sys->start_lsn);
-		mach_write_to_8(page + MODIFIED_PAGE_END_LSN,
-				log_bmp_sys->end_lsn);
-		mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM,
-				log_online_calc_checksum(page));
+			mach_write_to_8(page + MODIFIED_PAGE_START_LSN,
+				       log_bmp_sys->start_lsn);
+			mach_write_to_8(page + MODIFIED_PAGE_END_LSN,
+				       log_bmp_sys->end_lsn);
+			mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM,
+					log_online_calc_checksum(page));
 
-		log_online_write_bitmap_page(page);
+			success = log_online_write_bitmap_page(page);
+		}
 
 		bmp_tree_node->left = log_bmp_sys->page_free_list;
 		log_bmp_sys->page_free_list = bmp_tree_node;
@@ -1077,18 +1175,29 @@ log_online_write_bitmap()
 	}
 
 	rbt_reset(log_bmp_sys->modified_pages);
+	return success;
 }
 
 /*********************************************************************//**
 Read and parse the redo log up to last checkpoint LSN to build the changed
-page bitmap which is then written to disk.  */
+page bitmap which is then written to disk.
+
+@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */
 UNIV_INTERN
-void
-log_online_follow_redo_log()
-/*========================*/
+ibool
+log_online_follow_redo_log(void)
+/*============================*/
 {
 	ib_uint64_t	contiguous_start_lsn;
 	log_group_t*	group;
+	ibool		result;
+
+	mutex_enter(&log_bmp_sys->mutex);
+
+	if (!srv_track_changed_pages) {
+		mutex_exit(&log_bmp_sys->mutex);
+		return FALSE;
+	}
 
 	/* Grab the LSN of the last checkpoint, we will parse up to it */
 	mutex_enter(&(log_sys->mutex));
@@ -1096,7 +1205,8 @@ log_online_follow_redo_log()
 	mutex_exit(&(log_sys->mutex));
 
 	if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) {
-		return;
+		mutex_exit(&log_bmp_sys->mutex);
+		return TRUE;
 	}
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -1114,9 +1224,12 @@ log_online_follow_redo_log()
 	tracked LSN, so that LSN tracking for this interval is tested. */
 	DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE(););
 
-	log_online_write_bitmap();
+	result = log_online_write_bitmap();
 	log_bmp_sys->start_lsn = log_bmp_sys->end_lsn;
 	log_set_tracked_lsn(log_bmp_sys->start_lsn);
+
+	mutex_exit(&log_bmp_sys->mutex);
+	return result;
 }
 
 /*********************************************************************//**
@@ -1514,3 +1627,81 @@ log_online_bitmap_iterator_next(
 
 	return TRUE;
 }
+
+/************************************************************//**
+Delete all the bitmap files for data less than the specified LSN.
+If called with lsn == 0 (i.e. set by RESET request) or
+IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise
+continue it.
+
+@return FALSE to indicate success, TRUE for failure. */
+UNIV_INTERN
+ibool
+log_online_purge_changed_page_bitmaps(
+/*==================================*/
+	ib_uint64_t lsn)	/*!< in: LSN to purge files up to */
+{
+	log_online_bitmap_file_range_t	bitmap_files;
+	size_t				i;
+	ibool				result = FALSE;
+
+	if (lsn == 0) {
+		lsn = IB_ULONGLONG_MAX;
+	}
+
+	if (srv_track_changed_pages) {
+		/* User requests may arrive with tracking on or off */
+		mutex_enter(&log_bmp_sys->mutex);
+	}
+
+	if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, lsn)) {
+		if (srv_track_changed_pages) {
+			mutex_exit(&log_bmp_sys->mutex);
+		}
+		return TRUE;
+	}
+
+	if (srv_track_changed_pages && lsn >= log_bmp_sys->end_lsn
+	    && log_bmp_sys->out.file != -1) {
+		/* The current output file may be deleted below: close it. */
+		os_file_close(log_bmp_sys->out.file);
+		log_bmp_sys->out.file = -1;
+	}
+
+	for (i = 0; i < bitmap_files.count; i++) {
+		if (bitmap_files.files[i].seq_num == 0
+		    || bitmap_files.files[i].start_lsn >= lsn) {
+			break;
+		}
+		if (!os_file_delete_if_exists(bitmap_files.files[i].name)) {
+			os_file_get_last_error(TRUE);
+			result = TRUE;
+			break;
+		}
+	}
+
+	if (srv_track_changed_pages) {
+		/* Same bound as the close above: closed implies rotated. */
+		if (lsn >= log_bmp_sys->end_lsn) {
+			ib_uint64_t	new_file_lsn;
+			if (lsn == IB_ULONGLONG_MAX) {
+				/* RESET restarts the sequence */
+				log_bmp_sys->out_seq_num = 0;
+				new_file_lsn = 0;
+			} else {
+				new_file_lsn = log_bmp_sys->end_lsn;
+			}
+			if (!log_online_rotate_bitmap_file(new_file_lsn)) {
+				/* If file create failed, signal the log
+				tracking thread to quit next time it wakes
+				up.  */
+				srv_track_changed_pages = FALSE;
+			}
+		}
+
+		mutex_exit(&log_bmp_sys->mutex);
+	}
+
+	free(bitmap_files.files);
+	return result;
+}
diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c
index 5ab8c14ae2e..4e179afd50c 100644
--- a/storage/xtradb/log/log0recv.c
+++ b/storage/xtradb/log/log0recv.c
@@ -994,8 +994,11 @@ recv_parse_or_apply_log_rec_body(
 				not NULL, then the log record is
 				applied to the page, and the log
 				record should be complete then */
-	mtr_t*		mtr)	/*!< in: mtr or NULL; should be non-NULL
+	mtr_t*		mtr,	/*!< in: mtr or NULL; should be non-NULL
 				if and only if block is non-NULL */
+	ulint		space_id)
+				/*!< in: tablespace id obtained by
+				parsing initial log record */
 {
 	dict_index_t*	index	= NULL;
 	page_t*		page;
@@ -1267,8 +1270,11 @@ recv_parse_or_apply_log_rec_body(
 		ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
 		ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 		break;
-	case MLOG_FILE_CREATE:
 	case MLOG_FILE_RENAME:
+		ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
+						 space_id, 0);
+		break;
+	case MLOG_FILE_CREATE:
 	case MLOG_FILE_DELETE:
 	case MLOG_FILE_CREATE2:
 		ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
@@ -1672,7 +1678,8 @@ recv_recover_page_func(
 
 			recv_parse_or_apply_log_rec_body(recv->type, buf,
 							 buf + recv->len,
-							 block, &mtr);
+							 block, &mtr,
+							 recv_addr->space);
 
 			if (srv_recovery_stats) {
 				mutex_enter(&(recv_sys->mutex));
@@ -1704,9 +1711,8 @@ recv_recover_page_func(
 	if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
 
-		if (page_zip) {
-			ut_a(page_zip_validate_low(page_zip, page, FALSE));
-		}
+		ut_a(!page_zip
+		     || page_zip_validate_low(page_zip, page, NULL, FALSE));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -2158,7 +2164,7 @@ recv_parse_log_rec(
 #endif /* UNIV_LOG_LSN_DEBUG */
 
 	new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
-						   NULL, NULL);
+						   NULL, NULL, *space);
 	if (UNIV_UNLIKELY(new_ptr == NULL)) {
 
 		return(0);
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 8e0516a84a9..8f1b3e46bb2 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -1454,6 +1454,43 @@ os_file_set_nocache(
 #endif
 }
 
+
+#ifdef __linux__
+#include <sys/ioctl.h>
+#ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
+#endif
+/* Issue the DFS_IOCTL_ATOMIC_WRITE_SET ioctl() on 'file' with option 1.
+Returns the ioctl() result; a nonzero result is logged with errno. */
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+	int atomic_option = 1;
+	int ret = ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option);
+
+	if (ret) {
+		fprintf(stderr,
+		"InnoDB : can't use atomic write on %s, errno %d\n",
+		name, errno);
+	}
+	return ret;
+}
+#else
+/* Fallback: log that atomic writes are unsupported, set the OS error, fail. */
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+	fprintf(stderr,
+	"InnoDB : can't use atomic writes on %s - not implemented on this platform."
+	" innodb_use_atomic_writes needs to be 0.\n",
+	name);
+#ifdef _WIN32
+	SetLastError(ERROR_INVALID_FUNCTION);
+#else
+	errno = EINVAL;
+#endif
+	return -1;
+}
+#endif
+
 /****************************************************************//**
 NOTE! Use the corresponding macro os_file_create(), not directly
 this function!
@@ -1490,6 +1527,13 @@ os_file_create_func(
 	DWORD		create_flag;
 	DWORD		attributes;
 	ibool		retry;
+
+	DBUG_EXECUTE_IF(
+		"ib_create_table_fail_disk_full",
+		*success = FALSE;
+		SetLastError(ERROR_DISK_FULL);
+		return((os_file_t) -1);
+	);
 try_again:
 	ut_a(name);
 
@@ -1611,6 +1655,13 @@ try_again:
 		}
 	}
 
+	if (srv_use_atomic_writes && type == OS_DATA_FILE && 
+		os_file_set_atomic_writes(file, name)) {
+			 CloseHandle(file);
+			*success = FALSE;
+			file = INVALID_HANDLE_VALUE;
+	}
+
 	return(file);
 #else /* __WIN__ */
 	os_file_t	file;
@@ -1618,6 +1669,12 @@ try_again:
 	ibool		retry;
 	const char*	mode_str	= NULL;
 
+	DBUG_EXECUTE_IF(
+		"ib_create_table_fail_disk_full",
+		*success = FALSE;
+		errno = ENOSPC;
+		return((os_file_t) -1);
+	);
 try_again:
 	ut_a(name);
 
@@ -1724,6 +1781,12 @@ try_again:
 		file = -1;
 	}
 #endif /* USE_FILE_LOCK */
+	if (file != -1 /* may be -1 already */ && srv_use_atomic_writes
+	    && type == OS_DATA_FILE && os_file_set_atomic_writes(file, name)) {
+			close(file);
+			*success = FALSE;
+			file = -1;
+	}
 
 	return(file);
 #endif /* __WIN__ */
@@ -2068,6 +2131,28 @@ os_file_set_size(
 	current_size = 0;
 	desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
 
+#ifdef HAVE_POSIX_FALLOCATE
+	if (srv_use_posix_fallocate) {
+		/* posix_fallocate() returns the error number; errno is unset */
+		int	err = posix_fallocate(file, current_size, desired_size);
+		if (err != 0) {
+			fprintf(stderr,
+			"InnoDB: Error: preallocating data for"
+			" file %s failed at\n"
+			"InnoDB: offset 0 size %lld %lld. Operating system"
+			" error number %d.\n"
+			"InnoDB: Check that the disk is not full"
+			" or a disk quota exceeded.\n"
+			"InnoDB: Some operating system error numbers"
+			" are described at\n"
+			"InnoDB: " REFMAN "operating-system-error-codes.html\n",
+			name,  (long long)size_high,  (long long)size, err);
+			return (FALSE);
+		}
+		return (TRUE);
+	}
+#endif
+
 	/* Write up to 1 megabyte at a time. */
 	buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
 		* UNIV_PAGE_SIZE;
@@ -2377,7 +2462,7 @@ os_file_pread(
 
 	os_n_file_reads++;
 
-	if (innobase_get_slow_log() && trx && trx->take_stats)
+	if (UNIV_UNLIKELY(trx && trx->take_stats))
 	{
 	        trx->io_reads++;
 		trx->io_read += n;
@@ -2410,7 +2495,7 @@ os_file_pread(
 	os_n_pending_reads--;
 	os_mutex_exit(os_file_count_mutex);
 
-	if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
+	if (UNIV_UNLIKELY(start_time != 0))
 	{
 		ut_usectime(&sec, &ms);
 		finish_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -2464,7 +2549,7 @@ os_file_pread(
 		os_n_pending_reads--;
 		os_mutex_exit(os_file_count_mutex);
 
-		if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
+		if (UNIV_UNLIKELY(start_time != 0))
 		{
 			ut_usectime(&sec, &ms);
 			finish_time = (ib_uint64_t)sec * 1000000 + ms;
@@ -4245,8 +4330,8 @@ os_aio_func(
 	ut_ad(file);
 	ut_ad(buf);
 	ut_ad(n > 0);
-	ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
-	ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0);
+	ut_ad(offset % OS_MIN_LOG_BLOCK_SIZE == 0);
 	ut_ad(os_aio_validate_skip());
 #ifdef WIN_ASYNC_IO
 	ut_ad((n & 0xFFFFFFFFUL) == n);
diff --git a/storage/xtradb/page/page0cur.c b/storage/xtradb/page/page0cur.c
index d49b121afab..a722f5b188d 100644
--- a/storage/xtradb/page/page0cur.c
+++ b/storage/xtradb/page/page0cur.c
@@ -310,7 +310,7 @@ page_cur_search_with_match(
 #endif /* UNIV_DEBUG */
 	page = buf_block_get_frame(block);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	page_check_dir(page);
@@ -1248,7 +1248,7 @@ page_cur_insert_rec_zip(
 
 	ut_ad(!page_rec_is_supremum(*current_rec));
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page));
+	ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	/* 1. Get the size of the physical record in the page */
@@ -1902,6 +1902,7 @@ page_cur_delete_rec(
 
 	/* Save to local variables some data associated with current_rec */
 	cur_slot_no = page_dir_find_owner_slot(current_rec);
+	ut_ad(cur_slot_no > 0);
 	cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
 	cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
 
@@ -1972,7 +1973,7 @@ page_cur_delete_rec(
 	}
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 }
 
diff --git a/storage/xtradb/page/page0page.c b/storage/xtradb/page/page0page.c
index e29fa2eb1e5..f2ce6c9fe16 100644
--- a/storage/xtradb/page/page0page.c
+++ b/storage/xtradb/page/page0page.c
@@ -626,7 +626,7 @@ page_copy_rec_list_end(
 		Furthermore, btr_compress() may set FIL_PAGE_PREV to
 		FIL_NULL on new_page while leaving it intact on
 		new_page_zip.  So, we cannot validate new_page_zip. */
-		ut_a(page_zip_validate_low(page_zip, page, TRUE));
+		ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 	ut_ad(buf_block_get_frame(block) == page);
@@ -796,8 +796,8 @@ zip_reorganize:
 			/* Before copying, "ret" was the predecessor
 			of the predefined supremum record.  If it was
 			the predefined infimum record, then it would
-			still be the infimum.  Thus, the assertion
-			ut_a(ret_pos > 0) would fail here. */
+			still be the infimum, and we would have
+			ret_pos == 0. */
 
 			if (UNIV_UNLIKELY
 			    (!page_zip_reorganize(new_block, index, mtr))) {
@@ -946,7 +946,7 @@ page_delete_rec_list_end(
 	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
 	ut_ad(!page_zip || page_rec_is_comp(rec));
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page));
+	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	if (page_rec_is_infimum(rec)) {
@@ -988,7 +988,7 @@ page_delete_rec_list_end(
 						  ULINT_UNDEFINED, &heap);
 			rec = rec_get_next_ptr(rec, TRUE);
 #ifdef UNIV_ZIP_DEBUG
-			ut_a(page_zip_validate(page_zip, page));
+			ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 			page_cur_delete_rec(&cur, index, offsets, mtr);
 		} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
@@ -1052,6 +1052,7 @@ page_delete_rec_list_end(
 
 		n_owned = rec_get_n_owned_new(rec2) - count;
 		slot_index = page_dir_find_owner_slot(rec2);
+		ut_ad(slot_index > 0);
 		slot = page_dir_get_nth_slot(page, slot_index);
 	} else {
 		rec_t*	rec2	= rec;
@@ -1067,6 +1068,7 @@ page_delete_rec_list_end(
 
 		n_owned = rec_get_n_owned_old(rec2) - count;
 		slot_index = page_dir_find_owner_slot(rec2);
+		ut_ad(slot_index > 0);
 		slot = page_dir_get_nth_slot(page, slot_index);
 	}
 
@@ -1126,7 +1128,8 @@ page_delete_rec_list_start(
 		between btr_attach_half_pages() and insert_page = ...
 		when btr_page_get_split_rec_to_left() holds
 		(direction == FSP_DOWN). */
-		ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
+		ut_a(!page_zip
+		     || page_zip_validate_low(page_zip, page, index, TRUE));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -1197,9 +1200,10 @@ page_move_rec_list_end(
 			= buf_block_get_page_zip(block);
 		ut_a(!new_page_zip == !page_zip);
 		ut_a(!new_page_zip
-		     || page_zip_validate(new_page_zip, new_page));
+		     || page_zip_validate(new_page_zip, new_page, index));
 		ut_a(!page_zip
-		     || page_zip_validate(page_zip, page_align(split_rec)));
+		     || page_zip_validate(page_zip, page_align(split_rec),
+					  index));
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
@@ -1471,6 +1475,10 @@ page_rec_get_nth_const(
 	ulint			n_owned;
 	const rec_t*		rec;
 
+	if (nth == 0) {
+		return(page_get_infimum_rec(page));
+	}
+
 	ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
 
 	for (i = 0;; i++) {
diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c
index 4751f4816a9..5357479908f 100644
--- a/storage/xtradb/page/page0zip.c
+++ b/storage/xtradb/page/page0zip.c
@@ -1437,7 +1437,7 @@ err_exit:
 	       page_zip_get_size(page_zip) - PAGE_DATA);
 	mem_heap_free(heap);
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page));
+	ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	if (mtr) {
@@ -3123,6 +3123,7 @@ page_zip_validate_low(
 /*==================*/
 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
 	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index,	/*!< in: index of the page, if known */
 	ibool			sloppy)	/*!< in: FALSE=strict,
 					TRUE=ignore the MIN_REC_FLAG */
 {
@@ -3210,39 +3211,102 @@ page_zip_validate_low(
 		committed.  Let us tolerate that difference when we
 		are performing a sloppy validation. */
 
-		if (sloppy) {
-			byte	info_bits_diff;
-			ulint	offset
-				= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
-						    TRUE);
-			ut_a(offset >= PAGE_NEW_SUPREMUM);
-			offset -= 5 /* REC_NEW_INFO_BITS */;
-
-			info_bits_diff = page[offset] ^ temp_page[offset];
-
-			if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
-				temp_page[offset] = page[offset];
-
-				if (!memcmp(page + PAGE_HEADER,
-					    temp_page + PAGE_HEADER,
-					    UNIV_PAGE_SIZE - PAGE_HEADER
-					    - FIL_PAGE_DATA_END)) {
-
-					/* Only the minimum record flag
-					differed.  Let us ignore it. */
-					page_zip_fail(("page_zip_validate: "
-						       "min_rec_flag "
-						       "(ignored, "
-						       "%lu,%lu,0x%02lx)\n",
-						       page_get_space_id(page),
-						       page_get_page_no(page),
-						       (ulong) page[offset]));
-					goto func_exit;
+		ulint*		offsets;
+		mem_heap_t*	heap;
+		const rec_t*	rec;
+		const rec_t*	trec;
+		byte		info_bits_diff;
+		ulint		offset
+			= rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
+		ut_a(offset >= PAGE_NEW_SUPREMUM);
+		offset -= 5/*REC_NEW_INFO_BITS*/;
+
+		info_bits_diff = page[offset] ^ temp_page[offset];
+
+		if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
+			temp_page[offset] = page[offset];
+
+			if (!memcmp(page + PAGE_HEADER,
+				    temp_page + PAGE_HEADER,
+				    UNIV_PAGE_SIZE - PAGE_HEADER
+				    - FIL_PAGE_DATA_END)) {
+
+				/* Only the minimum record flag
+				differed.  Let us ignore it. */
+				page_zip_fail(("page_zip_validate: "
+					       "min_rec_flag "
+					       "(%s"
+					       "%lu,%lu,0x%02lx)\n",
+					       sloppy ? "ignored, " : "",
+					       page_get_space_id(page),
+					       page_get_page_no(page),
+					       (ulong) page[offset]));
+				valid = sloppy;
+				goto func_exit;
+			}
+		}
+
+		/* Compare the pointers in the PAGE_FREE list. */
+		rec = page_header_get_ptr(page, PAGE_FREE);
+		trec = page_header_get_ptr(temp_page, PAGE_FREE);
+
+		while (rec || trec) {
+			if (page_offset(rec) != page_offset(trec)) {
+				page_zip_fail(("page_zip_validate: "
+					       "PAGE_FREE list: %u!=%u\n",
+					       (unsigned) page_offset(rec),
+					       (unsigned) page_offset(trec)));
+				valid = FALSE;
+				goto func_exit;
+			}
+
+			rec = page_rec_get_next_low(rec, TRUE);
+			trec = page_rec_get_next_low(trec, TRUE);
+		}
+
+		/* Compare the records. */
+		heap = NULL;
+		offsets = NULL;
+		rec = page_rec_get_next_low(
+			page + PAGE_NEW_INFIMUM, TRUE);
+		trec = page_rec_get_next_low(
+			temp_page + PAGE_NEW_INFIMUM, TRUE);
+
+		do {
+			if (page_offset(rec) != page_offset(trec)) {
+				page_zip_fail(("page_zip_validate: "
+					       "record list: 0x%02x!=0x%02x\n",
+					       (unsigned) page_offset(rec),
+					       (unsigned) page_offset(trec)));
+				valid = FALSE;
+				break;
+			}
+
+			if (index) {
+				/* Compare the data. */
+				offsets = rec_get_offsets(
+					rec, index, offsets,
+					ULINT_UNDEFINED, &heap);
+
+				if (memcmp(rec - rec_offs_extra_size(offsets),
+					   trec - rec_offs_extra_size(offsets),
+					   rec_offs_size(offsets))) {
+					page_zip_fail(
+						("page_zip_validate: "
+						 "record content: 0x%02x\n",
+						 (unsigned) page_offset(rec)));
+					valid = FALSE;
+					break;
 				}
 			}
+
+			rec = page_rec_get_next_low(rec, TRUE);
+			trec = page_rec_get_next_low(trec, TRUE);
+		} while (rec || trec);
+
+		if (heap) {
+			mem_heap_free(heap);
 		}
-		page_zip_fail(("page_zip_validate: content\n"));
-		valid = FALSE;
 	}
 
 func_exit:
@@ -3264,9 +3328,10 @@ ibool
 page_zip_validate(
 /*==============*/
 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	const page_t*		page)	/*!< in: uncompressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index)	/*!< in: index of the page, if known */
 {
-	return(page_zip_validate_low(page_zip, page,
+	return(page_zip_validate_low(page_zip, page, index,
 				     recv_recovery_is_on()));
 }
 #endif /* UNIV_ZIP_DEBUG */
@@ -3597,7 +3662,7 @@ page_zip_write_rec(
 	page_zip->m_nonempty = TRUE;
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page_align(rec)));
+	ut_a(page_zip_validate(page_zip, page_align(rec), index));
 #endif /* UNIV_ZIP_DEBUG */
 }
 
@@ -3644,7 +3709,7 @@ corrupt:
 		}
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 
 		memcpy(page + offset,
@@ -3653,7 +3718,7 @@ corrupt:
 		       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
@@ -3720,7 +3785,7 @@ page_zip_write_blob_ptr(
 	memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page));
+	ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	if (mtr) {
@@ -3791,7 +3856,7 @@ corrupt:
 		}
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 
 		field = page + offset;
@@ -3812,7 +3877,7 @@ corrupt:
 		memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
@@ -4039,7 +4104,7 @@ page_zip_clear_rec(
 	}
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page));
+	ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 }
 
@@ -4063,7 +4128,7 @@ page_zip_rec_set_deleted(
 		*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
 	}
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page_align(rec)));
+	ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
 #endif /* UNIV_ZIP_DEBUG */
 }
 
@@ -4364,14 +4429,14 @@ corrupt:
 			goto corrupt;
 		}
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 
 		memcpy(page + offset, ptr, len);
 		memcpy(page_zip->data + offset, ptr, len);
 
 #ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(page_zip, page, NULL));
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
@@ -4449,7 +4514,7 @@ page_zip_reorganize(
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(page_is_comp(page));
 	ut_ad(!dict_index_is_ibuf(index));
-	/* Note that page_zip_validate(page_zip, page) may fail here. */
+	/* Note that page_zip_validate(page_zip, page, index) may fail here. */
 	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
@@ -4536,7 +4601,7 @@ page_zip_copy_recs(
 	FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
 	mismatch.  A strict page_zip_validate() will be executed later
 	during the B-tree operations. */
-	ut_a(page_zip_validate_low(src_zip, src, TRUE));
+	ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
 #endif /* UNIV_ZIP_DEBUG */
 	ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
 	if (UNIV_UNLIKELY(src_zip->n_blobs)) {
@@ -4597,7 +4662,7 @@ page_zip_copy_recs(
 	}
 
 #ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page));
+	ut_a(page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	btr_blob_dbg_add(page, index, "page_zip_copy_recs");
 
diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c
index 30fc28561fa..6bd40c54a0c 100644
--- a/storage/xtradb/rem/rem0rec.c
+++ b/storage/xtradb/rem/rem0rec.c
@@ -167,7 +167,6 @@ rec_get_n_extern_new(
 {
 	const byte*	nulls;
 	const byte*	lens;
-	dict_field_t*	field;
 	ulint		null_mask;
 	ulint		n_extern;
 	ulint		i;
@@ -188,10 +187,13 @@ rec_get_n_extern_new(
 
 	/* read the lengths of fields 0..n */
 	do {
-		ulint	len;
+		const dict_field_t*	field
+			= dict_index_get_nth_field(index, i);
+		const dict_col_t*	col
+			= dict_field_get_col(field);
+		ulint			len;
 
-		field = dict_index_get_nth_field(index, i);
-		if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+		if (!(col->prtype & DATA_NOT_NULL)) {
 			/* nullable field => read the null flag */
 
 			if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -209,8 +211,6 @@ rec_get_n_extern_new(
 
 		if (UNIV_UNLIKELY(!field->fixed_len)) {
 			/* Variable-length field: read the length */
-			const dict_col_t*	col
-				= dict_field_get_col(field);
 			len = *lens--;
 			/* If the maximum length of the field is up
 			to 255 bytes, the actual length is always
@@ -239,16 +239,15 @@ rec_get_n_extern_new(
 Determine the offset to each field in a leaf-page record
 in ROW_FORMAT=COMPACT.  This is a special case of
 rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
 void
 rec_init_offsets_comp_ordinary(
 /*===========================*/
 	const rec_t*		rec,	/*!< in: physical record in
 					ROW_FORMAT=COMPACT */
-	ulint			extra,	/*!< in: number of bytes to reserve
-					between the record header and
-					the data payload
-					(usually REC_N_NEW_EXTRA_BYTES) */
+	ibool			temp,	/*!< in: whether to use the
+					format for temporary files in
+					index creation */
 	const dict_index_t*	index,	/*!< in: record descriptor */
 	ulint*			offsets)/*!< in/out: array of offsets;
 					in: n=rec_offs_n_fields(offsets) */
@@ -256,27 +255,38 @@ rec_init_offsets_comp_ordinary(
 	ulint		i		= 0;
 	ulint		offs		= 0;
 	ulint		any_ext		= 0;
-	const byte*	nulls		= rec - (extra + 1);
+	const byte*	nulls		= temp
+		? rec - 1
+		: rec - (1 + REC_N_NEW_EXTRA_BYTES);
 	const byte*	lens		= nulls
 		- UT_BITS_IN_BYTES(index->n_nullable);
-	dict_field_t*	field;
 	ulint		null_mask	= 1;
 
 #ifdef UNIV_DEBUG
-	/* We cannot invoke rec_offs_make_valid() here, because it can hold
-	that extra != REC_N_NEW_EXTRA_BYTES.  Similarly, rec_offs_validate()
-	will fail in that case, because it invokes rec_get_status(). */
+	/* We cannot invoke rec_offs_make_valid() here if temp=TRUE.
+	Similarly, rec_offs_validate() will fail in that case, because
+	it invokes rec_get_status(). */
 	offsets[2] = (ulint) rec;
 	offsets[3] = (ulint) index;
 #endif /* UNIV_DEBUG */
 
+	ut_ad(temp || dict_table_is_comp(index->table));
+
+	if (temp && dict_table_is_comp(index->table)) {
+		/* No need to adjust fixed_len to 0. We only need to
+		adjust it for ROW_FORMAT=REDUNDANT. */
+		temp = FALSE;
+	}
+
 	/* read the lengths of fields 0..n */
 	do {
-		ulint	len;
+		const dict_field_t*	field
+			= dict_index_get_nth_field(index, i);
+		const dict_col_t*	col
+			= dict_field_get_col(field);
+		ulint			len;
 
-		field = dict_index_get_nth_field(index, i);
-		if (!(dict_field_get_col(field)->prtype
-		      & DATA_NOT_NULL)) {
+		if (!(col->prtype & DATA_NOT_NULL)) {
 			/* nullable field => read the null flag */
 
 			if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -296,10 +306,9 @@ rec_init_offsets_comp_ordinary(
 			null_mask <<= 1;
 		}
 
-		if (UNIV_UNLIKELY(!field->fixed_len)) {
+		if (!field->fixed_len
+		    || (temp && !dict_col_get_fixed_size(col, temp))) {
 			/* Variable-length field: read the length */
-			const dict_col_t*	col
-				= dict_field_get_col(field);
 			len = *lens--;
 			/* If the maximum length of the field is up
 			to 255 bytes, the actual length is always
@@ -393,9 +402,8 @@ rec_init_offsets(
 				= dict_index_get_n_unique_in_tree(index);
 			break;
 		case REC_STATUS_ORDINARY:
-			rec_init_offsets_comp_ordinary(rec,
-						       REC_N_NEW_EXTRA_BYTES,
-						       index, offsets);
+			rec_init_offsets_comp_ordinary(
+				rec, FALSE, index, offsets);
 			return;
 		}
 
@@ -766,17 +774,19 @@ rec_get_nth_field_offs_old(
 /**********************************************************//**
 Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
 @return	total size */
-UNIV_INTERN
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(1,2)))
 ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
+rec_get_converted_size_comp_prefix_low(
+/*===================================*/
 	const dict_index_t*	index,	/*!< in: record descriptor;
 					dict_table_is_comp() is
 					assumed to hold, even if
 					it does not */
 	const dfield_t*		fields,	/*!< in: array of data fields */
 	ulint			n_fields,/*!< in: number of data fields */
-	ulint*			extra)	/*!< out: extra size */
+	ulint*			extra,	/*!< out: extra size */
+	ibool			temp)	/*!< in: whether this is a
+					temporary file record */
 {
 	ulint	extra_size;
 	ulint	data_size;
@@ -785,15 +795,25 @@ rec_get_converted_size_comp_prefix(
 	ut_ad(fields);
 	ut_ad(n_fields > 0);
 	ut_ad(n_fields <= dict_index_get_n_fields(index));
+	ut_ad(!temp || extra);
 
-	extra_size = REC_N_NEW_EXTRA_BYTES
+	extra_size = temp
+		? UT_BITS_IN_BYTES(index->n_nullable)
+		: REC_N_NEW_EXTRA_BYTES
 		+ UT_BITS_IN_BYTES(index->n_nullable);
 	data_size = 0;
 
+	if (temp && dict_table_is_comp(index->table)) {
+		/* No need to adjust fixed_len to 0. We only need to
+		adjust it for ROW_FORMAT=REDUNDANT. */
+		temp = FALSE;
+	}
+
 	/* read the lengths of fields 0..n */
 	for (i = 0; i < n_fields; i++) {
 		const dict_field_t*	field;
 		ulint			len;
+		ulint			fixed_len;
 		const dict_col_t*	col;
 
 		field = dict_index_get_nth_field(index, i);
@@ -809,8 +829,14 @@ rec_get_converted_size_comp_prefix(
 			continue;
 		}
 
-		ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+		ut_ad(len <= col->len || col->mtype == DATA_BLOB
+		      || (col->len == 0 && col->mtype == DATA_VARCHAR));
 
+		fixed_len = field->fixed_len;
+		if (temp && fixed_len
+		    && !dict_col_get_fixed_size(col, temp)) {
+			fixed_len = 0;
+		}
 		/* If the maximum length of a variable-length field
 		is up to 255 bytes, the actual length is always stored
 		in one byte. If the maximum length is more than 255
@@ -818,11 +844,20 @@ rec_get_converted_size_comp_prefix(
 		0..127.  The length will be encoded in two bytes when
 		it is 128 or more, or when the field is stored externally. */
 
-		if (field->fixed_len) {
-			ut_ad(len == field->fixed_len);
+		if (fixed_len) {
+#ifdef UNIV_DEBUG
+			ulint	mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+			ulint	mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+			ut_ad(len <= fixed_len);
+
+			ut_ad(!mbmaxlen || len >= mbminlen
+			      * (fixed_len / mbmaxlen));
+
 			/* dict_index_add_col() should guarantee this */
 			ut_ad(!field->prefix_len
-			      || field->fixed_len == field->prefix_len);
+			      || fixed_len == field->prefix_len);
+#endif /* UNIV_DEBUG */
 		} else if (dfield_is_ext(&fields[i])) {
 			ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
 			extra_size += 2;
@@ -839,7 +874,7 @@ rec_get_converted_size_comp_prefix(
 		data_size += len;
 	}
 
-	if (UNIV_LIKELY_NULL(extra)) {
+	if (extra) {
 		*extra = extra_size;
 	}
 
@@ -847,6 +882,23 @@ rec_get_converted_size_comp_prefix(
 }
 
 /**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+{
+	ut_ad(dict_table_is_comp(index->table));
+	return(rec_get_converted_size_comp_prefix_low(
+		       index, fields, n_fields, extra, FALSE));
+}
+
+/**********************************************************//**
 Determines the size of a data tuple in ROW_FORMAT=COMPACT.
 @return	total size */
 UNIV_INTERN
@@ -890,8 +942,8 @@ rec_get_converted_size_comp(
 		return(ULINT_UNDEFINED);
 	}
 
-	return(size + rec_get_converted_size_comp_prefix(index, fields,
-							 n_fields, extra));
+	return(size + rec_get_converted_size_comp_prefix_low(
+		       index, fields, n_fields, extra, FALSE));
 }
 
 /***********************************************************//**
@@ -1068,19 +1120,18 @@ rec_convert_dtuple_to_rec_old(
 
 /*********************************************************//**
 Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
 void
 rec_convert_dtuple_to_rec_comp(
 /*===========================*/
 	rec_t*			rec,	/*!< in: origin of record */
-	ulint			extra,	/*!< in: number of bytes to
-					reserve between the record
-					header and the data payload
-					(normally REC_N_NEW_EXTRA_BYTES) */
 	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint			status,	/*!< in: status bits of the record */
 	const dfield_t*		fields,	/*!< in: array of data fields */
-	ulint			n_fields)/*!< in: number of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint			status,	/*!< in: status bits of the record */
+	ibool			temp)	/*!< in: whether to use the
+					format for temporary files in
+					index creation */
 {
 	const dfield_t*	field;
 	const dtype_t*	type;
@@ -1092,31 +1143,44 @@ rec_convert_dtuple_to_rec_comp(
 	ulint		n_node_ptr_field;
 	ulint		fixed_len;
 	ulint		null_mask	= 1;
-	ut_ad(extra == 0 || dict_table_is_comp(index->table));
-	ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
+	ut_ad(temp || dict_table_is_comp(index->table));
 	ut_ad(n_fields > 0);
 
-	switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
-	case REC_STATUS_ORDINARY:
+	if (temp) {
+		ut_ad(status == REC_STATUS_ORDINARY);
 		ut_ad(n_fields <= dict_index_get_n_fields(index));
 		n_node_ptr_field = ULINT_UNDEFINED;
-		break;
-	case REC_STATUS_NODE_PTR:
-		ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
-		n_node_ptr_field = n_fields - 1;
-		break;
-	case REC_STATUS_INFIMUM:
-	case REC_STATUS_SUPREMUM:
-		ut_ad(n_fields == 1);
-		n_node_ptr_field = ULINT_UNDEFINED;
-		break;
-	default:
-		ut_error;
-		return;
+		nulls = rec - 1;
+		if (dict_table_is_comp(index->table)) {
+			/* No need to adjust fixed_len to 0. We only
+			need to adjust it for ROW_FORMAT=REDUNDANT. */
+			temp = FALSE;
+		}
+	} else {
+		nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+
+		switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+		case REC_STATUS_ORDINARY:
+			ut_ad(n_fields <= dict_index_get_n_fields(index));
+			n_node_ptr_field = ULINT_UNDEFINED;
+			break;
+		case REC_STATUS_NODE_PTR:
+			ut_ad(n_fields
+			      == dict_index_get_n_unique_in_tree(index) + 1);
+			n_node_ptr_field = n_fields - 1;
+			break;
+		case REC_STATUS_INFIMUM:
+		case REC_STATUS_SUPREMUM:
+			ut_ad(n_fields == 1);
+			n_node_ptr_field = ULINT_UNDEFINED;
+			break;
+		default:
+			ut_error;
+			return;
+		}
 	}
 
 	end = rec;
-	nulls = rec - (extra + 1);
 	lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
 	/* clear the SQL-null flags */
 	memset(lens + 1, 0, nulls - lens);
@@ -1162,6 +1226,10 @@ rec_convert_dtuple_to_rec_comp(
 
 		ifield = dict_index_get_nth_field(index, i);
 		fixed_len = ifield->fixed_len;
+		if (temp && fixed_len
+		    && !dict_col_get_fixed_size(ifield->col, temp)) {
+			fixed_len = 0;
+		}
 		/* If the maximum length of a variable-length field
 		is up to 255 bytes, the actual length is always stored
 		in one byte. If the maximum length is more than 255
@@ -1169,8 +1237,17 @@ rec_convert_dtuple_to_rec_comp(
 		0..127.  The length will be encoded in two bytes when
 		it is 128 or more, or when the field is stored externally. */
 		if (fixed_len) {
-			ut_ad(len == fixed_len);
+#ifdef UNIV_DEBUG
+			ulint	mbminlen = DATA_MBMINLEN(
+				ifield->col->mbminmaxlen);
+			ulint	mbmaxlen = DATA_MBMAXLEN(
+				ifield->col->mbminmaxlen);
+
+			ut_ad(len <= fixed_len);
+			ut_ad(!mbmaxlen || len >= mbminlen
+			      * (fixed_len / mbmaxlen));
 			ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
 		} else if (dfield_is_ext(field)) {
 			ut_ad(ifield->col->len >= 256
 			      || ifield->col->mtype == DATA_BLOB);
@@ -1222,8 +1299,7 @@ rec_convert_dtuple_to_rec_new(
 	rec = buf + extra_size;
 
 	rec_convert_dtuple_to_rec_comp(
-		rec, REC_N_NEW_EXTRA_BYTES, index, status,
-		dtuple->fields, dtuple->n_fields);
+		rec, index, dtuple->fields, dtuple->n_fields, status, FALSE);
 
 	/* Set the info bits of the record */
 	rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
@@ -1285,6 +1361,54 @@ rec_convert_dtuple_to_rec(
 	return(rec);
 }
 
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file record.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+{
+	return(rec_get_converted_size_comp_prefix_low(
+		       index, fields, n_fields, extra, TRUE));
+}
+
+/******************************************************//**
+Determine the offset to each field in a temporary file record.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+	const rec_t*		rec,	/*!< in: temporary file record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
+{
+	rec_init_offsets_comp_ordinary(rec, TRUE, index, offsets);
+}
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+	rec_t*			rec,		/*!< out: record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	const dfield_t*		fields,		/*!< in: array of data fields */
+	ulint			n_fields)	/*!< in: number of fields */
+{
+	rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
+				       REC_STATUS_ORDINARY, TRUE);
+}
+
 /**************************************************************//**
 Copies the first n fields of a physical record to a data tuple. The fields
 are copied to the memory heap. */
@@ -1495,6 +1619,7 @@ rec_copy_prefix_to_buf(
 
 	return(*buf + (rec - (lens + 1)));
 }
+#endif /* UNIV_HOTBACKUP */
 
 /***************************************************************//**
 Validates the consistency of an old-style physical record.
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c
index 61c3720fa2e..fda0c55b5c7 100644
--- a/storage/xtradb/row/row0ins.c
+++ b/storage/xtradb/row/row0ins.c
@@ -2288,7 +2288,10 @@ row_ins_index_entry(
 	err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
 				      n_ext, thr);
 	if (err != DB_FAIL) {
-
+		if (index == dict_table_get_first_index(index->table)
+		    && thr_get_trx(thr)->mysql_thd != 0) {
+			DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
+		}
 		return(err);
 	}
 
diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c
index 0fd13f5339c..0b6a6302854 100644
--- a/storage/xtradb/row/row0merge.c
+++ b/storage/xtradb/row/row0merge.c
@@ -301,6 +301,7 @@ row_merge_buf_add(
 	for (i = 0; i < n_fields; i++, field++, ifield++) {
 		const dict_col_t*	col;
 		ulint			col_no;
+		ulint			fixed_len;
 		const dfield_t*		row_field;
 		ulint			len;
 
@@ -349,9 +350,30 @@ row_merge_buf_add(
 
 		ut_ad(len <= col->len || col->mtype == DATA_BLOB);
 
-		if (ifield->fixed_len) {
-			ut_ad(len == ifield->fixed_len);
+		fixed_len = ifield->fixed_len;
+		if (fixed_len && !dict_table_is_comp(index->table)
+		    && DATA_MBMINLEN(col->mbminmaxlen)
+		    != DATA_MBMAXLEN(col->mbminmaxlen)) {
+			/* CHAR in ROW_FORMAT=REDUNDANT is always
+			fixed-length, but in the temporary file it is
+			variable-length for variable-length character
+			sets. */
+			fixed_len = 0;
+		}
+
+		if (fixed_len) {
+#ifdef UNIV_DEBUG
+			ulint	mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+			ulint	mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+			/* len must lie between the sizes calculated from
+			mbminlen and mbmaxlen */
+			ut_ad(len <= fixed_len);
+			ut_ad(!mbmaxlen || len >= mbminlen
+			      * (fixed_len / mbmaxlen));
+
 			ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
 		} else if (dfield_is_ext(field)) {
 			extra_size += 2;
 		} else if (len < 128
@@ -372,12 +394,11 @@ row_merge_buf_add(
 		ulint	size;
 		ulint	extra;
 
-		size = rec_get_converted_size_comp(index,
-						   REC_STATUS_ORDINARY,
-						   entry, n_fields, &extra);
+		size = rec_get_converted_size_temp(
+			index, entry, n_fields, &extra);
 
-		ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
-		ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+		ut_ad(data_size + extra_size == size);
+		ut_ad(extra_size == extra);
 	}
 #endif /* UNIV_DEBUG */
 
@@ -581,14 +602,9 @@ row_merge_buf_write(
 		ulint		extra_size;
 		const dfield_t*	entry		= buf->tuples[i];
 
-		size = rec_get_converted_size_comp(index,
-						   REC_STATUS_ORDINARY,
-						   entry, n_fields,
-						   &extra_size);
+		size = rec_get_converted_size_temp(
+			index, entry, n_fields, &extra_size);
 		ut_ad(size >= extra_size);
-		ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
-		extra_size -= REC_N_NEW_EXTRA_BYTES;
-		size -= REC_N_NEW_EXTRA_BYTES;
 
 		/* Encode extra_size + 1 */
 		if (extra_size + 1 < 0x80) {
@@ -601,9 +617,8 @@ row_merge_buf_write(
 
 		ut_ad(b + size < block[1]);
 
-		rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
-					       REC_STATUS_ORDINARY,
-					       entry, n_fields);
+		rec_convert_dtuple_to_temp(b + extra_size, index,
+					   entry, n_fields);
 
 		b += size;
 
@@ -709,6 +724,8 @@ row_merge_read(
 	ib_uint64_t	ofs = ((ib_uint64_t) offset) * block_size;
 	ibool		success;
 
+	DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
+
 #ifdef UNIV_DEBUG
 	if (row_merge_print_block_read) {
 		fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
@@ -756,6 +773,8 @@ row_merge_write(
 			    (ulint) (ofs >> 32),
 			    block_size);
 
+	DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
+
 #ifdef UNIV_DEBUG
 	if (row_merge_print_block_write) {
 		fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
@@ -872,7 +891,7 @@ err_exit:
 
 		*mrec = *buf + extra_size;
 
-		rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+		rec_init_offsets_temp(*mrec, index, offsets);
 
 		data_size = rec_offs_data_size(offsets);
 
@@ -891,7 +910,7 @@ err_exit:
 
 	*mrec = b + extra_size;
 
-	rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+	rec_init_offsets_temp(*mrec, index, offsets);
 
 	data_size = rec_offs_data_size(offsets);
 	ut_ad(extra_size + data_size < block_size);
@@ -2261,7 +2280,7 @@ row_merge_drop_temp_indexes(void)
 /*********************************************************************//**
 Creates temperary merge files, and if UNIV_PFS_IO defined, register
 the file descriptor with Performance Schema.
-@return File descriptor */
+@return file descriptor, or -1 on failure */
 UNIV_INLINE
 int
 row_merge_file_create_low(void)
@@ -2283,12 +2302,19 @@ row_merge_file_create_low(void)
 #ifdef UNIV_PFS_IO
         register_pfs_file_open_end(locker, fd);
 #endif
+	if (fd < 0) {
+		fprintf(stderr,
+			"InnoDB: Error: Cannot create temporary merge file\n");
+		return(-1);
+	}
 	return(fd);
 }
+
 /*********************************************************************//**
-Create a merge file. */
-static
-void
+Create a merge file.
+@return file descriptor, or -1 on failure */
+static __attribute__((nonnull, warn_unused_result))
+int
 row_merge_file_create(
 /*==================*/
 	merge_file_t*	merge_file)	/*!< out: merge file structure */
@@ -2296,6 +2322,7 @@ row_merge_file_create(
 	merge_file->fd = row_merge_file_create_low();
 	merge_file->offset = 0;
 	merge_file->n_rec = 0;
+	return(merge_file->fd);
 }
 
 /*********************************************************************//**
@@ -2541,6 +2568,28 @@ row_merge_rename_tables(
 		goto err_exit;
 	}
 
+	/* Generate the redo logs for file operations */
+	fil_mtr_rename_log(old_table->space, old_name,
+			   new_table->space, new_table->name, tmp_name);
+
+	/* What if the redo logs are flushed to disk here?  This is
+	tested with the following crash point */
+	DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
+			DBUG_SUICIDE(););
+
+	/* File operations cannot be rolled back.  So, before proceeding
+	with file operations, commit the dictionary changes. */
+	trx_commit_for_mysql(trx);
+
+	/* If server crashes here, the dictionary in InnoDB and MySQL
+	will differ.  The .ibd files and the .frm files must be swapped
+	manually by the administrator. No loss of data. */
+	DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
+
+	/* Ensure that the redo logs are flushed to disk.  The config
+	innodb_flush_log_at_trx_commit must not affect this. */
+	log_buffer_flush_to_disk();
+
 	/* The following calls will also rename the .ibd data files if
 	the tables are stored in a single-table tablespace */
 
@@ -2715,7 +2764,7 @@ row_merge_build_indexes(
 	ulint			block_size;
 	ulint			i;
 	ulint			error;
-	int			tmpfd;
+	int			tmpfd = -1;
 	ulint			merge_sort_block_size;
 	void*			block_mem;
 
@@ -2741,13 +2790,31 @@ row_merge_build_indexes(
 			i * merge_sort_block_size);
 	}
 
+	/* Initialize all the merge file descriptors, so that we
+	don't call row_merge_file_destroy() on an uninitialized
+	merge file descriptor */
+
+	for (i = 0; i < n_indexes; i++) {
+		merge_files[i].fd = -1;
+	}
+
 	for (i = 0; i < n_indexes; i++) {
 
-		row_merge_file_create(&merge_files[i]);
+		if (row_merge_file_create(&merge_files[i]) < 0)
+		{
+			error = DB_OUT_OF_MEMORY;
+			goto func_exit;
+		}
 	}
 
 	tmpfd = row_merge_file_create_low();
 
+	if (tmpfd < 0)
+	{
+		error = DB_OUT_OF_MEMORY;
+		goto func_exit;
+	}
+
 	/* Reset the MySQL row buffer that is used when reporting
 	duplicate keys. */
 	innobase_rec_reset(table);
diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c
index 9ab85940760..3a18cfc679e 100644
--- a/storage/xtradb/row/row0mysql.c
+++ b/storage/xtradb/row/row0mysql.c
@@ -1879,7 +1879,8 @@ Creates a table for MySQL. If the name of the table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also start the printing of monitor
 output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back and the 'table' object will be freed.
 @return	error code or DB_SUCCESS */
 UNIV_INTERN
 int
@@ -2017,6 +2018,8 @@ err_exit:
 
 			row_drop_table_for_mysql(table->name, trx, FALSE);
 			trx_commit_for_mysql(trx);
+		} else {
+			dict_mem_table_free(table);
 		}
 		break;
 
@@ -4270,6 +4273,13 @@ end:
 			trx->error_state = DB_SUCCESS;
 			trx_general_rollback_for_mysql(trx, NULL);
 			trx->error_state = DB_SUCCESS;
+		} else {
+			if (old_is_tmp && !new_is_tmp) {
+				/* After ALTER TABLE the table statistics
+				need to be rebuilt.  They will be rebuilt
+				when the table is loaded again. */
+				table->stat_initialized = FALSE;
+			}
 		}
 	}
 
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index ec3603f2550..858d50fd5a6 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -2489,6 +2489,9 @@ row_sel_convert_mysql_key_to_innobase(
 		dfield++;
 	}
 
+	DBUG_EXECUTE_IF("innodb_srch_key_buffer_full",
+		ut_a(buf == (original_buf + buf_len)););
+
 	ut_a(buf <= original_buf + buf_len);
 
 	/* We set the length of tuple to n_fields: we assume that the memory
@@ -3641,13 +3644,13 @@ row_search_for_mysql(
 	should_release = 0;
 	for (i = 0; i < btr_search_index_num; i++) {
 		/* we should check all latches (fix Bug#791030) */
-		if (rw_lock_get_writer(btr_search_latch_part[i])
-		    != RW_LOCK_NOT_LOCKED) {
+		if (UNIV_UNLIKELY(rw_lock_get_writer(btr_search_latch_part[i])
+				  != RW_LOCK_NOT_LOCKED)) {
 			should_release |= ((ulint)1 << i);
 		}
 	}
 
-	if (should_release) {
+	if (UNIV_UNLIKELY(should_release)) {
 
 		/* There is an x-latch request on the adaptive hash index:
 		release the s-latch to reduce starvation and wait for
@@ -4124,6 +4127,11 @@ wait_table_again:
 	}
 
 rec_loop:
+	if (trx_is_interrupted(trx)) {
+		err = DB_INTERRUPTED;
+		goto normal_return;
+	}
+
 	/*-------------------------------------------------------------*/
 	/* PHASE 4: Look for matching records in a loop */
 
@@ -5118,11 +5126,15 @@ row_search_autoinc_read_column(
 
 	rec_offs_init(offsets_);
 
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+	offsets = rec_get_offsets(rec, index, offsets, col_no + 1, &heap);
 
-	data = rec_get_nth_field(rec, offsets, col_no, &len);
+	if (rec_offs_nth_sql_null(offsets, col_no)) {
+		/* There is no non-NULL value in the auto-increment column. */
+		value = 0;
+		goto func_exit;
+	}
 
-	ut_a(len != UNIV_SQL_NULL);
+	data = rec_get_nth_field(rec, offsets, col_no, &len);
 
 	switch (mtype) {
 	case DATA_INT:
@@ -5144,14 +5156,15 @@ row_search_autoinc_read_column(
 		ut_error;
 	}
 
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
 	if (!unsigned_type && (ib_int64_t) value < 0) {
 		value = 0;
 	}
 
+func_exit:
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
 	return(value);
 }
 
diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c
index 9597c476125..3c933c87b27 100644
--- a/storage/xtradb/row/row0umod.c
+++ b/storage/xtradb/row/row0umod.c
@@ -69,36 +69,6 @@ If you make a change in this module make sure that no codepath is
 introduced where a call to log_free_check() is bypassed. */
 
 /***********************************************************//**
-Checks if also the previous version of the clustered index record was
-modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback.
-@return	TRUE if also previous modify or insert of this row should be undone */
-static
-ibool
-row_undo_mod_undo_also_prev_vers(
-/*=============================*/
-	undo_node_t*	node,	/*!< in: row undo node */
-	undo_no_t*	undo_no)/*!< out: the undo number */
-{
-	trx_undo_rec_t*	undo_rec;
-	trx_t*		trx;
-
-	trx = node->trx;
-
-	if (node->new_trx_id != trx->id) {
-
-		*undo_no = 0;
-		return(FALSE);
-	}
-
-	undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
-
-	*undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
-	return(trx->roll_limit <= *undo_no);
-}
-
-/***********************************************************//**
 Undoes a modify in a clustered index record.
 @return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
 static
@@ -226,19 +196,11 @@ row_undo_mod_clust(
 	btr_pcur_t*	pcur;
 	mtr_t		mtr;
 	ulint		err;
-	ibool		success;
-	ibool		more_vers;
-	undo_no_t	new_undo_no;
 
 	ut_ad(node && thr);
 
 	log_free_check();
 
-	/* Check if also the previous version of the clustered index record
-	should be undone in this same rollback operation */
-
-	more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);
-
 	pcur = &(node->pcur);
 
 	mtr_start(&mtr);
@@ -286,20 +248,6 @@ row_undo_mod_clust(
 
 	trx_undo_rec_release(node->trx, node->undo_no);
 
-	if (more_vers && err == DB_SUCCESS) {
-
-		/* Reserve the undo log record to the prior version after
-		committing &mtr: this is necessary to comply with the latching
-		order, as &mtr may contain the fsp latch which is lower in
-		the latch hierarchy than trx->undo_mutex. */
-
-		success = trx_undo_rec_reserve(node->trx, new_undo_no);
-
-		if (success) {
-			node->state = UNDO_NODE_PREV_VERS;
-		}
-	}
-
 	return(err);
 }
 
@@ -571,6 +519,7 @@ row_undo_mod_upd_del_sec(
 	ulint		err	= DB_SUCCESS;
 
 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
+	ut_ad(!node->undo_row);
 	heap = mem_heap_create(1024);
 
 	while (node->index != NULL) {
@@ -632,6 +581,8 @@ row_undo_mod_del_mark_sec(
 	dict_index_t*	index;
 	ulint		err;
 
+	ut_ad(!node->undo_row);
+
 	heap = mem_heap_create(1024);
 
 	while (node->index != NULL) {
@@ -847,7 +798,6 @@ row_undo_mod_parse_undo_rec(
 	trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
 				       roll_ptr, info_bits, trx,
 				       node->heap, &(node->update));
-	node->new_roll_ptr = roll_ptr;
 	node->new_trx_id = trx_id;
 	node->cmpl_info = cmpl_info;
 }
diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c
index 09970b7fe21..74fc1baf1d2 100644
--- a/storage/xtradb/row/row0undo.c
+++ b/storage/xtradb/row/row0undo.c
@@ -216,7 +216,7 @@ row_undo_search_clust_to_pcur(
 
 		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
 				      offsets, NULL, ext, node->heap);
-		if (node->update) {
+		if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
 			node->undo_row = dtuple_copy(node->row, node->heap);
 			row_upd_replace(node->undo_row, &node->undo_ext,
 					clust_index, node->update, node->heap);
@@ -282,25 +282,6 @@ row_undo(
 		} else {
 			node->state = UNDO_NODE_MODIFY;
 		}
-
-	} else if (node->state == UNDO_NODE_PREV_VERS) {
-
-		/* Undo should be done to the same clustered index record
-		again in this same rollback, restoring the previous version */
-
-		roll_ptr = node->new_roll_ptr;
-
-		node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
-							   node->heap);
-		node->roll_ptr = roll_ptr;
-		node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
-		if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
-			node->state = UNDO_NODE_INSERT;
-		} else {
-			node->state = UNDO_NODE_MODIFY;
-		}
 	}
 
 	/* Prevent DROP TABLE etc. while we are rolling back this row.
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 6e210071746..6edfbaa7755 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -58,6 +58,8 @@ Created 10/8/1995 Heikki Tuuri
 *******************************************************/
 
 /* Dummy comment */
+#include "m_string.h" /* for my_sys.h */
+#include "my_sys.h" /* DEBUG_SYNC_C */
 #include "srv0srv.h"
 
 #include "ut0mem.h"
@@ -181,7 +183,7 @@ UNIV_INTERN my_bool	srv_track_changed_pages = TRUE;
 
 UNIV_INTERN ib_uint64_t	srv_max_bitmap_file_size = 100 * 1024 * 1024;
 
-UNIV_INTERN ulonglong	srv_changed_pages_limit = 0;
+UNIV_INTERN ulonglong	srv_max_changed_pages = 0;
 
 /** When TRUE, fake change transcations take S rather than X row locks.
     When FALSE, row locks are not taken at all. */
@@ -312,58 +314,11 @@ UNIV_INTERN ulong srv_purge_batch_size = 20;
 /* the number of rollback segments to use */
 UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
 
-/* variable counts amount of data read in total (in bytes) */
-UNIV_INTERN ulint srv_data_read = 0;
-
 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
 NULL value when collecting statistics. By default, it is set to
 SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
 ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
 
-/* here we count the amount of data written in total (in bytes) */
-UNIV_INTERN ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-UNIV_INTERN ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-UNIV_INTERN ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-UNIV_INTERN ulint srv_os_log_written = 0;
-
-/* amount of writes being done to the log files */
-UNIV_INTERN ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-UNIV_INTERN ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-UNIV_INTERN ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-UNIV_INTERN ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-UNIV_INTERN ulint srv_buf_pool_flushed = 0;
-UNIV_INTERN ulint buf_lru_flush_page_count = 0;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-UNIV_INTERN ulint srv_buf_pool_reads = 0;
-
 /** Time in seconds between automatic buffer pool dumps */
 UNIV_INTERN uint srv_auto_lru_dump = 0;
 
@@ -406,6 +361,9 @@ UNIV_INTERN lint	srv_conc_n_threads	= 0;
 InnoDB */
 UNIV_INTERN ulint	srv_conc_n_waiting_threads = 0;
 
+/* print all user-level transaction deadlocks to mysqld stderr */
+UNIV_INTERN my_bool	srv_print_all_deadlocks = FALSE;
+
 typedef struct srv_conc_slot_struct	srv_conc_slot_t;
 struct srv_conc_slot_struct{
 	os_event_t			event;		/*!< event to wait */
@@ -451,6 +409,10 @@ UNIV_INTERN ulong	srv_sys_stats_root_page = 0;
 #endif
 
 UNIV_INTERN ibool	srv_use_doublewrite_buf	= TRUE;
+UNIV_INTERN ibool	srv_use_atomic_writes = FALSE;
+#ifdef HAVE_POSIX_FALLOCATE
+UNIV_INTERN ibool	srv_use_posix_fallocate = TRUE;
+#endif
 UNIV_INTERN ibool	srv_use_checksums = TRUE;
 UNIV_INTERN ibool	srv_fast_checksum = FALSE;
 
@@ -489,23 +451,83 @@ UNIV_INTERN ibool	srv_print_log_io		= FALSE;
 UNIV_INTERN ibool	srv_print_latch_waits		= FALSE;
 #endif /* UNIV_DEBUG */
 
-UNIV_INTERN ulint		srv_n_rows_inserted		= 0;
-UNIV_INTERN ulint		srv_n_rows_updated		= 0;
-UNIV_INTERN ulint		srv_n_rows_deleted		= 0;
-UNIV_INTERN ulint		srv_n_rows_read			= 0;
-
 static ulint	srv_n_rows_inserted_old		= 0;
 static ulint	srv_n_rows_updated_old		= 0;
 static ulint	srv_n_rows_deleted_old		= 0;
 static ulint	srv_n_rows_read_old		= 0;
 
-UNIV_INTERN ulint		srv_n_lock_deadlock_count	= 0;
-UNIV_INTERN ulint		srv_n_lock_wait_count		= 0;
-UNIV_INTERN ulint		srv_n_lock_wait_current_count	= 0;
-UNIV_INTERN ib_int64_t	srv_n_lock_wait_time		= 0;
-UNIV_INTERN ulint		srv_n_lock_max_wait_time	= 0;
+/* Ensure counters are on separate cache lines */
+
+#define CACHE_LINE_SIZE 64
+#define CACHE_ALIGNED __attribute__ ((aligned (CACHE_LINE_SIZE)))
+
+UNIV_INTERN byte
+counters_pad_start[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
+
+UNIV_INTERN ulint		srv_n_rows_inserted CACHE_ALIGNED	= 0;
+UNIV_INTERN ulint		srv_n_rows_updated CACHE_ALIGNED	= 0;
+UNIV_INTERN ulint		srv_n_rows_deleted CACHE_ALIGNED	= 0;
+UNIV_INTERN ulint		srv_n_rows_read CACHE_ALIGNED		= 0;
+
+UNIV_INTERN ulint		srv_n_lock_deadlock_count CACHE_ALIGNED	= 0;
+UNIV_INTERN ulint		srv_n_lock_wait_count CACHE_ALIGNED	= 0;
+UNIV_INTERN ulint		srv_n_lock_wait_current_count CACHE_ALIGNED = 0;
+UNIV_INTERN ib_int64_t	srv_n_lock_wait_time CACHE_ALIGNED		= 0;
+UNIV_INTERN ulint		srv_n_lock_max_wait_time CACHE_ALIGNED	= 0;
 
-UNIV_INTERN ulint		srv_truncated_status_writes	= 0;
+UNIV_INTERN ulint		srv_truncated_status_writes CACHE_ALIGNED = 0;
+
+/* variable counts amount of data read in total (in bytes) */
+UNIV_INTERN ulint srv_data_read CACHE_ALIGNED			= 0;
+
+/* here we count the amount of data written in total (in bytes) */
+UNIV_INTERN ulint srv_data_written CACHE_ALIGNED		= 0;
+
+/* the number of the log write requests done */
+UNIV_INTERN ulint srv_log_write_requests CACHE_ALIGNED		= 0;
+
+/* the number of physical writes to the log performed */
+UNIV_INTERN ulint srv_log_writes CACHE_ALIGNED			= 0;
+
+/* amount of data written to the log files in bytes */
+UNIV_INTERN ulint srv_os_log_written CACHE_ALIGNED		= 0;
+
+/* amount of writes being done to the log files */
+UNIV_INTERN ulint srv_os_log_pending_writes CACHE_ALIGNED	= 0;
+
+/* we increase this counter, when there we don't have enough space in the
+log buffer and have to flush it */
+UNIV_INTERN ulint srv_log_waits CACHE_ALIGNED			= 0;
+
+/* this variable counts the amount of times, when the doublewrite buffer
+was flushed */
+UNIV_INTERN ulint srv_dblwr_writes CACHE_ALIGNED		= 0;
+
+/* here we store the number of pages that have been flushed to the
+doublewrite buffer */
+UNIV_INTERN ulint srv_dblwr_pages_written CACHE_ALIGNED		= 0;
+
+/* in this variable we store the number of write requests issued */
+UNIV_INTERN ulint srv_buf_pool_write_requests CACHE_ALIGNED	= 0;
+
+/* here we store the number of times when we had to wait for a free page
+in the buffer pool. It happens when the buffer pool is full and we need
+to make a flush, in order to be able to read or create a page. */
+UNIV_INTERN ulint srv_buf_pool_wait_free CACHE_ALIGNED		= 0;
+
+/** Number of buffer pool reads that led to the
+reading of a disk page */
+UNIV_INTERN ulint srv_buf_pool_reads CACHE_ALIGNED		= 0;
+
+/* variable to count the number of pages that were written from buffer
+pool to the disk */
+UNIV_INTERN ulint srv_buf_pool_flushed CACHE_ALIGNED		= 0;
+
+/* variable to count the number of LRU flushed pages */
+UNIV_INTERN ulint buf_lru_flush_page_count CACHE_ALIGNED	= 0;
+
+UNIV_INTERN byte
+counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
 
 /*
   Set the following to 0 if you want InnoDB to write messages on
@@ -1438,7 +1460,7 @@ retry:
 	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
 #endif /* UNIV_SYNC_DEBUG */
 
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		ut_usectime(&sec, &ms);
 		start_time = (ib_uint64_t)sec * 1000000 + ms;
 	} else {
@@ -1453,7 +1475,7 @@ retry:
 
 	trx->op_info = "";
 
-	if (innobase_get_slow_log() && trx->take_stats && start_time) {
+	if (UNIV_UNLIKELY(start_time != 0)) {
 		ut_usectime(&sec, &ms);
 		finish_time = (ib_uint64_t)sec * 1000000 + ms;
 		trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
@@ -1764,6 +1786,10 @@ srv_suspend_mysql_thread(
 
 	trx = thr_get_trx(thr);
 
+	if (trx->mysql_thd != 0) {
+		DEBUG_SYNC_C("srv_suspend_mysql_thread_enter");
+	}
+
 	os_event_set(srv_lock_timeout_thread_event);
 
 	mutex_enter(&kernel_mutex);
@@ -2193,6 +2219,8 @@ srv_printf_innodb_monitor(
 		(long) srv_conc_n_threads,
 		(ulong) srv_conc_n_waiting_threads);
 
+	mutex_enter(&kernel_mutex);
+
 	fprintf(file, "%lu read views open inside InnoDB\n",
 		UT_LIST_GET_LEN(trx_sys->view_list));
 
@@ -2206,6 +2234,8 @@ srv_printf_innodb_monitor(
 		}
 	}
 
+	mutex_exit(&kernel_mutex);
+
 	n_reserved = fil_space_get_n_reserved_extents(0);
 	if (n_reserved > 0) {
 		fprintf(file,
@@ -2290,16 +2320,18 @@ void
 srv_export_innodb_status(void)
 /*==========================*/
 {
-	buf_pool_stat_t	stat;
-	ulint		LRU_len;
-	ulint		free_len;
-	ulint		flush_list_len;
-	ulint		mem_adaptive_hash, mem_dictionary;
-	read_view_t*	oldest_view;
-	ulint		i;
+	buf_pool_stat_t		stat;
+	buf_pools_list_size_t	buf_pools_list_size;
+	ulint			LRU_len;
+	ulint			free_len;
+	ulint			flush_list_len;
+	ulint			mem_adaptive_hash, mem_dictionary;
+	read_view_t*		oldest_view;
+	ulint			i;
 
 	buf_get_total_stat(&stat);
 	buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+	buf_get_total_list_size_in_bytes(&buf_pools_list_size);
 
 	if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
 		mem_adaptive_hash = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
@@ -2364,7 +2396,12 @@ srv_export_innodb_status(void)
 	export_vars.innodb_buffer_pool_read_ahead_evicted
 		= stat.n_ra_pages_evicted;
 	export_vars.innodb_buffer_pool_pages_data = LRU_len;
+	export_vars.innodb_buffer_pool_bytes_data =
+		buf_pools_list_size.LRU_bytes
+		+ buf_pools_list_size.unzip_LRU_bytes;
 	export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+	export_vars.innodb_buffer_pool_bytes_dirty =
+		buf_pools_list_size.flush_list_bytes;
 	export_vars.innodb_buffer_pool_pages_free = free_len;
 	export_vars.innodb_deadlocks = srv_n_lock_deadlock_count;
 #ifdef UNIV_DEBUG
@@ -2497,6 +2534,23 @@ srv_export_innodb_status(void)
 	export_vars.innodb_rows_deleted = srv_n_rows_deleted;
 	export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
 
+#ifdef UNIV_DEBUG
+	if (trx_sys->max_trx_id < purge_sys->done_trx_no) {
+		export_vars.innodb_purge_trx_id_age = 0;
+	} else {
+		export_vars.innodb_purge_trx_id_age =
+		  trx_sys->max_trx_id - purge_sys->done_trx_no;
+	}
+
+	if (!purge_sys->view
+	    || trx_sys->max_trx_id < purge_sys->view->up_limit_id) {
+		export_vars.innodb_purge_view_trx_id_age = 0;
+	} else {
+		export_vars.innodb_purge_view_trx_id_age =
+		  trx_sys->max_trx_id - purge_sys->view->up_limit_id;
+	}
+#endif /* UNIV_DEBUG */
+
 	mutex_exit(&srv_innodb_monitor_mutex);
 }
 
@@ -3080,11 +3134,19 @@ srv_redo_log_follow_thread(
 		os_event_reset(srv_checkpoint_completed_event);
 
 		if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
-			log_online_follow_redo_log();
+			if (!log_online_follow_redo_log()) {
+				/* TODO: sync with I_S log tracking status? */
+				fprintf(stderr,
+					"InnoDB: Error: log tracking bitmap "
+					"write failed, stopping log tracking "
+					"thread!\n");
+				break;
+			}
 		}
 
 	} while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
 
+	srv_track_changed_pages = FALSE;
 	log_online_read_shutdown();
 	os_event_set(srv_redo_log_thread_finished_event);
 
@@ -3329,6 +3391,26 @@ loop:
 	for (i = 0; i < 10; i++) {
 		ulint	cur_time = ut_time_ms();
 
+#ifdef UNIV_DEBUG
+		if (btr_cur_limit_optimistic_insert_debug
+		    && srv_n_purge_threads == 0) {
+			/* If btr_cur_limit_optimistic_insert_debug is enabled
+			and there are no purge threads, the purge frequency is
+			increased 100-fold (one purge per 100 ms) to speed up
+			debug scripts that must wait for the purge to finish. */
+			next_itr_time -= 900;
+
+			srv_main_thread_op_info = "master purging";
+
+			srv_master_do_purge();
+
+			if (srv_fast_shutdown && srv_shutdown_state > 0) {
+
+				goto background_loop;
+			}
+		}
+#endif /* UNIV_DEBUG */
+
 		n_pages_flushed = 0; /* initialize */
 
 		/* ALTER TABLE in MySQL requires on Unix that the table handler
@@ -3487,8 +3569,7 @@ loop:
 
 						buf_pool = buf_pool_from_array(j);
 
-						/* The scanning flush_list is optimistic here */
-
+						buf_flush_list_mutex_enter(buf_pool);
 						level = 0;
 						n_blocks = 0;
 						bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
@@ -3502,6 +3583,7 @@ loop:
 							bpage = UT_LIST_GET_NEXT(flush_list, bpage);
 							n_blocks++;
 						}
+						buf_flush_list_mutex_exit(buf_pool);
 
 						if (level) {
 							bpl += ((ib_uint64_t) n_blocks * n_blocks
@@ -3567,30 +3649,25 @@ retry_flush_batch:
 
 				/* prev_flush_info[j] should be the previous loop's */
 				for (j = 0; j < srv_buf_pool_instances; j++) {
-					lint	blocks_num, new_blocks_num, flushed_blocks_num;
-					ibool	found;
+					lint	blocks_num, new_blocks_num = 0;
+					lint	flushed_blocks_num;
 
 					buf_pool = buf_pool_from_array(j);
+					buf_flush_list_mutex_enter(buf_pool);
 
 					blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
 					bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-					new_blocks_num = 0;
 
-					found = FALSE;
 					while (bpage != NULL) {
 						if (prev_flush_info[j].space == bpage->space
 						    && prev_flush_info[j].offset == bpage->offset
 						    && prev_flush_info[j].oldest_modification
 								== bpage->oldest_modification) {
-							found = TRUE;
 							break;
 						}
 						bpage = UT_LIST_GET_NEXT(flush_list, bpage);
 						new_blocks_num++;
 					}
-					if (!found) {
-						new_blocks_num = blocks_num;
-					}
 
 					flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
 								- blocks_num;
@@ -3605,7 +3682,9 @@ retry_flush_batch:
 						prev_flush_info[j].space = bpage->space;
 						prev_flush_info[j].offset = bpage->offset;
 						prev_flush_info[j].oldest_modification = bpage->oldest_modification;
+						buf_flush_list_mutex_exit(buf_pool);
 					} else {
+						buf_flush_list_mutex_exit(buf_pool);
 						prev_flush_info[j].space = 0;
 						prev_flush_info[j].offset = 0;
 						prev_flush_info[j].oldest_modification = 0;
@@ -3631,6 +3710,7 @@ retry_flush_batch:
 				/* store previous first pages of the flush_list */
 				for (j = 0; j < srv_buf_pool_instances; j++) {
 					buf_pool = buf_pool_from_array(j);
+					buf_flush_list_mutex_enter(buf_pool);
 
 					bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
@@ -3639,7 +3719,9 @@ retry_flush_batch:
 						prev_flush_info[j].space = bpage->space;
 						prev_flush_info[j].offset = bpage->offset;
 						prev_flush_info[j].oldest_modification = bpage->oldest_modification;
+						buf_flush_list_mutex_exit(buf_pool);
 					} else {
+						buf_flush_list_mutex_exit(buf_pool);
 						prev_flush_info[j].space = 0;
 						prev_flush_info[j].offset = 0;
 						prev_flush_info[j].oldest_modification = 0;
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index 2faa68cb87c..9e0477253cd 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -1155,6 +1155,11 @@ void
 init_log_online(void)
 /*=================*/
 {
+	if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
+		srv_track_changed_pages = FALSE;
+		return;
+	}
+
 	if (srv_track_changed_pages) {
 
 		log_online_read_init();
diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c
index efc43c4cbe5..25f96d9817a 100644
--- a/storage/xtradb/sync/sync0sync.c
+++ b/storage/xtradb/sync/sync0sync.c
@@ -315,9 +315,9 @@ mutex_create_func(
 
 	/* NOTE! The very first mutexes are not put to the mutex list */
 
-	if ((mutex == &mutex_list_mutex)
+	if (mutex == &mutex_list_mutex
 #ifdef UNIV_SYNC_DEBUG
-	    || (mutex == &sync_thread_mutex)
+	    || mutex == &sync_thread_mutex
 #endif /* UNIV_SYNC_DEBUG */
 	    ) {
 
diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.c
index 122aab119ba..b048dc66efe 100644
--- a/storage/xtradb/trx/trx0purge.c
+++ b/storage/xtradb/trx/trx0purge.c
@@ -61,6 +61,10 @@ UNIV_INTERN mysql_pfs_key_t	trx_purge_latch_key;
 UNIV_INTERN mysql_pfs_key_t	purge_sys_bh_mutex_key;
 #endif /* UNIV_PFS_MUTEX */
 
+#ifdef UNIV_DEBUG
+UNIV_INTERN my_bool		srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
 /*****************************************************************//**
 Checks if trx_id is >= purge_view: then it is guaranteed that its update
 undo log still exists in the system.
@@ -236,6 +240,7 @@ trx_purge_sys_create(
 	purge_sys->purge_trx_no = 0;
 	purge_sys->purge_undo_no = 0;
 	purge_sys->next_stored = FALSE;
+	ut_d(purge_sys->done_trx_no = 0);
 
 	rw_lock_create(trx_purge_latch_key,
 		       &purge_sys->latch, SYNC_PURGE_LATCH);
@@ -656,6 +661,12 @@ trx_purge_truncate_if_arr_empty(void)
 {
 	static ulint	count;
 
+#ifdef UNIV_DEBUG
+	if (purge_sys->arr->n_used == 0) {
+		purge_sys->done_trx_no = purge_sys->purge_trx_no;
+	}
+#endif /* UNIV_DEBUG */
+
 	if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
 
 		trx_purge_truncate_history();
@@ -1172,6 +1183,12 @@ trx_purge(
 
 	rw_lock_x_unlock(&(purge_sys->latch));
 
+#ifdef UNIV_DEBUG
+	if (srv_purge_view_update_only_debug) {
+		return(0);
+	}
+#endif
+
 	purge_sys->state = TRX_PURGE_ON;
 
 	purge_sys->handle_limit = purge_sys->n_pages_handled + limit;
diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.c
index db4897c368d..ef42152aeb7 100644
--- a/storage/xtradb/trx/trx0rec.c
+++ b/storage/xtradb/trx/trx0rec.c
@@ -36,6 +36,7 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef UNIV_HOTBACKUP
 #include "dict0dict.h"
 #include "ut0mem.h"
+#include "read0read.h"
 #include "row0ext.h"
 #include "row0upd.h"
 #include "que0que.h"
@@ -1647,6 +1648,25 @@ trx_undo_prev_version_build(
 	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
 		ulint	n_ext;
 
+		/* If the previous version of the record is delete-marked, we
+		must confirm that its disowned external data still exists. If
+		the trx_id of the previous version is visible to the purge
+		view, we must treat it as missing history, because the
+		disowned external data might have been purged already.
+
+		The inherited external data (BLOBs) can be freed (purged)
+		after trx_id has committed, provided that no view was started
+		before trx_id. If the purge view can see the delete-marked
+		record committed by trx_id, no transaction needs to access
+		the BLOB. */
+
+		if ((update->info_bits & REC_INFO_DELETED_FLAG)
+		    && read_view_sees_trx_id(purge_sys->view, trx_id)) {
+			/* treat as a fresh insert, so as not to
+			cause an assertion failure in the caller. */
+			return(DB_SUCCESS);
+		}
+
 		/* We have to set the appropriate extern storage bits in the
 		old version of the record: the extern bits in rec for those
 		fields that update does NOT update, as well as the bits for
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 99b4276fbee..a17f8abdad0 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -235,7 +235,7 @@ trx_allocate_for_mysql(void)
 
 	mutex_exit(&kernel_mutex);
 
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
 		memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
 	}
@@ -1269,7 +1269,7 @@ trx_end_lock_wait(
 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
 	}
 
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		ut_usectime(&sec, &ms);
 		now = (ib_uint64_t)sec * 1000000 + ms;
 		trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
@@ -1304,7 +1304,7 @@ trx_lock_wait_to_suspended(
 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
 	}
 
-	if (innobase_get_slow_log() && trx->take_stats) {
+	if (UNIV_UNLIKELY(trx->take_stats)) {
 		ut_usectime(&sec, &ms);
 		now = (ib_uint64_t)sec * 1000000 + ms;
 		trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);