Diffstat (limited to 'storage/innobase')
-rw-r--r--  storage/innobase/CMakeLists.txt | 9
-rw-r--r--  storage/innobase/btr/btr0btr.cc | 615
-rw-r--r--  storage/innobase/btr/btr0bulk.cc | 90
-rw-r--r--  storage/innobase/btr/btr0cur.cc | 1052
-rw-r--r--  storage/innobase/btr/btr0defragment.cc | 52
-rw-r--r--  storage/innobase/btr/btr0pcur.cc | 30
-rw-r--r--  storage/innobase/btr/btr0scrub.cc | 39
-rw-r--r--  storage/innobase/btr/btr0sea.cc | 560
-rw-r--r--  storage/innobase/buf/buf0buddy.cc | 38
-rw-r--r--  storage/innobase/buf/buf0buf.cc | 244
-rw-r--r--  storage/innobase/buf/buf0checksum.cc | 6
-rw-r--r--  storage/innobase/buf/buf0dblwr.cc | 79
-rw-r--r--  storage/innobase/buf/buf0dump.cc | 53
-rw-r--r--  storage/innobase/buf/buf0flu.cc | 134
-rw-r--r--  storage/innobase/buf/buf0lru.cc | 37
-rw-r--r--  storage/innobase/buf/buf0mtflu.cc | 736
-rw-r--r--  storage/innobase/buf/buf0rea.cc | 6
-rw-r--r--  storage/innobase/data/data0data.cc | 45
-rw-r--r--  storage/innobase/data/data0type.cc | 8
-rw-r--r--  storage/innobase/dict/dict0boot.cc | 132
-rw-r--r--  storage/innobase/dict/dict0crea.cc | 358
-rw-r--r--  storage/innobase/dict/dict0dict.cc | 503
-rw-r--r--  storage/innobase/dict/dict0load.cc | 210
-rw-r--r--  storage/innobase/dict/dict0mem.cc | 450
-rw-r--r--  storage/innobase/dict/dict0stats.cc | 78
-rw-r--r--  storage/innobase/dict/dict0stats_bg.cc | 23
-rw-r--r--  storage/innobase/eval/eval0eval.cc | 9
-rw-r--r--  storage/innobase/fil/fil0crypt.cc | 59
-rw-r--r--  storage/innobase/fil/fil0fil.cc | 2174
-rw-r--r--  storage/innobase/fil/fil0pagecompress.cc | 8
-rw-r--r--  storage/innobase/fsp/fsp0file.cc | 41
-rw-r--r--  storage/innobase/fsp/fsp0fsp.cc | 283
-rw-r--r--  storage/innobase/fsp/fsp0sysspace.cc | 54
-rw-r--r--  storage/innobase/fts/fts0config.cc | 2
-rw-r--r--  storage/innobase/fts/fts0fts.cc | 258
-rw-r--r--  storage/innobase/fts/fts0opt.cc | 34
-rw-r--r--  storage/innobase/fts/fts0plugin.cc | 22
-rw-r--r--  storage/innobase/fts/fts0que.cc | 20
-rw-r--r--  storage/innobase/fts/fts0sql.cc | 6
-rw-r--r--  storage/innobase/fut/fut0lst.cc | 6
-rw-r--r--  storage/innobase/gis/gis0geo.cc | 44
-rw-r--r--  storage/innobase/gis/gis0rtree.cc | 171
-rw-r--r--  storage/innobase/gis/gis0sea.cc | 42
-rw-r--r--  storage/innobase/ha/ha0ha.cc | 20
-rw-r--r--  storage/innobase/handler/ha_innodb.cc | 2961
-rw-r--r--  storage/innobase/handler/ha_innodb.h | 47
-rw-r--r--  storage/innobase/handler/ha_xtradb.h | 1009
-rw-r--r--  storage/innobase/handler/handler0alter.cc | 2649
-rw-r--r--  storage/innobase/handler/i_s.cc | 301
-rw-r--r--  storage/innobase/handler/i_s.h | 8
-rw-r--r--  storage/innobase/ibuf/ibuf0ibuf.cc | 192
-rw-r--r--  storage/innobase/include/btr0btr.h | 80
-rw-r--r--  storage/innobase/include/btr0btr.ic | 26
-rw-r--r--  storage/innobase/include/btr0bulk.h | 10
-rw-r--r--  storage/innobase/include/btr0cur.h | 65
-rw-r--r--  storage/innobase/include/btr0cur.ic | 25
-rw-r--r--  storage/innobase/include/btr0pcur.h | 39
-rw-r--r--  storage/innobase/include/btr0pcur.ic | 74
-rw-r--r--  storage/innobase/include/btr0sea.h | 145
-rw-r--r--  storage/innobase/include/btr0sea.ic | 112
-rw-r--r--  storage/innobase/include/buf0buddy.h | 10
-rw-r--r--  storage/innobase/include/buf0buddy.ic | 16
-rw-r--r--  storage/innobase/include/buf0buf.h | 18
-rw-r--r--  storage/innobase/include/buf0buf.ic | 25
-rw-r--r--  storage/innobase/include/buf0checksum.h | 2
-rw-r--r--  storage/innobase/include/buf0dblwr.h | 4
-rw-r--r--  storage/innobase/include/buf0flu.h | 77
-rw-r--r--  storage/innobase/include/buf0lru.h | 1
-rw-r--r--  storage/innobase/include/buf0mtflu.h | 95
-rw-r--r--  storage/innobase/include/buf0types.h | 4
-rw-r--r--  storage/innobase/include/data0data.h | 26
-rw-r--r--  storage/innobase/include/data0data.ic | 3
-rw-r--r--  storage/innobase/include/data0type.h | 34
-rw-r--r--  storage/innobase/include/data0type.ic | 26
-rw-r--r--  storage/innobase/include/dict0boot.h | 2
-rw-r--r--  storage/innobase/include/dict0boot.ic | 10
-rw-r--r--  storage/innobase/include/dict0crea.h | 21
-rw-r--r--  storage/innobase/include/dict0dict.h | 196
-rw-r--r--  storage/innobase/include/dict0dict.ic | 167
-rw-r--r--  storage/innobase/include/dict0load.h | 17
-rw-r--r--  storage/innobase/include/dict0mem.h | 371
-rw-r--r--  storage/innobase/include/dict0mem.ic | 7
-rw-r--r--  storage/innobase/include/dict0stats_bg.h | 13
-rw-r--r--  storage/innobase/include/dict0types.h | 38
-rw-r--r--  storage/innobase/include/dyn0buf.h | 61
-rw-r--r--  storage/innobase/include/fil0crypt.h | 4
-rw-r--r--  storage/innobase/include/fil0fil.h | 577
-rw-r--r--  storage/innobase/include/fil0fil.ic | 2
-rw-r--r--  storage/innobase/include/fsp0file.h | 11
-rw-r--r--  storage/innobase/include/fsp0fsp.h | 91
-rw-r--r--  storage/innobase/include/fsp0fsp.ic | 24
-rw-r--r--  storage/innobase/include/fsp0sysspace.h | 14
-rw-r--r--  storage/innobase/include/fsp0types.h | 42
-rw-r--r--  storage/innobase/include/fts0fts.h | 75
-rw-r--r--  storage/innobase/include/fts0priv.h | 1
-rw-r--r--  storage/innobase/include/fts0tokenize.h | 2
-rw-r--r--  storage/innobase/include/fts0types.ic | 7
-rw-r--r--  storage/innobase/include/fut0fut.ic | 2
-rw-r--r--  storage/innobase/include/fut0lst.ic | 4
-rw-r--r--  storage/innobase/include/gis0rtree.h | 19
-rw-r--r--  storage/innobase/include/gis0rtree.ic | 2
-rw-r--r--  storage/innobase/include/ha_prototypes.h | 27
-rw-r--r--  storage/innobase/include/handler0alter.h | 8
-rw-r--r--  storage/innobase/include/ib0mutex.h | 83
-rw-r--r--  storage/innobase/include/ibuf0ibuf.h | 28
-rw-r--r--  storage/innobase/include/ibuf0ibuf.ic | 8
-rw-r--r--  storage/innobase/include/lock0lock.h | 146
-rw-r--r--  storage/innobase/include/lock0lock.ic | 8
-rw-r--r--  storage/innobase/include/lock0prdt.h | 5
-rw-r--r--  storage/innobase/include/lock0types.h | 3
-rw-r--r--  storage/innobase/include/log0log.h | 333
-rw-r--r--  storage/innobase/include/log0log.ic | 56
-rw-r--r--  storage/innobase/include/log0recv.h | 23
-rw-r--r--  storage/innobase/include/mem0mem.h | 58
-rw-r--r--  storage/innobase/include/mem0mem.ic | 22
-rw-r--r--  storage/innobase/include/mtr0log.ic | 2
-rw-r--r--  storage/innobase/include/mtr0mtr.h | 73
-rw-r--r--  storage/innobase/include/mtr0types.h | 14
-rw-r--r--  storage/innobase/include/os0event.h | 6
-rw-r--r--  storage/innobase/include/os0file.h | 38
-rw-r--r--  storage/innobase/include/os0file.ic | 88
-rw-r--r--  storage/innobase/include/os0once.h | 4
-rw-r--r--  storage/innobase/include/os0thread.h | 12
-rw-r--r--  storage/innobase/include/page0cur.h | 5
-rw-r--r--  storage/innobase/include/page0cur.ic | 7
-rw-r--r--  storage/innobase/include/page0page.h | 165
-rw-r--r--  storage/innobase/include/page0page.ic | 148
-rw-r--r--  storage/innobase/include/page0size.h | 2
-rw-r--r--  storage/innobase/include/page0zip.h | 41
-rw-r--r--  storage/innobase/include/page0zip.ic | 8
-rw-r--r--  storage/innobase/include/pars0pars.h | 2
-rw-r--r--  storage/innobase/include/que0que.h | 18
-rw-r--r--  storage/innobase/include/read0read.h | 123
-rw-r--r--  storage/innobase/include/read0types.h | 338
-rw-r--r--  storage/innobase/include/rem0rec.h | 471
-rw-r--r--  storage/innobase/include/rem0rec.ic | 491
-rw-r--r--  storage/innobase/include/rem0types.h | 5
-rw-r--r--  storage/innobase/include/row0ftsort.h | 18
-rw-r--r--  storage/innobase/include/row0import.h | 24
-rw-r--r--  storage/innobase/include/row0ins.h | 3
-rw-r--r--  storage/innobase/include/row0log.h | 14
-rw-r--r--  storage/innobase/include/row0merge.h | 30
-rw-r--r--  storage/innobase/include/row0mysql.h | 35
-rw-r--r--  storage/innobase/include/row0purge.h | 2
-rw-r--r--  storage/innobase/include/row0row.h | 27
-rw-r--r--  storage/innobase/include/row0row.ic | 9
-rw-r--r--  storage/innobase/include/row0sel.h | 3
-rw-r--r--  storage/innobase/include/row0trunc.h | 38
-rw-r--r--  storage/innobase/include/row0undo.h | 2
-rw-r--r--  storage/innobase/include/row0upd.h | 84
-rw-r--r--  storage/innobase/include/row0upd.ic | 5
-rw-r--r--  storage/innobase/include/row0vers.h | 23
-rw-r--r--  storage/innobase/include/srv0conc.h | 5
-rw-r--r--  storage/innobase/include/srv0mon.h | 30
-rw-r--r--  storage/innobase/include/srv0srv.h | 84
-rw-r--r--  storage/innobase/include/srv0start.h | 14
-rw-r--r--  storage/innobase/include/sync0arr.h | 15
-rw-r--r--  storage/innobase/include/sync0policy.h | 36
-rw-r--r--  storage/innobase/include/sync0policy.ic | 4
-rw-r--r--  storage/innobase/include/sync0rw.h | 16
-rw-r--r--  storage/innobase/include/sync0rw.ic | 161
-rw-r--r--  storage/innobase/include/sync0sync.h | 3
-rw-r--r--  storage/innobase/include/sync0types.h | 144
-rw-r--r--  storage/innobase/include/trx0i_s.h | 4
-rw-r--r--  storage/innobase/include/trx0purge.h | 343
-rw-r--r--  storage/innobase/include/trx0purge.ic | 21
-rw-r--r--  storage/innobase/include/trx0rec.h | 54
-rw-r--r--  storage/innobase/include/trx0rec.ic | 33
-rw-r--r--  storage/innobase/include/trx0roll.h | 25
-rw-r--r--  storage/innobase/include/trx0roll.ic | 62
-rw-r--r--  storage/innobase/include/trx0rseg.h | 195
-rw-r--r--  storage/innobase/include/trx0rseg.ic | 48
-rw-r--r--  storage/innobase/include/trx0sys.h | 1367
-rw-r--r--  storage/innobase/include/trx0sys.ic | 461
-rw-r--r--  storage/innobase/include/trx0trx.h | 446
-rw-r--r--  storage/innobase/include/trx0trx.ic | 47
-rw-r--r--  storage/innobase/include/trx0types.h | 58
-rw-r--r--  storage/innobase/include/trx0undo.h | 207
-rw-r--r--  storage/innobase/include/trx0undo.ic | 181
-rw-r--r--  storage/innobase/include/univ.i | 69
-rw-r--r--  storage/innobase/include/ut0byte.ic | 6
-rw-r--r--  storage/innobase/include/ut0crc32.h | 9
-rw-r--r--  storage/innobase/include/ut0dbg.h | 4
-rw-r--r--  storage/innobase/include/ut0lst.h | 2
-rw-r--r--  storage/innobase/include/ut0new.h | 143
-rw-r--r--  storage/innobase/include/ut0pool.h | 2
-rw-r--r--  storage/innobase/include/ut0rnd.h | 10
-rw-r--r--  storage/innobase/include/ut0rnd.ic | 24
-rw-r--r--  storage/innobase/include/ut0stage.h | 61
-rw-r--r--  storage/innobase/include/ut0ut.h | 39
-rw-r--r--  storage/innobase/innodb.cmake | 37
-rw-r--r--  storage/innobase/lock/lock0lock.cc | 2040
-rw-r--r--  storage/innobase/lock/lock0prdt.cc | 36
-rw-r--r--  storage/innobase/lock/lock0wait.cc | 86
-rw-r--r--  storage/innobase/log/log0crypt.cc | 9
-rw-r--r--  storage/innobase/log/log0log.cc | 990
-rw-r--r--  storage/innobase/log/log0recv.cc | 447
-rw-r--r--  storage/innobase/mem/mem0mem.cc | 35
-rw-r--r--  storage/innobase/mtr/mtr0log.cc | 124
-rw-r--r--  storage/innobase/mtr/mtr0mtr.cc | 135
-rw-r--r--  storage/innobase/mysql-test/storage_engine/repair_table.rdiff | 5
-rw-r--r--  storage/innobase/os/os0event.cc | 25
-rw-r--r--  storage/innobase/os/os0file.cc | 170
-rw-r--r--  storage/innobase/os/os0thread.cc | 4
-rw-r--r--  storage/innobase/page/page0cur.cc | 372
-rw-r--r--  storage/innobase/page/page0page.cc | 189
-rw-r--r--  storage/innobase/page/page0zip.cc | 359
-rw-r--r--  storage/innobase/pars/pars0opt.cc | 7
-rw-r--r--  storage/innobase/pars/pars0pars.cc | 36
-rw-r--r--  storage/innobase/que/que0que.cc | 9
-rw-r--r--  storage/innobase/read/read0read.cc | 701
-rw-r--r--  storage/innobase/rem/rem0cmp.cc | 31
-rw-r--r--  storage/innobase/rem/rem0rec.cc | 863
-rw-r--r--  storage/innobase/row/row0ext.cc | 2
-rw-r--r--  storage/innobase/row/row0ftsort.cc | 93
-rw-r--r--  storage/innobase/row/row0import.cc | 267
-rw-r--r--  storage/innobase/row/row0ins.cc | 227
-rw-r--r--  storage/innobase/row/row0log.cc | 618
-rw-r--r--  storage/innobase/row/row0merge.cc | 587
-rw-r--r--  storage/innobase/row/row0mysql.cc | 756
-rw-r--r--  storage/innobase/row/row0purge.cc | 164
-rw-r--r--  storage/innobase/row/row0quiesce.cc | 21
-rw-r--r--  storage/innobase/row/row0row.cc | 412
-rw-r--r--  storage/innobase/row/row0sel.cc | 461
-rw-r--r--  storage/innobase/row/row0trunc.cc | 1484
-rw-r--r--  storage/innobase/row/row0uins.cc | 113
-rw-r--r--  storage/innobase/row/row0umod.cc | 305
-rw-r--r--  storage/innobase/row/row0undo.cc | 26
-rw-r--r--  storage/innobase/row/row0upd.cc | 311
-rw-r--r--  storage/innobase/row/row0vers.cc | 130
-rw-r--r--  storage/innobase/srv/srv0conc.cc | 27
-rw-r--r--  storage/innobase/srv/srv0mon.cc | 26
-rw-r--r--  storage/innobase/srv/srv0srv.cc | 273
-rw-r--r--  storage/innobase/srv/srv0start.cc | 609
-rw-r--r--  storage/innobase/sync/sync0arr.cc | 48
-rw-r--r--  storage/innobase/sync/sync0debug.cc | 52
-rw-r--r--  storage/innobase/sync/sync0rw.cc | 129
-rw-r--r--  storage/innobase/sync/sync0sync.cc | 9
-rw-r--r--  storage/innobase/trx/trx0i_s.cc | 160
-rw-r--r--  storage/innobase/trx/trx0purge.cc | 1027
-rw-r--r--  storage/innobase/trx/trx0rec.cc | 656
-rw-r--r--  storage/innobase/trx/trx0roll.cc | 546
-rw-r--r--  storage/innobase/trx/trx0rseg.cc | 650
-rw-r--r--  storage/innobase/trx/trx0sys.cc | 818
-rw-r--r--  storage/innobase/trx/trx0trx.cc | 1459
-rw-r--r--  storage/innobase/trx/trx0undo.cc | 1223
-rw-r--r--  storage/innobase/ut/ut0crc32.cc | 157
-rw-r--r--  storage/innobase/ut/ut0new.cc | 2
-rw-r--r--  storage/innobase/ut/ut0ut.cc | 41
249 files changed, 19787 insertions, 27135 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index ce47074bc53..e0bc7006770 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -36,7 +36,6 @@ SET(INNOBASE_SOURCES
buf/buf0flu.cc
buf/buf0lru.cc
buf/buf0rea.cc
- buf/buf0mtflu.cc
data/data0data.cc
data/data0type.cc
dict/dict0boot.cc
@@ -154,7 +153,7 @@ MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
DEFAULT RECOMPILE_FOR_EMBEDDED
LINK_LIBRARIES
${ZLIB_LIBRARY}
- ${CRC32_VPMSUM_LIBRARY}
+ ${CRC32_LIBRARY}
${NUMA_LIBRARY}
${LIBSYSTEMD}
${LINKER_SCRIPT})
@@ -180,12 +179,18 @@ IF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
mtr/mtr0mtr.cc
row/row0merge.cc
row/row0mysql.cc
+ row/row0trunc.cc
srv/srv0srv.cc
COMPILE_FLAGS "-O0"
)
ENDIF()
ENDIF()
IF(MSVC)
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ ADD_COMPILE_FLAGS(
+ pars/lexyy.cc
+ COMPILE_FLAGS "/wd4267")
+ ENDIF()
# silence "switch statement contains 'default' but no 'case' label
# on generated file.
TARGET_COMPILE_OPTIONS(innobase PRIVATE "/wd4065")
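
The /wd4267 block added above is guarded by CMAKE_SIZEOF_VOID_P EQUAL 8 because MSVC warning C4267 (implicit narrowing of the 64-bit size_t into a smaller integer type) is only emitted on 64-bit targets, and pars/lexyy.cc is generated scanner code that is impractical to patch by hand. A minimal, self-contained illustration of the kind of code that raises C4267; this is a hypothetical example, not taken from lexyy.cc:

	#include <cstring>

	/* On a 64-bit MSVC build, storing the 64-bit size_t returned by
	   strlen() in a 32-bit integer raises warning C4267:
	   "conversion from 'size_t' to 'unsigned int', possible loss of data". */
	unsigned int token_length(const char* token)
	{
		unsigned int len = std::strlen(token);	/* C4267 reported here */
		return len;
	}
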
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index d06568535e6..96be7349b46 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -204,7 +204,7 @@ btr_root_fseg_validate(
ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
ut_a(offset >= FIL_PAGE_DATA);
- ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+ ut_a(offset <= srv_page_size - FIL_PAGE_DATA_END);
return(TRUE);
}
#endif /* UNIV_BTR_DEBUG */
@@ -220,24 +220,25 @@ btr_root_block_get(
or RW_X_LATCH */
mtr_t* mtr) /*!< in: mtr */
{
- const ulint space = dict_index_get_space(index);
- const page_id_t page_id(space, dict_index_get_page(index));
- const page_size_t page_size(dict_table_page_size(index->table));
+ if (!index->table || !index->table->space) {
+ return NULL;
+ }
- buf_block_t* block = btr_block_get(page_id, page_size, mode,
- index, mtr);
+ buf_block_t* block = btr_block_get(
+ page_id_t(index->table->space_id, index->page),
+ page_size_t(index->table->space->flags), mode,
+ index, mtr);
if (!block) {
- if (index && index->table) {
- index->table->file_unreadable = true;
-
- ib_push_warning(
- static_cast<THD*>(NULL), DB_DECRYPTION_FAILED,
- "Table %s in tablespace %lu is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name, space);
- }
+ index->table->file_unreadable = true;
+
+ ib_push_warning(
+ static_cast<THD*>(NULL), DB_DECRYPTION_FAILED,
+ "Table %s in file %s is encrypted but encryption service or"
+ " used key_id is not available. "
+ " Can't continue reading table.",
+ index->table->name,
+ UT_LIST_GET_FIRST(index->table->space->chain)->name);
return NULL;
}
@@ -249,9 +250,9 @@ btr_root_block_get(
const page_t* root = buf_block_get_frame(block);
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
+ + root, index->table->space_id));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
+ + root, index->table->space_id));
}
#endif /* UNIV_BTR_DEBUG */
@@ -304,7 +305,7 @@ btr_height_get(
root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
if (root_block) {
- height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+ height = btr_page_get_level(buf_block_get_frame(root_block));
/* Release the S latch on the root page. */
mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX);
@@ -332,7 +333,7 @@ btr_root_fseg_adjust_on_import(
ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
if (offset < FIL_PAGE_DATA
- || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+ || offset > srv_page_size - FIL_PAGE_DATA_END) {
return(FALSE);
@@ -362,9 +363,8 @@ btr_root_adjust_on_import(
buf_block_t* block;
page_zip_des_t* page_zip;
dict_table_t* table = index->table;
- const ulint space_id = dict_index_get_space(index);
- const page_id_t page_id(space_id, dict_index_get_page(index));
- const page_size_t page_size(dict_table_page_size(table));
+ const page_id_t page_id(table->space_id, index->page);
+ const page_size_t page_size(table->space->flags);
DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
return(DB_CORRUPTION););
@@ -393,10 +393,9 @@ btr_root_adjust_on_import(
} else {
/* Check that the table flags and the tablespace
flags match. */
- ulint flags = dict_tf_to_fsp_flags(table->flags);
- ulint fsp_flags = fil_space_get_flags(table->space);
- err = flags == fsp_flags
- ? DB_SUCCESS : DB_CORRUPTION;
+ err = (dict_tf_to_fsp_flags(table->flags)
+ == table->space->flags)
+ ? DB_SUCCESS : DB_CORRUPTION;
}
} else {
err = DB_SUCCESS;
@@ -406,10 +405,10 @@ btr_root_adjust_on_import(
if (err == DB_SUCCESS
&& (!btr_root_fseg_adjust_on_import(
FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + page, page_zip, space_id, &mtr)
+ + page, page_zip, table->space_id, &mtr)
|| !btr_root_fseg_adjust_on_import(
FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + page, page_zip, space_id, &mtr))) {
+ + page, page_zip, table->space_id, &mtr))) {
err = DB_CORRUPTION;
}
@@ -433,7 +432,7 @@ btr_page_create(
{
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
if (page_zip) {
page_create_zip(block, index, level, 0, NULL, mtr);
@@ -475,8 +474,8 @@ btr_page_alloc_for_ibuf(
ut_a(node_addr.page != FIL_NULL);
new_block = buf_page_get(
- page_id_t(dict_index_get_space(index), node_addr.page),
- dict_table_page_size(index->table),
+ page_id_t(index->table->space_id, node_addr.page),
+ page_size_t(index->table->space->flags),
RW_X_LATCH, mtr);
new_page = buf_block_get_frame(new_block);
@@ -710,7 +709,7 @@ btr_page_free_for_ibuf(
{
page_t* root;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
root = btr_root_get(index, mtr);
flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
@@ -736,7 +735,7 @@ btr_page_free_low(
fseg_header_t* seg_header;
page_t* root;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The page gets invalid for optimistic searches: increment the frame
modify clock */
@@ -753,7 +752,7 @@ btr_page_free_low(
// TODO(jonaso): scrub only what is actually needed
page_t* page = buf_block_get_frame(block);
memset(page + PAGE_HEADER, 0,
- UNIV_PAGE_SIZE - PAGE_HEADER);
+ srv_page_size - PAGE_HEADER);
#ifdef UNIV_DEBUG_SCRUBBING
fprintf(stderr,
"btr_page_free_low: scrub blob page %lu/%lu\n",
@@ -851,7 +850,7 @@ btr_page_free_low(
/* The page was marked free in the allocation bitmap, but it
should remain buffer-fixed until mtr_commit(mtr) or until it
is explicitly freed from the mini-transaction. */
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* TODO: Discard any operations on the page from the redo log
and remove the block from the flush list and the buffer pool.
This would free up buffer pool earlier and reduce writes to
@@ -869,7 +868,7 @@ btr_page_free(
mtr_t* mtr) /*!< in: mtr */
{
const page_t* page = buf_block_get_frame(block);
- ulint level = btr_page_get_level(page, mtr);
+ ulint level = btr_page_get_level(page);
ut_ad(fil_page_index_page_check(block->frame));
ut_ad(level != ULINT_UNDEFINED);
@@ -924,13 +923,14 @@ btr_node_ptr_get_child(
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(rec_offs_validate(node_ptr, index, offsets));
-
- const page_id_t page_id(
- page_get_space_id(page_align(node_ptr)),
- btr_node_ptr_get_child_page_no(node_ptr, offsets));
-
- return(btr_block_get(page_id, dict_table_page_size(index->table),
- RW_SX_LATCH, index, mtr));
+ ut_ad(index->table->space_id
+ == page_get_space_id(page_align(node_ptr)));
+
+ return btr_block_get(
+ page_id_t(index->table->space_id,
+ btr_node_ptr_get_child_page_no(node_ptr, offsets)),
+ page_size_t(index->table->space->flags),
+ RW_SX_LATCH, index, mtr);
}
/************************************************************//**
@@ -973,7 +973,7 @@ btr_page_get_father_node_ptr_func(
ut_ad(dict_index_get_page(index) != page_no);
- level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
+ level = btr_page_get_level(btr_cur_get_page(cursor));
user_rec = btr_cur_get_rec(cursor);
ut_a(page_rec_is_user_rec(user_rec));
@@ -1176,8 +1176,7 @@ btr_free_root_check(
/** Create the root node for a new index tree.
@param[in] type type of the index
-@param[in] space space where created
-@param[in] page_size page size
+@param[in,out] space tablespace where created
@param[in] index_id index id
@param[in] index index, or NULL when applying TRUNCATE
log record during recovery
@@ -1188,8 +1187,7 @@ record during recovery
ulint
btr_create(
ulint type,
- ulint space,
- const page_size_t& page_size,
+ fil_space_t* space,
index_id_t index_id,
dict_index_t* index,
const btr_create_t* btr_redo_create_info,
@@ -1256,7 +1254,7 @@ btr_create(
/* Not enough space for new segment, free root
segment before return. */
btr_free_root(block, mtr);
- if (!dict_table_is_temporary(index->table)) {
+ if (!index->table->is_temporary()) {
btr_free_root_invalidate(block, mtr);
}
@@ -1331,7 +1329,7 @@ btr_create(
Note: Insert Buffering is disabled for temporary tables given that
most temporary tables are smaller in size and short-lived. */
if (!(type & DICT_CLUSTERED)
- && (index == NULL || !dict_table_is_temporary(index->table))) {
+ && (index == NULL || !index->table->is_temporary())) {
ibuf_reset_free_bits(block);
}
@@ -1362,7 +1360,7 @@ btr_free_but_not_root(
leaf_loop:
mtr_start(&mtr);
mtr_set_log_mode(&mtr, log_mode);
- mtr.set_named_space(block->page.id.space());
+ mtr.set_named_space_id(block->page.id.space());
page_t* root = block->frame;
@@ -1392,7 +1390,7 @@ leaf_loop:
top_loop:
mtr_start(&mtr);
mtr_set_log_mode(&mtr, log_mode);
- mtr.set_named_space(block->page.id.space());
+ mtr.set_named_space_id(block->page.id.space());
root = block->frame;
@@ -1406,7 +1404,6 @@ top_loop:
mtr_commit(&mtr);
if (!finished) {
-
goto top_loop;
}
}
@@ -1432,7 +1429,7 @@ btr_free_if_exists(
ut_ad(page_is_root(root->frame));
btr_free_but_not_root(root, mtr->get_log_mode());
- mtr->set_named_space(page_id.space());
+ mtr->set_named_space_id(page_id.space());
btr_free_root(root, mtr);
btr_free_root_invalidate(root, mtr);
}
@@ -1468,28 +1465,22 @@ btr_free(
ib_uint64_t
btr_read_autoinc(dict_index_t* index)
{
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(index->table->persistent_autoinc);
- ut_ad(!dict_table_is_temporary(index->table));
-
- if (fil_space_t* space = fil_space_acquire(index->space)) {
- mtr_t mtr;
- mtr.start();
- ib_uint64_t autoinc;
- if (buf_block_t* block = buf_page_get(
- page_id_t(index->space, index->page),
- page_size_t(space->flags),
- RW_S_LATCH, &mtr)) {
- autoinc = page_get_autoinc(block->frame);
- } else {
- autoinc = 0;
- }
- mtr.commit();
- fil_space_release(space);
- return(autoinc);
+ ut_ad(!index->table->is_temporary());
+ mtr_t mtr;
+ mtr.start();
+ ib_uint64_t autoinc;
+ if (buf_block_t* block = buf_page_get(
+ page_id_t(index->table->space_id, index->page),
+ page_size_t(index->table->space->flags),
+ RW_S_LATCH, &mtr)) {
+ autoinc = page_get_autoinc(block->frame);
+ } else {
+ autoinc = 0;
}
-
- return(0);
+ mtr.commit();
+ return autoinc;
}
/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC,
@@ -1502,47 +1493,43 @@ ib_uint64_t
btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
{
ut_ad(table->persistent_autoinc);
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
dict_index_t* index = dict_table_get_first_index(table);
if (index == NULL) {
- } else if (fil_space_t* space = fil_space_acquire(index->space)) {
- mtr_t mtr;
- mtr.start();
- buf_block_t* block = buf_page_get(
- page_id_t(index->space, index->page),
- page_size_t(space->flags),
- RW_S_LATCH, &mtr);
-
- ib_uint64_t autoinc = block
- ? page_get_autoinc(block->frame) : 0;
- const bool retry = block && autoinc == 0
- && !page_is_empty(block->frame);
- mtr.commit();
- fil_space_release(space);
-
- if (retry) {
- /* This should be an old data file where
- PAGE_ROOT_AUTO_INC was initialized to 0.
- Fall back to reading MAX(autoinc_col).
- There should be an index on it. */
- const dict_col_t* autoinc_col
- = dict_table_get_nth_col(table, col_no);
- while (index != NULL
- && index->fields[0].col != autoinc_col) {
- index = dict_table_get_next_index(index);
- }
+ return 0;
+ }
- if (index != NULL && index->space == space->id) {
- autoinc = row_search_max_autoinc(index);
- }
+ mtr_t mtr;
+ mtr.start();
+ buf_block_t* block = buf_page_get(
+ page_id_t(index->table->space_id, index->page),
+ page_size_t(index->table->space->flags),
+ RW_S_LATCH, &mtr);
+
+ ib_uint64_t autoinc = block ? page_get_autoinc(block->frame) : 0;
+ const bool retry = block && autoinc == 0
+ && !page_is_empty(block->frame);
+ mtr.commit();
+
+ if (retry) {
+ /* This should be an old data file where
+ PAGE_ROOT_AUTO_INC was initialized to 0.
+ Fall back to reading MAX(autoinc_col).
+ There should be an index on it. */
+ const dict_col_t* autoinc_col
+ = dict_table_get_nth_col(table, col_no);
+ while (index && index->fields[0].col != autoinc_col) {
+ index = dict_table_get_next_index(index);
}
- return(autoinc);
+ if (index) {
+ autoinc = row_search_max_autoinc(index);
+ }
}
- return(0);
+ return autoinc;
}
/** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC.
@@ -1554,22 +1541,19 @@ btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
void
btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset)
{
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(index->table->persistent_autoinc);
- ut_ad(!dict_table_is_temporary(index->table));
-
- if (fil_space_t* space = fil_space_acquire(index->space)) {
- mtr_t mtr;
- mtr.start();
- mtr.set_named_space(space);
- page_set_autoinc(buf_page_get(
- page_id_t(index->space, index->page),
- page_size_t(space->flags),
- RW_SX_LATCH, &mtr),
- index, autoinc, &mtr, reset);
- mtr.commit();
- fil_space_release(space);
- }
+ ut_ad(!index->table->is_temporary());
+
+ mtr_t mtr;
+ mtr.start();
+ fil_space_t* space = index->table->space;
+ mtr.set_named_space(space);
+ page_set_autoinc(buf_page_get(page_id_t(space->id, index->page),
+ page_size_t(space->flags),
+ RW_SX_LATCH, &mtr),
+ index, autoinc, &mtr, reset);
+ mtr.commit();
}
/*************************************************************//**
@@ -1612,7 +1596,7 @@ btr_page_reorganize_low(
bool log_compressed;
bool is_spatial;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -1667,7 +1651,7 @@ btr_page_reorganize_low(
During redo log apply, dict_index_is_sec_or_ibuf() always
holds, even for clustered indexes. */
- ut_ad(recovery || dict_table_is_temporary(index->table)
+ ut_ad(recovery || index->table->is_temporary()
|| !page_is_leaf(temp_page)
|| !dict_index_is_sec_or_ibuf(index)
|| page_get_max_trx_id(page) != 0);
@@ -1697,18 +1681,18 @@ btr_page_reorganize_low(
ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
PAGE_HEADER + PAGE_N_RECS + temp_page,
PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
- ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
+ ut_a(!memcmp(srv_page_size - FIL_PAGE_DATA_END + page,
+ srv_page_size - FIL_PAGE_DATA_END + temp_page,
FIL_PAGE_DATA_END));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
PAGE_N_RECS - PAGE_N_DIR_SLOTS);
memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
- UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
+ srv_page_size - PAGE_DATA - FIL_PAGE_DATA_END);
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
+ ut_a(!memcmp(page, temp_page, srv_page_size));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
goto func_exit;
@@ -1746,6 +1730,17 @@ func_exit:
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
+
+ if (!recovery && page_is_root(temp_page)
+ && fil_page_get_type(temp_page) == FIL_PAGE_TYPE_INSTANT) {
+ /* Preserve the PAGE_INSTANT information. */
+ ut_ad(!page_zip);
+ ut_ad(index->is_instant());
+ memcpy(FIL_PAGE_TYPE + page, FIL_PAGE_TYPE + temp_page, 2);
+ memcpy(PAGE_HEADER + PAGE_INSTANT + page,
+ PAGE_HEADER + PAGE_INSTANT + temp_page, 2);
+ }
+
buf_block_free(temp_block);
/* Restore logging mode */
@@ -1780,6 +1775,19 @@ func_exit:
MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL);
}
+ if (UNIV_UNLIKELY(fil_page_get_type(page) == FIL_PAGE_TYPE_INSTANT)) {
+ /* Log the PAGE_INSTANT information. */
+ ut_ad(!page_zip);
+ ut_ad(index->is_instant());
+ ut_ad(!recovery);
+ mlog_write_ulint(FIL_PAGE_TYPE + page, FIL_PAGE_TYPE_INSTANT,
+ MLOG_2BYTES, mtr);
+ mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + page,
+ mach_read_from_2(PAGE_HEADER + PAGE_INSTANT
+ + page),
+ MLOG_2BYTES, mtr);
+ }
+
return(success);
}
@@ -1878,21 +1886,23 @@ btr_parse_page_reorganize(
return(ptr);
}
-/*************************************************************//**
-Empties an index page. @see btr_page_create(). */
-static
+/** Empty an index page (possibly the root page). @see btr_page_create().
+@param[in,out] block page to be emptied
+@param[in,out] page_zip compressed page frame, or NULL
+@param[in] index index of the page
+@param[in] level B-tree level of the page (0=leaf)
+@param[in,out] mtr mini-transaction */
void
btr_page_empty(
-/*===========*/
- buf_block_t* block, /*!< in: page to be emptied */
- page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
- dict_index_t* index, /*!< in: index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mtr */
+ buf_block_t* block,
+ page_zip_des_t* page_zip,
+ dict_index_t* index,
+ ulint level,
+ mtr_t* mtr)
{
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_zip == buf_block_get_page_zip(block));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -1963,12 +1973,13 @@ btr_root_raise_and_insert(
root_page_zip = buf_block_get_page_zip(root_block);
ut_ad(!page_is_empty(root));
index = btr_cur_get_index(cursor);
+ ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
#ifdef UNIV_ZIP_DEBUG
ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
#endif /* UNIV_ZIP_DEBUG */
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
- ulint space = dict_index_get_space(index);
+ ulint space = index->table->space_id;
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
@@ -1981,14 +1992,13 @@ btr_root_raise_and_insert(
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(
- mtr, root_block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
/* Allocate a new page to the tree. Root splitting is done by first
moving the root records to the new page, emptying the root, putting
a node pointer to the new page, and then splitting the new page. */
- level = btr_page_get_level(root, mtr);
+ level = btr_page_get_level(root);
new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
@@ -2025,19 +2035,16 @@ btr_root_raise_and_insert(
root_page_zip, root, index, mtr);
/* Update the lock table and possible hash index. */
-
- if (!dict_table_is_locking_disabled(index->table)) {
- lock_move_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root));
- }
+ lock_move_rec_list_end(new_block, root_block,
+ page_get_infimum_rec(root));
/* Move any existing predicate locks */
if (dict_index_is_spatial(index)) {
lock_prdt_rec_move(new_block, root_block);
+ } else {
+ btr_search_move_or_delete_hash_entries(
+ new_block, root_block);
}
-
- btr_search_move_or_delete_hash_entries(new_block, root_block,
- index);
}
if (dict_index_is_sec_or_ibuf(index)) {
@@ -2092,7 +2099,7 @@ btr_root_raise_and_insert(
rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
node_ptr = rtr_index_build_node_ptr(
- index, &new_mbr, rec, new_page_no, *heap, level);
+ index, &new_mbr, rec, new_page_no, *heap);
} else {
node_ptr = dict_index_build_node_ptr(
index, rec, new_page_no, *heap, level);
@@ -2106,12 +2113,23 @@ btr_root_raise_and_insert(
/* Rebuild the root page to get free space */
btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
+ /* btr_page_empty() is supposed to zero-initialize the field. */
+ ut_ad(!page_get_instant(root_block->frame));
+
+ if (index->is_instant()) {
+ ut_ad(!root_page_zip);
+ byte* page_type = root_block->frame + FIL_PAGE_TYPE;
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_INDEX);
+ mlog_write_ulint(page_type, FIL_PAGE_TYPE_INSTANT,
+ MLOG_2BYTES, mtr);
+ page_set_instant(root_block->frame, index->n_core_fields, mtr);
+ }
/* Set the next node and previous node fields, although
they should already have been set. The previous node field
must be FIL_NULL if root_page_zip != NULL, because the
REC_INFO_MIN_REC_FLAG (of the first user record) will be
- set if and only if btr_page_get_prev() == FIL_NULL. */
+ set if and only if !page_has_prev(). */
btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
@@ -2131,7 +2149,7 @@ btr_root_raise_and_insert(
/* We play safe and reset the free bits for the new page */
if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
}
@@ -2303,7 +2321,7 @@ btr_page_get_split_rec(
/* free_space is now the free space of a created new page */
total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
+ total_n_recs = ulint(page_get_n_recs(page)) + 1;
ut_ad(total_n_recs >= 2);
total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
@@ -2414,7 +2432,7 @@ btr_page_insert_fits(
/* free_space is now the free space of a created new page */
total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
+ total_n_recs = ulint(page_get_n_recs(page)) + 1;
/* We determine which records (from rec to end_rec, not including
end_rec) will end up on the other half page from tuple when it is
@@ -2586,9 +2604,8 @@ btr_attach_half_pages(
buf_block_t* prev_block = NULL;
buf_block_t* next_block = NULL;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
- ut_ad(mtr_is_block_fix(
- mtr, new_block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
/* Create a memory heap where the data tuple is stored */
heap = mem_heap_create(1024);
@@ -2646,9 +2663,8 @@ btr_attach_half_pages(
}
/* Get the level of the split pages */
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
- ut_ad(level
- == btr_page_get_level(buf_block_get_frame(new_block), mtr));
+ level = btr_page_get_level(buf_block_get_frame(block));
+ ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block)));
/* Build the node pointer (= node key and page address) for the upper
half */
@@ -2773,8 +2789,7 @@ btr_insert_into_right_sibling(
ut_ad(mtr_memo_contains_flagged(
mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(
- mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(heap);
if (next_page_no == FIL_NULL || !page_rec_is_supremum(
@@ -2821,7 +2836,7 @@ btr_insert_into_right_sibling(
if (is_leaf
&& next_block->page.size.is_compressed()
&& !dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)) {
+ && !cursor->index->table->is_temporary()) {
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
@@ -2832,7 +2847,7 @@ btr_insert_into_right_sibling(
ibool compressed;
dberr_t err;
- ulint level = btr_page_get_level(next_page, mtr);
+ ulint level = btr_page_get_level(next_page);
/* adjust cursor position */
*btr_cur_get_page_cur(cursor) = next_page_cursor;
@@ -2863,7 +2878,7 @@ btr_insert_into_right_sibling(
if (is_leaf
&& !dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)) {
+ && !cursor->index->table->is_temporary()) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
@@ -2956,8 +2971,7 @@ func_start:
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
- ut_ad(mtr_is_block_fix(
- mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_is_empty(page));
/* try to insert to the next page if possible before split */
@@ -3018,7 +3032,7 @@ func_start:
/* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr, mtr);
+ btr_page_get_level(page), mtr, mtr);
if (new_block == NULL && os_has_said_disk_full) {
return(NULL);
@@ -3027,7 +3041,7 @@ func_start:
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
btr_page_create(new_block, new_page_zip, cursor->index,
- btr_page_get_level(page, mtr), mtr);
+ btr_page_get_level(page), mtr);
/* Only record the leaf level page splits. */
if (page_is_leaf(page)) {
cursor->index->stat_defrag_n_page_split ++;
@@ -3141,16 +3155,12 @@ insert_empty:
ULINT_UNDEFINED, mtr);
/* Update the lock table and possible hash index. */
-
- if (!dict_table_is_locking_disabled(
- cursor->index->table)) {
- lock_move_rec_list_start(
- new_block, block, move_limit,
- new_page + PAGE_NEW_INFIMUM);
- }
+ lock_move_rec_list_start(
+ new_block, block, move_limit,
+ new_page + PAGE_NEW_INFIMUM);
btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
+ new_block, block);
/* Delete the records from the source page. */
@@ -3187,16 +3197,12 @@ insert_empty:
cursor->index, mtr);
/* Update the lock table and possible hash index. */
- if (!dict_table_is_locking_disabled(
- cursor->index->table)) {
- lock_move_rec_list_end(
- new_block, block, move_limit);
- }
+ lock_move_rec_list_end(new_block, block, move_limit);
ut_ad(!dict_index_is_spatial(index));
btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
+ new_block, block);
/* Delete the records from the source page. */
@@ -3284,7 +3290,7 @@ insert_empty:
insert_failed:
/* We play safe and reset the free bits for new_page */
if (!dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)) {
+ && !cursor->index->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
ibuf_reset_free_bits(block);
}
@@ -3302,7 +3308,7 @@ func_exit:
left and right pages in the same mtr */
if (!dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)
+ && !cursor->index->table->is_temporary()
&& page_is_leaf(page)) {
ibuf_update_free_bits_for_two_pages_low(
@@ -3337,7 +3343,7 @@ btr_level_list_remove_func(
{
ut_ad(page != NULL);
ut_ad(mtr != NULL);
- ut_ad(mtr_is_page_fix(mtr, page, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
ut_ad(space == page_get_space_id(page));
/* Get the previous and next page numbers of page */
@@ -3471,7 +3477,7 @@ btr_node_ptr_delete(
ibool compressed;
dberr_t err;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
@@ -3512,11 +3518,10 @@ btr_lift_page_up(
bool lift_father_up;
buf_block_t* block_orig = block;
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(!page_has_siblings(page));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- page_level = btr_page_get_level(page, mtr);
+ page_level = btr_page_get_level(page);
root_page_no = dict_index_get_page(index);
{
@@ -3524,7 +3529,8 @@ btr_lift_page_up(
ulint* offsets = NULL;
mem_heap_t* heap = mem_heap_create(
sizeof(*offsets)
- * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
+ * (REC_OFFS_HEADER_SIZE + 1 + 1
+ + unsigned(index->n_fields)));
buf_block_t* b;
if (dict_index_is_spatial(index)) {
@@ -3578,12 +3584,11 @@ btr_lift_page_up(
block = father_block;
page = buf_block_get_frame(block);
- page_level = btr_page_get_level(page, mtr);
+ page_level = btr_page_get_level(page);
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_is_block_fix(
- mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(!page_has_siblings(page));
+ ut_ad(mtr_memo_contains(
+ mtr, block, MTR_MEMO_PAGE_X_FIX));
father_block = blocks[0];
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3597,6 +3602,19 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ /* btr_page_empty() is supposed to zero-initialize the field. */
+ ut_ad(!page_get_instant(father_block->frame));
+
+ if (page_level == 0 && index->is_instant()) {
+ ut_ad(!father_page_zip);
+ byte* page_type = father_block->frame + FIL_PAGE_TYPE;
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_INDEX);
+ mlog_write_ulint(page_type, FIL_PAGE_TYPE_INSTANT,
+ MLOG_2BYTES, mtr);
+ page_set_instant(father_block->frame,
+ index->n_core_fields, mtr);
+ }
+
page_level++;
/* Copy the records to the father page one by one. */
@@ -3618,18 +3636,16 @@ btr_lift_page_up(
/* Update the lock table and possible hash index. */
- if (!dict_table_is_locking_disabled(index->table)) {
- lock_move_rec_list_end(father_block, block,
- page_get_infimum_rec(page));
- }
+ lock_move_rec_list_end(father_block, block,
+ page_get_infimum_rec(page));
/* Also update the predicate locks */
if (dict_index_is_spatial(index)) {
lock_prdt_rec_move(father_block, block);
+ } else {
+ btr_search_move_or_delete_hash_entries(
+ father_block, block);
}
-
- btr_search_move_or_delete_hash_entries(father_block, block,
- index);
}
if (!dict_table_is_locking_disabled(index->table)) {
@@ -3637,7 +3653,7 @@ btr_lift_page_up(
if (dict_index_is_spatial(index)) {
lock_mutex_enter();
lock_prdt_page_free_from_discard(
- block, lock_sys->prdt_page_hash);
+ block, lock_sys.prdt_page_hash);
lock_mutex_exit();
}
lock_update_copy_and_discard(father_block, block);
@@ -3648,7 +3664,7 @@ btr_lift_page_up(
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
- ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
+ ut_ad(btr_page_get_level(page) == page_level + 1);
btr_page_set_level(page, page_zip, page_level, mtr);
#ifdef UNIV_ZIP_DEBUG
@@ -3665,7 +3681,7 @@ btr_lift_page_up(
/* We play it safe and reset the free bits for the father */
if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
ibuf_reset_free_bits(father_block);
}
ut_ad(page_validate(father_page, index));
@@ -3696,7 +3712,6 @@ btr_compress(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
dict_index_t* index;
- ulint space;
ulint left_page_no;
ulint right_page_no;
buf_block_t* merge_block;
@@ -3732,10 +3747,9 @@ btr_compress(
}
#endif /* UNIV_DEBUG */
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
- space = dict_index_get_space(index);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- const page_size_t page_size(dict_table_page_size(index->table));
+ const page_size_t page_size(index->table->space->flags);
MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS);
@@ -3878,8 +3892,7 @@ retry:
/* Check if parent entry needs to be updated */
mbr_changed = rtr_merge_mbr_changed(
&cursor2, &father_cursor,
- offsets2, offsets, &new_mbr,
- merge_block, block, index);
+ offsets2, offsets, &new_mbr);
}
rec_t* orig_pred = page_copy_rec_list_start(
@@ -3893,7 +3906,8 @@ retry:
btr_search_drop_page_hash_index(block);
/* Remove the page from the level list */
- btr_level_list_remove(space, page_size, page, index, mtr);
+ btr_level_list_remove(index->table->space_id,
+ page_size, page, index, mtr);
if (dict_index_is_spatial(index)) {
rec_t* my_rec = father_cursor.page_cur.rec;
@@ -3923,14 +3937,13 @@ retry:
merge_page, &new_mbr, NULL, mtr);
#endif
} else {
- rtr_node_ptr_delete(
- index, &father_cursor, block, mtr);
+ rtr_node_ptr_delete(&father_cursor, mtr);
}
/* No GAP lock needs to be worrying about */
lock_mutex_enter();
lock_prdt_page_free_from_discard(
- block, lock_sys->prdt_page_hash);
+ block, lock_sys.prdt_page_hash);
lock_rec_free_all_from_discard_page(block);
lock_mutex_exit();
} else {
@@ -3986,9 +3999,7 @@ retry:
#ifdef UNIV_BTR_DEBUG
memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
#endif /* UNIV_BTR_DEBUG */
-#if FIL_NULL != 0xffffffff
-# error "FIL_NULL != 0xffffffff"
-#endif
+ compile_time_assert(FIL_NULL == 0xffffffffU);
memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
}
@@ -4025,7 +4036,8 @@ retry:
#endif /* UNIV_BTR_DEBUG */
/* Remove the page from the level list */
- btr_level_list_remove(space, page_size, (page_t*)page, index, mtr);
+ btr_level_list_remove(index->table->space_id,
+ page_size, page, index, mtr);
ut_ad(btr_node_ptr_get_child_page_no(
btr_cur_get_rec(&father_cursor), offsets)
@@ -4071,9 +4083,7 @@ retry:
rtr_merge_and_update_mbr(&father_cursor,
&cursor2,
offsets, offsets2,
- merge_page,
- merge_block,
- block, index, mtr);
+ merge_page, mtr);
} else {
/* Otherwise, we will keep the node ptr of
merge page and delete the father node ptr.
@@ -4082,13 +4092,11 @@ retry:
rtr_merge_and_update_mbr(&cursor2,
&father_cursor,
offsets2, offsets,
- merge_page,
- merge_block,
- block, index, mtr);
+ merge_page, mtr);
}
lock_mutex_enter();
lock_prdt_page_free_from_discard(
- block, lock_sys->prdt_page_hash);
+ block, lock_sys.prdt_page_hash);
lock_rec_free_all_from_discard_page(block);
lock_mutex_exit();
} else {
@@ -4113,7 +4121,7 @@ retry:
}
if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)
+ && !index->table->is_temporary()
&& page_is_leaf(merge_page)) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. This has to be done in a
@@ -4150,7 +4158,7 @@ retry:
write the bits accurately in a separate
mini-transaction. */
ibuf_update_free_bits_if_full(merge_block,
- UNIV_PAGE_SIZE,
+ srv_page_size,
ULINT_UNDEFINED);
}
}
@@ -4234,12 +4242,10 @@ btr_discard_only_page_on_level(
const page_t* page = buf_block_get_frame(block);
ut_a(page_get_n_recs(page) == 1);
- ut_a(page_level == btr_page_get_level(page, mtr));
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_a(page_level == btr_page_get_level(page));
+ ut_a(!page_has_siblings(page));
- ut_ad(mtr_is_block_fix(
- mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_search_drop_page_hash_index(block);
if (dict_index_is_spatial(index)) {
@@ -4265,11 +4271,12 @@ btr_discard_only_page_on_level(
/* block is the root page, which must be empty, except
for the node pointer to the (now discarded) block(s). */
+ ut_ad(page_is_root(block->frame));
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
const page_t* root = buf_block_get_frame(block);
- const ulint space = dict_index_get_space(index);
+ const ulint space = index->table->space_id;
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
@@ -4279,9 +4286,14 @@ btr_discard_only_page_on_level(
btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
-
- if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)) {
+ /* btr_page_empty() is supposed to zero-initialize the field. */
+ ut_ad(!page_get_instant(block->frame));
+
+ if (index->is_primary()) {
+ /* Concurrent access is prevented by the root_block->lock
+ X-latch, so this should be safe. */
+ index->remove_instant();
+ } else if (!index->table->is_temporary()) {
/* We play it safe and reset the free bits for the root */
ibuf_reset_free_bits(block);
@@ -4324,9 +4336,7 @@ btr_discard_page(
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
-
- const ulint space = dict_index_get_space(index);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
MONITOR_INC(MONITOR_INDEX_DISCARD);
@@ -4343,12 +4353,12 @@ btr_discard_page(
left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
- const page_size_t page_size(dict_table_page_size(index->table));
+ const page_size_t page_size(index->table->space->flags);
if (left_page_no != FIL_NULL) {
merge_block = btr_block_get(
- page_id_t(space, left_page_no), page_size,
- RW_X_LATCH, index, mtr);
+ page_id_t(index->table->space_id, left_page_no),
+ page_size, RW_X_LATCH, index, mtr);
merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
@@ -4363,8 +4373,8 @@ btr_discard_page(
== btr_cur_get_rec(&parent_cursor)));
} else if (right_page_no != FIL_NULL) {
merge_block = btr_block_get(
- page_id_t(space, right_page_no), page_size,
- RW_X_LATCH, index, mtr);
+ page_id_t(index->table->space_id, right_page_no),
+ page_size, RW_X_LATCH, index, mtr);
merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
@@ -4406,13 +4416,14 @@ btr_discard_page(
node ptr, so, we need to get father node ptr first and then
delete it. */
rtr_page_get_father(index, block, mtr, cursor, &father_cursor);
- rtr_node_ptr_delete(index, &father_cursor, block, mtr);
+ rtr_node_ptr_delete(&father_cursor, mtr);
} else {
btr_node_ptr_delete(index, block, mtr);
}
/* Remove the page from the level list */
- btr_level_list_remove(space, page_size, page, index, mtr);
+ btr_level_list_remove(index->table->space_id, page_size,
+ page, index, mtr);
#ifdef UNIV_ZIP_DEBUG
{
@@ -4507,9 +4518,9 @@ btr_print_recursive(
ulint i = 0;
mtr_t mtr2;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_SX_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_SX_FIX));
- ib::info() << "NODE ON LEVEL " << btr_page_get_level(page, mtr)
+ ib::info() << "NODE ON LEVEL " << btr_page_get_level(page)
<< " page " << block->page.id;
page_print(block, index, width, width);
@@ -4601,7 +4612,7 @@ btr_check_node_ptr(
btr_cur_t cursor;
page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
if (dict_index_get_page(index) == block->page.id.page_no()) {
@@ -4625,7 +4636,7 @@ btr_check_node_ptr(
tuple = dict_index_build_node_ptr(
index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
- btr_page_get_level(page, mtr));
+ btr_page_get_level(page));
/* For spatial index, the MBR in the parent rec could be different
with that of first rec of child, their relationship should be
@@ -4675,8 +4686,6 @@ btr_index_rec_validate(
and page on error */
{
ulint len;
- ulint n;
- ulint i;
const page_t* page;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
@@ -4707,31 +4716,34 @@ btr_index_rec_validate(
return(FALSE);
}
- n = dict_index_get_n_fields(index);
-
- if (!page_is_comp(page)
- && (rec_get_n_fields_old(rec) != n
- /* a record for older SYS_INDEXES table
- (missing merge_threshold column) is acceptable. */
- && !(index->id == DICT_INDEXES_ID
- && rec_get_n_fields_old(rec) == n - 1))) {
- btr_index_rec_validate_report(page, rec, index);
+ if (!page_is_comp(page)) {
+ const ulint n_rec_fields = rec_get_n_fields_old(rec);
+ if (n_rec_fields == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD
+ && index->id == DICT_INDEXES_ID) {
+ /* A record for older SYS_INDEXES table
+ (missing merge_threshold column) is acceptable. */
+ } else if (n_rec_fields < index->n_core_fields
+ || n_rec_fields > index->n_fields) {
+ btr_index_rec_validate_report(page, rec, index);
- ib::error() << "Has " << rec_get_n_fields_old(rec)
- << " fields, should have " << n;
+ ib::error() << "Has " << rec_get_n_fields_old(rec)
+ << " fields, should have "
+ << index->n_core_fields << ".."
+ << index->n_fields;
- if (dump_on_error) {
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_old(stderr, rec);
- putc('\n', stderr);
+ if (dump_on_error) {
+ fputs("InnoDB: corrupt record ", stderr);
+ rec_print_old(stderr, rec);
+ putc('\n', stderr);
+ }
+ return(FALSE);
}
- return(FALSE);
}
offsets = rec_get_offsets(rec, index, offsets, page_is_leaf(page),
ULINT_UNDEFINED, &heap);
- for (i = 0; i < n; i++) {
+ for (unsigned i = 0; i < index->n_fields; i++) {
dict_field_t* field = dict_index_get_nth_field(index, i);
ulint fixed_size = dict_col_get_fixed_size(
dict_field_get_col(field),
@@ -4746,14 +4758,10 @@ btr_index_rec_validate(
length. When fixed_size == 0, prefix_len is the maximum
length of the prefix index column. */
- if ((field->prefix_len == 0
- && len != UNIV_SQL_NULL && fixed_size
- && len != fixed_size)
- || (field->prefix_len > 0
- && len != UNIV_SQL_NULL
- && len
- > field->prefix_len)) {
-
+ if (len_is_stored(len)
+ && (field->prefix_len
+ ? len > field->prefix_len
+ : (fixed_size && len != fixed_size))) {
btr_index_rec_validate_report(page, rec, index);
ib::error error;
@@ -4944,7 +4952,7 @@ btr_validate_level(
}
#endif
- fil_space_t* space = fil_space_get(index->space);
+ fil_space_t* space = index->table->space;
const page_size_t table_page_size(
dict_table_page_size(index->table));
const page_size_t space_page_size(space->flags);
@@ -4959,7 +4967,7 @@ btr_validate_level(
return(false);
}
- while (level != btr_page_get_level(page, &mtr)) {
+ while (level != btr_page_get_level(page)) {
const rec_t* node_ptr;
if (fseg_page_is_free(space, block->page.id.page_no())) {
@@ -4971,8 +4979,8 @@ btr_validate_level(
ret = false;
}
- ut_a(index->space == block->page.id.space());
- ut_a(index->space == page_get_space_id(page));
+ ut_a(index->table->space_id == block->page.id.space());
+ ut_a(block->page.id.space() == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
page_zip = buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -5000,8 +5008,6 @@ btr_validate_level(
left_page_no = btr_page_get_prev(page, &mtr);
while (left_page_no != FIL_NULL) {
- page_id_t left_page_id(
- index->space, left_page_no);
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
@@ -5010,7 +5016,8 @@ btr_validate_level(
savepoint2 = mtr_set_savepoint(&mtr);
block = btr_block_get(
- left_page_id,
+ page_id_t(index->table->space_id,
+ left_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
page = buf_block_get_frame(block);
@@ -5038,7 +5045,7 @@ loop:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- ut_a(block->page.id.space() == index->space);
+ ut_a(block->page.id.space() == index->table->space_id);
if (fseg_page_is_free(space, block->page.id.page_no())) {
@@ -5067,7 +5074,7 @@ loop:
ret = false;
}
- ut_a(btr_page_get_level(page, &mtr) == level);
+ ut_a(btr_page_get_level(page) == level);
right_page_no = btr_page_get_next(page, &mtr);
left_page_no = btr_page_get_prev(page, &mtr);
@@ -5081,7 +5088,7 @@ loop:
savepoint = mtr_set_savepoint(&mtr);
right_block = btr_block_get(
- page_id_t(index->space, right_page_no),
+ page_id_t(index->table->space_id, right_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
@@ -5212,7 +5219,7 @@ loop:
node_ptr_tuple = dict_index_build_node_ptr(
index,
page_rec_get_next(page_get_infimum_rec(page)),
- 0, heap, btr_page_get_level(page, &mtr));
+ 0, heap, btr_page_get_level(page));
if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
offsets)) {
@@ -5237,13 +5244,13 @@ loop:
if (left_page_no == FIL_NULL) {
ut_a(node_ptr == page_rec_get_next(
page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
+ ut_a(!page_has_prev(father_page));
}
if (right_page_no == FIL_NULL) {
ut_a(node_ptr == page_rec_get_prev(
page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
+ ut_a(!page_has_next(father_page));
} else {
const rec_t* right_node_ptr;
@@ -5258,13 +5265,13 @@ loop:
&mtr, savepoint, right_block);
btr_block_get(
- page_id_t(index->space,
+ page_id_t(index->table->space_id,
parent_right_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
right_block = btr_block_get(
- page_id_t(index->space,
+ page_id_t(index->table->space_id,
right_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
@@ -5342,14 +5349,14 @@ node_ptr_fails:
if (parent_right_page_no != FIL_NULL) {
btr_block_get(
page_id_t(
- index->space,
+ index->table->space_id,
parent_right_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
}
} else if (parent_page_no != FIL_NULL) {
btr_block_get(
- page_id_t(index->space,
+ page_id_t(index->table->space_id,
parent_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
@@ -5357,7 +5364,7 @@ node_ptr_fails:
}
block = btr_block_get(
- page_id_t(index->space, right_page_no),
+ page_id_t(index->table->space_id, right_page_no),
table_page_size,
RW_SX_LATCH, index, &mtr);
@@ -5390,7 +5397,7 @@ btr_validate_spatial_index(
mtr_x_lock(dict_index_get_lock(index), &mtr);
page_t* root = btr_root_get(index, &mtr);
- ulint n = btr_page_get_level(root, &mtr);
+ ulint n = btr_page_get_level(root);
#ifdef UNIV_RTR_DEBUG
fprintf(stderr, "R-tree level is %lu\n", n);
@@ -5457,7 +5464,7 @@ btr_validate_index(
return err;
}
- ulint n = btr_page_get_level(root, &mtr);
+ ulint n = btr_page_get_level(root);
for (ulint i = 0; i <= n; ++i) {
@@ -5503,8 +5510,8 @@ btr_can_merge_with_page(
index = btr_cur_get_index(cursor);
page = btr_cur_get_page(cursor);
- const page_id_t page_id(dict_index_get_space(index), page_no);
- const page_size_t page_size(dict_table_page_size(index->table));
+ const page_id_t page_id(index->table->space_id, page_no);
+ const page_size_t page_size(index->table->space->flags);
mblock = btr_block_get(page_id, page_size, RW_X_LATCH, index, mtr);
mpage = buf_block_get_frame(mblock);
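
Throughout the btr0btr.cc hunks above, the patch stops going through index->space and dict_table_page_size() and instead reaches the tablespace through the table object: index->table->space_id for the id and page_size_t(index->table->space->flags) for the page size. The structs below are heavily simplified, hypothetical stand-ins (not the real fil_space_t/dict_table_t/dict_index_t definitions) meant only to illustrate that ownership chain.

#include <cstdint>
#include <cassert>

// Hypothetical, heavily simplified stand-ins for illustration only.
struct fil_space_sketch {
    uint32_t id;       // tablespace id
    uint32_t flags;    // encodes page size, compression, ...
};

struct table_sketch {
    fil_space_sketch* space;    // attached tablespace object
    uint32_t          space_id; // cached id, valid even without the object
};

struct index_sketch {
    table_sketch* table;        // the index no longer caches its own space id
};

int main()
{
    fil_space_sketch space{42, 0};
    table_sketch table{&space, space.id};
    index_sketch index{&table};

    // The pattern used by the patch: derive everything from index->table.
    assert(index.table->space_id == 42);
    assert(index.table->space->flags == 0);
}
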
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
index bf33745f091..2ce2815acb0 100644
--- a/storage/innobase/btr/btr0bulk.cc
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -32,7 +32,7 @@ Created 03/11/2014 Shaohua Wang
#include "trx0trx.h"
/** Innodb B-tree index fill factor for bulk load. */
-long innobase_fill_factor;
+uint innobase_fill_factor;
/** whether to reduce redo logging during ALTER TABLE */
my_bool innodb_log_optimize_ddl;
@@ -56,7 +56,7 @@ PageBulk::init()
m_mtr.set_log_mode(MTR_LOG_NO_REDO);
m_mtr.set_flush_observer(m_flush_observer);
} else {
- m_mtr.set_named_space(m_index->space);
+ m_index->set_modified(m_mtr);
}
if (m_page_no == FIL_NULL) {
@@ -67,11 +67,12 @@ PageBulk::init()
the allocation order, and we will always generate redo log
for page allocation, even when creating a new tablespace. */
alloc_mtr.start();
- alloc_mtr.set_named_space(m_index->space);
+ m_index->set_modified(alloc_mtr);
ulint n_reserved;
bool success;
- success = fsp_reserve_free_extents(&n_reserved, m_index->space,
+ success = fsp_reserve_free_extents(&n_reserved,
+ m_index->table->space,
1, FSP_NORMAL, &alloc_mtr);
if (!success) {
alloc_mtr.commit();
@@ -83,10 +84,7 @@ PageBulk::init()
new_block = btr_page_alloc(m_index, 0, FSP_UP, m_level,
&alloc_mtr, &m_mtr);
- if (n_reserved > 0) {
- fil_space_release_free_extents(m_index->space,
- n_reserved);
- }
+ m_index->table->space->release_free_extents(n_reserved);
alloc_mtr.commit();
@@ -121,11 +119,10 @@ PageBulk::init()
m_index->id, &m_mtr);
}
} else {
- page_id_t page_id(dict_index_get_space(m_index), m_page_no);
- page_size_t page_size(dict_table_page_size(m_index->table));
-
- new_block = btr_block_get(page_id, page_size,
- RW_X_LATCH, m_index, &m_mtr);
+ new_block = btr_block_get(
+ page_id_t(m_index->table->space_id, m_page_no),
+ page_size_t(m_index->table->space->flags),
+ RW_X_LATCH, m_index, &m_mtr);
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
@@ -156,11 +153,11 @@ PageBulk::init()
m_reserved_space = dict_index_get_space_reserve();
} else {
m_reserved_space =
- UNIV_PAGE_SIZE * (100 - innobase_fill_factor) / 100;
+ srv_page_size * (100 - innobase_fill_factor) / 100;
}
m_padding_space =
- UNIV_PAGE_SIZE - dict_index_zip_pad_optimal_page_size(m_index);
+ srv_page_size - dict_index_zip_pad_optimal_page_size(m_index);
m_heap_top = page_header_get_ptr(new_page, PAGE_HEAP_TOP);
m_rec_no = page_header_get_field(new_page, PAGE_N_RECS);
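
The reserved-space line above keeps the same fill-factor arithmetic while switching from the compile-time UNIV_PAGE_SIZE to the runtime srv_page_size: the bulk loader leaves (100 - innobase_fill_factor) percent of each page unused. A worked example with assumed values (16 KiB page, fill factor 80):

#include <cassert>

// srv_page_size and innobase_fill_factor values are assumptions for this sketch.
int main()
{
    unsigned long srv_page_size = 16384;   // 16 KiB page
    unsigned innobase_fill_factor = 80;    // leave 20% of each page free

    unsigned long reserved = srv_page_size * (100 - innobase_fill_factor) / 100;
    assert(reserved == 3276);              // 16384 * 20 / 100, truncated
}
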
@@ -185,13 +182,14 @@ PageBulk::insert(
ut_ad(m_heap != NULL);
rec_size = rec_offs_size(offsets);
+ ut_d(const bool is_leaf = page_rec_is_leaf(m_cur_rec));
#ifdef UNIV_DEBUG
/* Check whether records are in order. */
if (!page_rec_is_infimum(m_cur_rec)) {
rec_t* old_rec = m_cur_rec;
ulint* old_offsets = rec_get_offsets(
- old_rec, m_index, NULL, page_rec_is_leaf(old_rec),
+ old_rec, m_index, NULL, is_leaf,
ULINT_UNDEFINED, &m_heap);
ut_ad(cmp_rec_rec(rec, old_rec, offsets, old_offsets, m_index)
@@ -203,7 +201,7 @@ PageBulk::insert(
/* 1. Copy the record to page. */
rec_t* insert_rec = rec_copy(m_heap_top, rec, offsets);
- rec_offs_make_valid(insert_rec, m_index, offsets);
+ rec_offs_make_valid(insert_rec, m_index, is_leaf, offsets);
/* 2. Insert the record in the linked list. */
rec_t* next_rec = page_rec_get_next(m_cur_rec);
@@ -229,7 +227,7 @@ PageBulk::insert(
- page_dir_calc_reserved_space(m_rec_no);
ut_ad(m_free_space >= rec_size + slot_size);
- ut_ad(m_heap_top + rec_size < m_page + UNIV_PAGE_SIZE);
+ ut_ad(m_heap_top + rec_size < m_page + srv_page_size);
m_free_space -= rec_size + slot_size;
m_heap_top += rec_size;
@@ -306,6 +304,7 @@ PageBulk::finish()
page_dir_slot_set_n_owned(slot, NULL, count + 1);
ut_ad(!dict_index_is_spatial(m_index));
+ ut_ad(!page_get_instant(m_page));
if (!m_flush_observer && !m_page_zip) {
mlog_write_ulint(PAGE_HEADER + PAGE_N_DIR_SLOTS + m_page,
@@ -322,7 +321,7 @@ PageBulk::finish()
mlog_write_ulint(PAGE_HEADER + PAGE_LAST_INSERT + m_page,
ulint(m_cur_rec - m_page),
MLOG_2BYTES, &m_mtr);
- mlog_write_ulint(PAGE_HEADER + PAGE_DIRECTION + m_page,
+ mlog_write_ulint(PAGE_HEADER + PAGE_DIRECTION_B - 1 + m_page,
PAGE_RIGHT, MLOG_2BYTES, &m_mtr);
mlog_write_ulint(PAGE_HEADER + PAGE_N_DIRECTION + m_page, 0,
MLOG_2BYTES, &m_mtr);
@@ -339,7 +338,7 @@ PageBulk::finish()
mach_write_to_2(PAGE_HEADER + PAGE_N_RECS + m_page, m_rec_no);
mach_write_to_2(PAGE_HEADER + PAGE_LAST_INSERT + m_page,
ulint(m_cur_rec - m_page));
- mach_write_to_2(PAGE_HEADER + PAGE_DIRECTION + m_page,
+ mach_write_to_2(PAGE_HEADER + PAGE_DIRECTION_B - 1 + m_page,
PAGE_RIGHT);
mach_write_to_2(PAGE_HEADER + PAGE_N_DIRECTION + m_page, 0);
}
@@ -509,15 +508,14 @@ PageBulk::copyOut(
page_rec_is_leaf(split_rec),
ULINT_UNDEFINED, &m_heap);
- m_free_space += rec_get_end(last_rec, offsets)
- - m_heap_top
+ m_free_space += ulint(rec_get_end(last_rec, offsets) - m_heap_top)
+ page_dir_calc_reserved_space(m_rec_no)
- page_dir_calc_reserved_space(n);
- ut_ad(m_free_space > 0);
+ ut_ad(lint(m_free_space) > 0);
m_rec_no = n;
#ifdef UNIV_DEBUG
- m_total_data -= rec_get_end(last_rec, offsets) - m_heap_top;
+ m_total_data -= ulint(rec_get_end(last_rec, offsets) - m_heap_top);
#endif /* UNIV_DEBUG */
}
@@ -658,18 +656,17 @@ PageBulk::latch()
m_mtr.set_log_mode(MTR_LOG_NO_REDO);
m_mtr.set_flush_observer(m_flush_observer);
} else {
- m_mtr.set_named_space(m_index->space);
+ m_index->set_modified(m_mtr);
}
/* In case the block is S-latched by page_cleaner. */
if (!buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock,
__FILE__, __LINE__, &m_mtr)) {
- page_id_t page_id(dict_index_get_space(m_index), m_page_no);
- page_size_t page_size(dict_table_page_size(m_index->table));
-
- m_block = buf_page_get_gen(page_id, page_size, RW_X_LATCH,
- m_block, BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__, &m_mtr, &m_err);
+ m_block = buf_page_get_gen(
+ page_id_t(m_index->table->space_id, m_page_no),
+ page_size_t(m_index->table->space->flags),
+ RW_X_LATCH, m_block, BUF_GET_IF_IN_POOL,
+ __FILE__, __LINE__, &m_mtr, &m_err);
if (m_err != DB_SUCCESS) {
return (m_err);
@@ -783,7 +780,7 @@ BtrBulk::pageCommit(
/** Log free check */
inline void BtrBulk::logFreeCheck()
{
- if (log_sys->check_flush_or_checkpoint) {
+ if (log_sys.check_flush_or_checkpoint) {
release();
log_free_check();
@@ -976,7 +973,7 @@ BtrBulk::finish(dberr_t err)
{
ulint last_page_no = FIL_NULL;
- ut_ad(!dict_table_is_temporary(m_index->table));
+ ut_ad(!m_index->table->is_temporary());
if (m_page_bulks.size() == 0) {
/* The table is empty. The root page of the index tree
@@ -1008,30 +1005,27 @@ BtrBulk::finish(dberr_t err)
rec_t* first_rec;
mtr_t mtr;
buf_block_t* last_block;
- page_t* last_page;
- page_id_t page_id(dict_index_get_space(m_index),
- last_page_no);
- page_size_t page_size(dict_table_page_size(m_index->table));
- ulint root_page_no = dict_index_get_page(m_index);
PageBulk root_page_bulk(m_index, m_trx->id,
- root_page_no, m_root_level,
+ m_index->page, m_root_level,
m_flush_observer);
- mtr_start(&mtr);
- mtr.set_named_space(dict_index_get_space(m_index));
- mtr_x_lock(dict_index_get_lock(m_index), &mtr);
+ mtr.start();
+ m_index->set_modified(mtr);
+ mtr_x_lock(&m_index->lock, &mtr);
ut_ad(last_page_no != FIL_NULL);
- last_block = btr_block_get(page_id, page_size,
- RW_X_LATCH, m_index, &mtr);
- last_page = buf_block_get_frame(last_block);
- first_rec = page_rec_get_next(page_get_infimum_rec(last_page));
+ last_block = btr_block_get(
+ page_id_t(m_index->table->space_id, last_page_no),
+ page_size_t(m_index->table->space->flags),
+ RW_X_LATCH, m_index, &mtr);
+ first_rec = page_rec_get_next(
+ page_get_infimum_rec(last_block->frame));
ut_ad(page_rec_is_user_rec(first_rec));
/* Copy last page to root page. */
err = root_page_bulk.init();
if (err != DB_SUCCESS) {
- mtr_commit(&mtr);
+ mtr.commit();
return(err);
}
root_page_bulk.copyIn(first_rec);
@@ -1042,7 +1036,7 @@ BtrBulk::finish(dberr_t err)
/* Do not flush the last page. */
last_block->page.flush_observer = NULL;
- mtr_commit(&mtr);
+ mtr.commit();
err = pageCommit(&root_page_bulk, NULL, false);
ut_ad(err == DB_SUCCESS);
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 373328939c2..2ba311fce7b 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -77,18 +77,14 @@ enum btr_op_t {
BTR_DELMARK_OP /*!< Mark a record for deletion */
};
-/** Modification types for the B-tree operation. */
+/** Modification types for the B-tree operation.
+ Note that the order must be DELETE, BOTH, INSERT!
+ */
enum btr_intention_t {
BTR_INTENTION_DELETE,
BTR_INTENTION_BOTH,
BTR_INTENTION_INSERT
};
-#if BTR_INTENTION_DELETE > BTR_INTENTION_BOTH
-#error "BTR_INTENTION_DELETE > BTR_INTENTION_BOTH"
-#endif
-#if BTR_INTENTION_BOTH > BTR_INTENTION_INSERT
-#error "BTR_INTENTION_BOTH > BTR_INTENTION_INSERT"
-#endif
/** For the index->lock scalability improvement, the only clear
performance regression observed was caused by a history list that had grown huge.
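
The hunk above replaces the preprocessor #error guards on the btr_intention_t ordering with a comment ("the order must be DELETE, BOTH, INSERT"), while similar guards elsewhere in the patch become compile_time_assert. As a standalone sketch (not part of the patch), the same invariant can be pinned down with C++11 static_assert on an illustrative copy of the enum:

// Illustrative copy of the intention enum; only the ordering matters here.
enum btr_intention_sketch {
    INTENTION_DELETE,
    INTENTION_BOTH,
    INTENTION_INSERT
};

// Compile-time checks equivalent to the removed #if/#error blocks.
static_assert(INTENTION_DELETE < INTENTION_BOTH,
              "DELETE must sort before BOTH");
static_assert(INTENTION_BOTH < INTENTION_INSERT,
              "BOTH must sort before INSERT");

int main() { return 0; }
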
@@ -124,7 +120,7 @@ uint btr_cur_limit_optimistic_insert_debug;
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
+#define BTR_CUR_PAGE_REORGANIZE_LIMIT (srv_page_size / 32)
/** The structure of a BLOB part header */
/* @{ */
@@ -211,16 +207,6 @@ btr_rec_free_externally_stored_fields(
/*==================== B-TREE SEARCH =========================*/
-#if MTR_MEMO_PAGE_S_FIX != RW_S_LATCH
-#error "MTR_MEMO_PAGE_S_FIX != RW_S_LATCH"
-#endif
-#if MTR_MEMO_PAGE_X_FIX != RW_X_LATCH
-#error "MTR_MEMO_PAGE_X_FIX != RW_X_LATCH"
-#endif
-#if MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH
-#error "MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH"
-#endif
-
/** Latches the leaf page or pages requested.
@param[in] block leaf page where the search converged
@param[in] page_id page id of the leaf
@@ -245,6 +231,10 @@ btr_cur_latch_leaves(
bool spatial;
btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
+ compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH));
+ compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH));
+ compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH));
+
spatial = dict_index_is_spatial(cursor->index) && cursor->rtr_info;
ut_ad(buf_page_in_file(&block->page));
@@ -395,6 +385,235 @@ btr_cur_latch_leaves(
return(latch_leaves);
}
+/** Load the instant ALTER TABLE metadata from the clustered index
+when loading a table definition.
+@param[in,out] index clustered index definition
+@param[in,out] mtr mini-transaction
+@return error code
+@retval DB_SUCCESS if no error occurred
+@retval DB_CORRUPTION if any corruption was noticed */
+static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
+{
+ ut_ad(index->is_primary());
+ ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
+ ut_ad(index->table->supports_instant());
+ ut_ad(index->table->is_readable());
+
+ const fil_space_t* space = index->table->space;
+ if (!space) {
+unreadable:
+ ib::error() << "Table " << index->table->name
+ << " has an unreadable root page";
+ index->table->corrupted = true;
+ return DB_CORRUPTION;
+ }
+
+ page_t* root = btr_root_get(index, mtr);
+
+ if (!root || btr_cur_instant_root_init(index, root)) {
+ goto unreadable;
+ }
+
+ ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES);
+
+ if (fil_page_get_type(root) == FIL_PAGE_INDEX) {
+ ut_ad(!index->is_instant());
+ return DB_SUCCESS;
+ }
+
+ btr_cur_t cur;
+ dberr_t err = btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF,
+ &cur, 0, mtr);
+ if (err != DB_SUCCESS) {
+ index->table->corrupted = true;
+ return err;
+ }
+
+ ut_ad(page_cur_is_before_first(&cur.page_cur));
+ ut_ad(page_is_leaf(cur.page_cur.block->frame));
+
+ page_cur_move_to_next(&cur.page_cur);
+
+ const rec_t* rec = cur.page_cur.rec;
+ const ulint comp = dict_table_is_comp(index->table);
+ const ulint info_bits = rec_get_info_bits(rec, comp);
+
+ if (page_rec_is_supremum(rec)
+ || !(info_bits & REC_INFO_MIN_REC_FLAG)) {
+ if (!index->is_instant()) {
+ /* The FIL_PAGE_TYPE_INSTANT and PAGE_INSTANT may be
+ assigned even if instant ADD COLUMN was not
+ committed. Changes to these page header fields are not
+ undo-logged, but changes to the hidden metadata record
+ are. If the server is killed and restarted, the page
+ header fields could remain set even though no metadata
+ record is present. */
+ return DB_SUCCESS;
+ }
+
+ ib::error() << "Table " << index->table->name
+ << " is missing instant ALTER metadata";
+ index->table->corrupted = true;
+ return DB_CORRUPTION;
+ }
+
+ if (info_bits != REC_INFO_MIN_REC_FLAG
+ || (comp && rec_get_status(rec) != REC_STATUS_COLUMNS_ADDED)) {
+incompatible:
+ ib::error() << "Table " << index->table->name
+ << " contains unrecognizable instant ALTER metadata";
+ index->table->corrupted = true;
+ return DB_CORRUPTION;
+ }
+
+ /* Read the metadata. We can get here on server restart
+ or when the table was evicted from the data dictionary cache
+ and is now being accessed again.
+
+ Here, READ COMMITTED and REPEATABLE READ should be equivalent.
+ Committing the ADD COLUMN operation would acquire
+ MDL_EXCLUSIVE and LOCK_X|LOCK_TABLE, which would prevent any
+ concurrent operations on the table, including table eviction
+ from the cache. */
+
+ mem_heap_t* heap = NULL;
+ ulint* offsets = rec_get_offsets(rec, index, NULL, true,
+ ULINT_UNDEFINED, &heap);
+ if (rec_offs_any_default(offsets)) {
+inconsistent:
+ mem_heap_free(heap);
+ goto incompatible;
+ }
+
+ /* In fact, because we only ever append fields to the metadata
+ record, it is also OK to perform READ UNCOMMITTED and
+ then ignore any extra fields, provided that
+ trx_sys.is_registered(DB_TRX_ID). */
+ if (rec_offs_n_fields(offsets) > index->n_fields
+ && !trx_sys.is_registered(current_trx(),
+ row_get_rec_trx_id(rec, index,
+ offsets))) {
+ goto inconsistent;
+ }
+
+ for (unsigned i = index->n_core_fields; i < index->n_fields; i++) {
+ ulint len;
+ const byte* data = rec_get_nth_field(rec, offsets, i, &len);
+ dict_col_t* col = index->fields[i].col;
+ ut_ad(!col->is_instant());
+ ut_ad(!col->def_val.data);
+ col->def_val.len = len;
+ switch (len) {
+ case UNIV_SQL_NULL:
+ continue;
+ case 0:
+ col->def_val.data = field_ref_zero;
+ continue;
+ }
+ ut_ad(len != UNIV_SQL_DEFAULT);
+ if (!rec_offs_nth_extern(offsets, i)) {
+ col->def_val.data = mem_heap_dup(
+ index->table->heap, data, len);
+ } else if (len < BTR_EXTERN_FIELD_REF_SIZE
+ || !memcmp(data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE)) {
+ col->def_val.len = UNIV_SQL_DEFAULT;
+ goto inconsistent;
+ } else {
+ col->def_val.data = btr_copy_externally_stored_field(
+ &col->def_val.len, data,
+ dict_table_page_size(index->table),
+ len, index->table->heap);
+ }
+ }
+
+ mem_heap_free(heap);
+ return DB_SUCCESS;
+}
+
+/** Load the instant ALTER TABLE metadata from the clustered index
+when loading a table definition.
+@param[in,out] table table definition from the data dictionary
+@return error code
+@retval DB_SUCCESS if no error occurred */
+dberr_t
+btr_cur_instant_init(dict_table_t* table)
+{
+ mtr_t mtr;
+ dict_index_t* index = dict_table_get_first_index(table);
+ mtr.start();
+ dberr_t err = index
+ ? btr_cur_instant_init_low(index, &mtr)
+ : DB_CORRUPTION;
+ mtr.commit();
+ return(err);
+}
+
+/** Initialize the n_core_null_bytes on first access to a clustered
+index root page.
+@param[in] index clustered index that is on its first access
+@param[in] page clustered index root page
+@return whether the page is corrupted */
+bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
+{
+ ut_ad(page_is_root(page));
+ ut_ad(!page_is_comp(page) == !dict_table_is_comp(index->table));
+ ut_ad(index->is_primary());
+ ut_ad(!index->is_instant());
+ ut_ad(index->table->supports_instant());
+ /* This is normally executed as part of btr_cur_instant_init()
+ when dict_load_table_one() is loading a table definition.
+ Other threads should not access or modify n_core_null_bytes
+ or n_core_fields before dict_load_table_one() returns.
+
+ This can also be executed during IMPORT TABLESPACE, where the
+ table definition is exclusively locked. */
+
+ switch (fil_page_get_type(page)) {
+ default:
+ ut_ad(!"wrong page type");
+ return true;
+ case FIL_PAGE_INDEX:
+ /* The field PAGE_INSTANT is guaranteed 0 on clustered
+ index root pages of ROW_FORMAT=COMPACT or
+ ROW_FORMAT=DYNAMIC when instant ADD COLUMN is not used. */
+ ut_ad(!page_is_comp(page) || !page_get_instant(page));
+ index->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(index->n_nullable));
+ return false;
+ case FIL_PAGE_TYPE_INSTANT:
+ break;
+ }
+
+ const uint16_t n = page_get_instant(page);
+
+ if (n < index->n_uniq + DATA_ROLL_PTR || n > index->n_fields) {
+ /* The PRIMARY KEY (or hidden DB_ROW_ID) and
+ DB_TRX_ID,DB_ROLL_PTR columns must always be present
+ as 'core' fields. All fields, including those for
+ instantly added columns, must be present in the data
+ dictionary. */
+ return true;
+ }
+
+ if (memcmp(page_get_infimum_rec(page), "infimum", 8)
+ || memcmp(page_get_supremum_rec(page), "supremum", 8)) {
+ /* In a later format, these fields in a FIL_PAGE_TYPE_INSTANT
+ root page could be repurposed for something else. */
+ return true;
+ }
+
+ index->n_core_fields = n;
+ ut_ad(!index->is_dummy);
+ ut_d(index->is_dummy = true);
+ index->n_core_null_bytes = n == index->n_fields
+ ? UT_BITS_IN_BYTES(unsigned(index->n_nullable))
+ : UT_BITS_IN_BYTES(index->get_n_nullable(n));
+ ut_d(index->is_dummy = false);
+ return false;
+}
+
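
btr_cur_instant_root_init() above computes the core null-flag bytes with UT_BITS_IN_BYTES over the nullable column count and validates the PAGE_INSTANT field against the index definition. The helper below is a standalone stand-in that is assumed to match UT_BITS_IN_BYTES (round a bit count up to whole bytes); the example values are illustrative only.

#include <cassert>

// Assumed to behave like UT_BITS_IN_BYTES: round a bit count up to whole bytes.
constexpr unsigned bits_in_bytes(unsigned bits)
{
    return (bits + 7) / 8;
}

int main()
{
    // One null-flag bit per nullable column, packed into whole bytes.
    assert(bits_in_bytes(0)  == 0);
    assert(bits_in_bytes(1)  == 1);
    assert(bits_in_bytes(8)  == 1);
    assert(bits_in_bytes(9)  == 2);
    assert(bits_in_bytes(10) == 2);   // e.g. 10 nullable core columns -> 2 bytes
}
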
/** Optimistically latches the leaf page or pages requested.
@param[in] block guessed buffer block
@param[in] modify_clock modify clock value
@@ -447,13 +666,11 @@ btr_cur_optimistic_latch_leaves(
rw_lock_s_unlock(&block->lock);
if (left_page_no != FIL_NULL) {
- const page_id_t page_id(
- dict_index_get_space(cursor->index),
- left_page_no);
-
cursor->left_block = btr_block_get(
- page_id,
- dict_table_page_size(cursor->index->table),
+ page_id_t(cursor->index->table->space_id,
+ left_page_no),
+ page_size_t(cursor->index->table->space
+ ->flags),
mode, cursor->index, mtr);
} else {
cursor->left_block = NULL;
@@ -518,7 +735,7 @@ btr_cur_get_and_clear_intention(
/* both or unknown */
intention = BTR_INTENTION_BOTH;
}
- *latch_mode &= ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE);
+ *latch_mode &= ulint(~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE));
return(intention);
}
@@ -602,10 +819,10 @@ btr_cur_will_modify_tree(
/* is first, 2nd or last record */
if (page_rec_is_first(rec, page)
- || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+ || (page_has_next(page)
&& (page_rec_is_last(rec, page)
|| page_rec_is_second_last(rec, page)))
- || (mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
+ || (page_has_prev(page)
&& page_rec_is_second(rec, page))) {
return(true);
}
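
This hunk, like several later ones, replaces open-coded reads of FIL_PAGE_PREV/FIL_PAGE_NEXT against FIL_NULL with the page_has_prev()/page_has_next() helpers. A standalone sketch of what such a helper checks; the header offsets and the FIL_NULL sentinel are written from memory and should be treated as assumptions of the sketch:

#include <cassert>
#include <cstdint>
#include <cstring>

// Assumed page-header layout for this sketch.
constexpr unsigned FIL_PAGE_PREV = 8;   // offset of the previous-page number
constexpr unsigned FIL_PAGE_NEXT = 12;  // offset of the next-page number
constexpr uint32_t FIL_NULL = 0xFFFFFFFF;

// InnoDB stores page numbers big-endian on disk.
static uint32_t read_be32(const unsigned char* p)
{
    return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16)
         | (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
}

static bool page_has_next_sketch(const unsigned char* page)
{
    return read_be32(page + FIL_PAGE_NEXT) != FIL_NULL;
}

int main()
{
    unsigned char page[16];
    std::memset(page, 0xFF, sizeof page);        // both sibling links are FIL_NULL
    assert(!page_has_next_sketch(page));

    const unsigned char next[4] = {0, 0, 0, 7};  // next page number = 7
    std::memcpy(page + FIL_PAGE_NEXT, next, 4);
    assert(page_has_next_sketch(page));
}
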
@@ -687,13 +904,10 @@ btr_cur_need_opposite_intention(
{
switch (lock_intention) {
case BTR_INTENTION_DELETE:
- return((mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
- && page_rec_is_first(rec, page))
- || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
- && page_rec_is_last(rec, page)));
+ return (page_has_prev(page) && page_rec_is_first(rec, page)) ||
+ (page_has_next(page) && page_rec_is_last(rec, page));
case BTR_INTENTION_INSERT:
- return(mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
- && page_rec_is_last(rec, page));
+ return page_has_next(page) && page_rec_is_last(rec, page);
case BTR_INTENTION_BOTH:
return(false);
}
@@ -823,8 +1037,7 @@ search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
+btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
@@ -839,17 +1052,16 @@ btr_cur_search_to_nth_level(
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
+ NOTE that if ahi_latch is set, we might not have a
+ latch on the cursor page; we assume that ahi_latch
+ protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,
- /*!< in: info on the latch mode the
- caller currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: currently held btr_search_latch
+ (in RW_S_LATCH mode), or NULL */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr, /*!< in: mtr */
@@ -870,7 +1082,7 @@ btr_cur_search_to_nth_level(
page_cur_mode_t search_mode = PAGE_CUR_UNSUPP;
ulint buf_mode;
ulint estimate;
- ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
+ ulint node_ptr_max_size = srv_page_size / 2;
page_cur_t* page_cursor;
btr_op_t btr_op;
ulint root_height = 0; /* remove warning */
@@ -971,7 +1183,7 @@ btr_cur_search_to_nth_level(
/* Operations on the clustered index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
/* Operations on the temporary table(indexes) cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_table_is_temporary(index->table));
+ ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary());
/* Operation on the spatial index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
@@ -1027,16 +1239,15 @@ btr_cur_search_to_nth_level(
&& mode != PAGE_CUR_LE_OR_EXTENDS
# endif /* PAGE_CUR_LE_OR_EXTENDS */
&& !dict_index_is_spatial(index)
- /* If !has_search_latch, we do a dirty read of
+ /* If !ahi_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
&& btr_search_enabled
&& !modify_external
- && rw_lock_get_writer(btr_get_search_latch(index))
- == RW_LOCK_NOT_LOCKED
+ && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
- has_search_latch, mtr)) {
+ ahi_latch, mtr)) {
/* Search using the hash index succeeded */
@@ -1057,10 +1268,12 @@ btr_cur_search_to_nth_level(
/* If the hash search did not succeed, do binary search down the
tree */
- if (has_search_latch) {
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi_latch) {
/* Release possible search latch to obey latching order */
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
}
+#endif /* BTR_CUR_HASH_ADAPT */
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched leaf node(s) */
@@ -1073,7 +1286,7 @@ btr_cur_search_to_nth_level(
Free blocks and read I/O bandwidth should be prioritized
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else if (dict_index_is_spatial(index)
@@ -1130,11 +1343,10 @@ btr_cur_search_to_nth_level(
page_cursor = btr_cur_get_page_cur(cursor);
- const ulint space = dict_index_get_space(index);
- const page_size_t page_size(dict_table_page_size(index->table));
+ const page_size_t page_size(index->table->space->flags);
/* Start with the root page. */
- page_id_t page_id(space, dict_index_get_page(index));
+ page_id_t page_id(index->table->space_id, index->page);
if (root_leaf_rw_latch == RW_X_LATCH) {
node_ptr_max_size = btr_node_ptr_max_size(index);
@@ -1417,10 +1629,10 @@ retry_page_get:
ut_ad(fil_page_index_page_check(page));
ut_ad(index->id == btr_page_get_index_id(page));
- if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
+ if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
root_height = height;
cursor->tree_height = root_height + 1;
@@ -1589,6 +1801,7 @@ retry_page_get:
}
#ifdef BTR_CUR_HASH_ADAPT
} else if (height == 0 && btr_search_enabled
+ && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
&& !dict_index_is_spatial(index)) {
/* The adaptive hash index is only used when searching
for leaf pages (height==0), but not in r-trees.
@@ -1613,8 +1826,7 @@ retry_page_get:
/* If this is the desired level, leave the loop */
- ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
- mtr));
+ ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor)));
/* Add Predicate lock if it is serializable isolation
and only if it is in the search case */
@@ -1637,7 +1849,7 @@ retry_page_get:
}
lock_prdt_lock(block, &prdt, index, LOCK_S,
- LOCK_PREDICATE, cursor->thr, mtr);
+ LOCK_PREDICATE, cursor->thr);
if (rw_latch == RW_NO_LATCH && height != 0) {
rw_lock_s_unlock(&(block->lock));
@@ -1684,7 +1896,7 @@ need_opposite_intention:
lock_intention = BTR_INTENTION_BOTH;
- page_id = page_id_t(space, dict_index_get_page(index));
+ page_id.set_page_no(index->page);
up_match = 0;
low_match = 0;
height = ULINT_UNDEFINED;
@@ -1873,7 +2085,7 @@ need_opposite_intention:
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
- if (btr_page_get_prev(page, mtr) != FIL_NULL
+ if (page_has_prev(page)
&& page_rec_is_first(node_ptr, page)) {
if (leftmost_from_level == 0) {
@@ -1900,8 +2112,7 @@ need_opposite_intention:
ulint idx = n_blocks
- (leftmost_from_level - 1);
- page_id = page_id_t(
- space,
+ page_id.set_page_no(
tree_blocks[idx]->page.id.page_no());
for (ulint i = n_blocks
@@ -1935,8 +2146,7 @@ need_opposite_intention:
}
/* Go to the child node */
- page_id = page_id_t(
- space,
+ page_id.set_page_no(
btr_node_ptr_get_child_page_no(node_ptr, offsets));
n_blocks++;
@@ -1990,7 +2200,7 @@ need_opposite_intention:
} else if (!dict_index_is_spatial(index)
&& latch_mode == BTR_MODIFY_TREE
&& lock_intention == BTR_INTENTION_INSERT
- && mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+ && page_has_next(page)
&& page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
/* btr_insert_into_right_sibling() might cause
@@ -2074,11 +2284,20 @@ need_opposite_intention:
will properly check btr_search_enabled again in
btr_search_build_page_hash_index() before building a
page hash index, while holding search latch. */
- if (btr_search_enabled
+ if (!btr_search_enabled) {
# ifdef MYSQL_INDEX_DISABLE_AHI
- && !index->disable_ahi
+ } else if (index->disable_ahi) {
# endif
- ) {
+ } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) {
+ ut_ad(index->is_instant());
+ /* This may be a search tuple for
+ btr_pcur_restore_position(). */
+ ut_ad(tuple->info_bits == REC_INFO_METADATA
+ || tuple->info_bits == REC_INFO_MIN_REC_FLAG);
+ } else if (rec_is_metadata(btr_cur_get_rec(cursor), index)) {
+ /* Only user records belong in the adaptive
+ hash index. */
+ } else {
btr_search_info_update(index, cursor);
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -2116,15 +2335,17 @@ func_exit:
ut_free(prev_tree_savepoints);
}
- if (has_search_latch) {
- btr_search_s_lock(index);
- }
-
if (mbr_adj) {
/* remember that we will need to adjust parent MBR */
cursor->rtr_info->mbr_adj = true;
}
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi_latch) {
+ rw_lock_s_lock(ahi_latch);
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
DBUG_RETURN(err);
}
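
The ahi_latch parameter introduced above replaces the old has_search_latch flag: when the caller already holds the adaptive hash index latch in shared mode, the search releases it before descending the tree (to respect latching order) and re-acquires it just before returning. A minimal sketch of that drop-and-reacquire shape, using std::shared_mutex as a stand-in for rw_lock_t and a hypothetical function body:

#include <shared_mutex>

// Stand-in for rw_lock_t; callers without a latch pass nullptr.
using ahi_latch_t = std::shared_mutex;

static void descend_tree_sketch(ahi_latch_t* ahi_latch)
{
    if (ahi_latch) {
        // Release the caller's S latch to obey the latching order.
        ahi_latch->unlock_shared();
    }

    // ... the binary search down the tree would happen here ...

    if (ahi_latch) {
        // Give the latch back to the caller in the state it expects.
        ahi_latch->lock_shared();
    }
}

int main()
{
    ahi_latch_t latch;
    latch.lock_shared();          // caller holds the latch in shared mode
    descend_tree_sketch(&latch);
    latch.unlock_shared();        // still held on return, as promised
    descend_tree_sketch(nullptr); // callers without a latch pass NULL
}
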
@@ -2145,7 +2366,7 @@ btr_cur_open_at_index_side_func(
mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
- ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
+ ulint node_ptr_max_size = srv_page_size / 2;
ulint height;
ulint root_height = 0; /* remove warning */
rec_t* node_ptr;
@@ -2165,14 +2386,14 @@ btr_cur_open_at_index_side_func(
rec_offs_init(offsets_);
estimate = latch_mode & BTR_ESTIMATE;
- latch_mode &= ~BTR_ESTIMATE;
+ latch_mode &= ulint(~BTR_ESTIMATE);
ut_ad(level != ULINT_UNDEFINED);
bool s_latch_by_caller;
s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
- latch_mode &= ~BTR_ALREADY_S_LATCHED;
+ latch_mode &= ulint(~BTR_ALREADY_S_LATCHED);
lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
@@ -2200,7 +2421,7 @@ btr_cur_open_at_index_side_func(
Free blocks and read I/O bandwidth should be prioritized
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2232,9 +2453,8 @@ btr_cur_open_at_index_side_func(
page_cursor = btr_cur_get_page_cur(cursor);
cursor->index = index;
- page_id_t page_id(dict_index_get_space(index),
- dict_index_get_page(index));
- const page_size_t& page_size = dict_table_page_size(index->table);
+ page_id_t page_id(index->table->space_id, index->page);
+ const page_size_t page_size(index->table->space->flags);
if (root_leaf_rw_latch == RW_X_LATCH) {
node_ptr_max_size = btr_node_ptr_max_size(index);
@@ -2302,12 +2522,12 @@ btr_cur_open_at_index_side_func(
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
root_height = height;
ut_a(height >= level);
} else {
/* TODO: flag the index corrupted if this fails */
- ut_ad(height == btr_page_get_level(page, mtr));
+ ut_ad(height == btr_page_get_level(page));
}
if (height == level) {
@@ -2517,7 +2737,7 @@ btr_cur_open_at_rnd_pos_func(
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
- ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2;
+ ulint node_ptr_max_size = srv_page_size / 2;
ulint height;
rec_t* node_ptr;
ulint savepoint;
@@ -2546,7 +2766,7 @@ btr_cur_open_at_rnd_pos_func(
Free blocks and read I/O bandwidth should be prioritized
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2590,9 +2810,8 @@ btr_cur_open_at_rnd_pos_func(
page_cursor = btr_cur_get_page_cur(cursor);
cursor->index = index;
- page_id_t page_id(dict_index_get_space(index),
- dict_index_get_page(index));
- const page_size_t& page_size = dict_table_page_size(index->table);
+ page_id_t page_id(index->table->space_id, index->page);
+ const page_size_t page_size(index->table->space->flags);
dberr_t err = DB_SUCCESS;
if (root_leaf_rw_latch == RW_X_LATCH) {
@@ -2662,7 +2881,7 @@ btr_cur_open_at_rnd_pos_func(
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
- height = btr_page_get_level(page, mtr);
+ height = btr_page_get_level(page);
}
if (height == 0) {
@@ -2830,9 +3049,8 @@ btr_cur_insert_if_possible(
ut_ad(dtuple_check_typed(tuple));
- ut_ad(mtr_is_block_fix(
- mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
@@ -2867,7 +3085,7 @@ btr_cur_ins_lock_and_undo(
dtuple_t* entry, /*!< in/out: entry to insert */
que_thr_t* thr, /*!< in: query thread or NULL */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: TRUE if the inserted new record maybe
+ bool* inherit)/*!< out: true if the inserted new record maybe
should inherit LOCK_GAP type locks from the
successor record */
{
@@ -2885,7 +3103,7 @@ btr_cur_ins_lock_and_undo(
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
/* Check if there is predicate or GAP lock preventing the insertion */
if (!(flags & BTR_NO_LOCKING_FLAG)) {
@@ -2921,23 +3139,21 @@ btr_cur_ins_lock_and_undo(
if (flags & BTR_NO_UNDO_LOG_FLAG) {
roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS;
+ if (!(flags & BTR_KEEP_SYS_FLAG)) {
+upd_sys:
+ row_upd_index_entry_sys_field(entry, index,
+ DATA_ROLL_PTR, roll_ptr);
+ }
} else {
err = trx_undo_report_row_operation(thr, index, entry,
NULL, 0, NULL, NULL,
&roll_ptr);
- if (err != DB_SUCCESS) {
- return(err);
+ if (err == DB_SUCCESS) {
+ goto upd_sys;
}
}
- /* Now we can fill in the roll ptr field in entry */
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
-
- row_upd_index_entry_sys_field(entry, index,
- DATA_ROLL_PTR, roll_ptr);
- }
-
- return(DB_SUCCESS);
+ return(err);
}
/**
@@ -3011,9 +3227,9 @@ btr_cur_optimistic_insert(
buf_block_t* block;
page_t* page;
rec_t* dummy;
- ibool leaf;
- ibool reorg;
- ibool inherit = TRUE;
+ bool leaf;
+ bool reorg;
+ bool inherit = true;
ulint rec_size;
dberr_t err;
@@ -3024,7 +3240,7 @@ btr_cur_optimistic_insert(
page = buf_block_get_frame(block);
index = cursor->index;
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
@@ -3155,7 +3371,10 @@ fail_err:
ut_ad(trx_id[1].len == DATA_ROLL_PTR_LEN);
ut_ad(*static_cast<const byte*>
(trx_id[1].data) & 0x80);
- if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+ if (flags & BTR_NO_UNDO_LOG_FLAG) {
+ ut_ad(!memcmp(trx_id->data, reset_trx_id,
+ DATA_TRX_ID_LEN));
+ } else {
ut_ad(thr->graph->trx->id);
ut_ad(thr->graph->trx->id
== trx_read_trx_id(
@@ -3174,7 +3393,7 @@ fail_err:
if (*rec) {
} else if (page_size.is_compressed()) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
reorganize before failing. */
@@ -3213,10 +3432,18 @@ fail_err:
# ifdef MYSQL_INDEX_DISABLE_AHI
} else if (index->disable_ahi) {
# endif
- } else if (!reorg && cursor->flag == BTR_CUR_HASH) {
- btr_search_update_hash_node_on_insert(cursor);
+ } else if (entry->info_bits & REC_INFO_MIN_REC_FLAG) {
+ ut_ad(entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ ut_ad(flags == BTR_NO_LOCKING_FLAG);
} else {
- btr_search_update_hash_on_insert(cursor);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+ if (!reorg && cursor->flag == BTR_CUR_HASH) {
+ btr_search_update_hash_node_on_insert(
+ cursor, ahi_latch);
+ } else {
+ btr_search_update_hash_on_insert(cursor, ahi_latch);
+ }
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -3227,7 +3454,7 @@ fail_err:
if (leaf
&& !dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
/* Update the free bits of the B-tree page in the
insert buffer bitmap. */
@@ -3293,7 +3520,7 @@ btr_cur_pessimistic_insert(
dict_index_t* index = cursor->index;
big_rec_t* big_rec_vec = NULL;
dberr_t err;
- ibool inherit = FALSE;
+ bool inherit = false;
bool success;
ulint n_reserved = 0;
@@ -3305,9 +3532,8 @@ btr_cur_pessimistic_insert(
ut_ad(mtr_memo_contains_flagged(
mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(
- mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
ut_ad(!dict_index_is_online_ddl(index)
|| dict_index_is_clust(index)
|| (flags & BTR_CREATE_FLAG));
@@ -3331,7 +3557,8 @@ btr_cur_pessimistic_insert(
ulint n_extents = cursor->tree_height / 16 + 3;
- success = fsp_reserve_free_extents(&n_reserved, index->space,
+ success = fsp_reserve_free_extents(&n_reserved,
+ index->table->space,
n_extents, FSP_NORMAL, mtr);
if (!success) {
return(DB_OUT_OF_FILE_SPACE);
@@ -3356,10 +3583,7 @@ btr_cur_pessimistic_insert(
if (big_rec_vec == NULL) {
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space,
- n_reserved);
- }
+ index->table->space->release_free_extents(n_reserved);
return(DB_TOO_BIG_RECORD);
}
}
@@ -3383,7 +3607,7 @@ btr_cur_pessimistic_insert(
|| dict_index_is_spatial(index));
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
if (dict_index_is_spatial(index)) {
/* Do nothing */
} else {
@@ -3404,7 +3628,7 @@ btr_cur_pessimistic_insert(
== FIL_NULL) {
/* A split-and-insert always needs to call
lock_update_insert(). */
- inherit = TRUE;
+ inherit = true;
}
}
}
@@ -3416,7 +3640,15 @@ btr_cur_pessimistic_insert(
# ifdef MYSQL_INDEX_DISABLE_AHI
if (index->disable_ahi); else
# endif
- btr_search_update_hash_on_insert(cursor);
+ if (entry->info_bits & REC_INFO_MIN_REC_FLAG) {
+ ut_ad(entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ ut_ad((flags & ulint(~BTR_KEEP_IBUF_BITMAP))
+ == BTR_NO_LOCKING_FLAG);
+ } else {
+ btr_search_update_hash_on_insert(
+ cursor, btr_get_search_latch(index));
+ }
#endif /* BTR_CUR_HASH_ADAPT */
if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
@@ -3424,10 +3656,7 @@ btr_cur_pessimistic_insert(
}
}
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
+ index->table->space->release_free_extents(n_reserved);
*big_rec = big_rec_vec;
return(DB_SUCCESS);
@@ -3463,7 +3692,7 @@ btr_cur_upd_lock_and_undo(
index = cursor->index;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
if (!dict_index_is_clust(index)) {
ut_ad(dict_index_is_online_ddl(index)
@@ -3600,7 +3829,7 @@ btr_cur_parse_update_in_place(
rec_offset = mach_read_from_2(ptr);
ptr += 2;
- ut_a(rec_offset <= UNIV_PAGE_SIZE);
+ ut_a(rec_offset <= srv_page_size);
heap = mem_heap_create(256);
@@ -3697,7 +3926,8 @@ btr_cur_update_alloc_zip_func(
goto out_of_space;
}
- rec_offs_make_valid(page_cur_get_rec(cursor), index, offsets);
+ rec_offs_make_valid(page_cur_get_rec(cursor), index,
+ page_is_leaf(page), offsets);
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
@@ -3719,7 +3949,7 @@ out_of_space:
/* Out of space: reset the free bits. */
if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)
+ && !index->table->is_temporary()
&& page_is_leaf(page)) {
ibuf_reset_free_bits(page_cur_get_block(cursor));
}
@@ -3776,6 +4006,7 @@ btr_cur_update_in_place(
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor)));
ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
+ ut_ad(!(update->info_bits & REC_INFO_MIN_REC_FLAG));
DBUG_LOG("ib_cur",
"update-in-place " << index->name << " (" << index->id
@@ -3787,7 +4018,7 @@ btr_cur_update_in_place(
/* Check that enough space is available on the compressed page. */
if (page_zip) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
if (!btr_cur_update_alloc_zip(
page_zip, btr_cur_get_page_cur(cursor),
@@ -3838,7 +4069,8 @@ btr_cur_update_in_place(
if (!dict_index_is_clust(index)
|| row_upd_changes_ord_field_binary(
index, update, thr, NULL, NULL)) {
-
+ ut_ad(!(update->info_bits
+ & REC_INFO_MIN_REC_FLAG));
/* Remove possible hash index pointer
to this record */
btr_search_update_hash_on_delete(cursor);
@@ -3880,13 +4112,67 @@ func_exit:
&& !dict_index_is_clust(index)
&& page_is_leaf(buf_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
}
return(err);
}
+/** Trim an update tuple due to instant ADD COLUMN, if needed.
+For normal records, the trailing instantly added fields that match
+the initial default values are omitted.
+
+For the special metadata record on a table on which instant
+ADD COLUMN has already been executed, both ADD COLUMN and the
+rollback of ADD COLUMN need to be handled specially.
+
+@param[in,out] entry index entry
+@param[in] index index
+@param[in] update update vector
+@param[in] thr execution thread */
+static inline
+void
+btr_cur_trim(
+ dtuple_t* entry,
+ const dict_index_t* index,
+ const upd_t* update,
+ const que_thr_t* thr)
+{
+ if (!index->is_instant()) {
+ } else if (UNIV_UNLIKELY(update->info_bits == REC_INFO_METADATA)) {
+ /* We are either updating a metadata record
+ (instantly adding columns to a table where instant ADD was
+ already executed) or rolling back such an operation. */
+ ut_ad(!upd_get_nth_field(update, 0)->orig_len);
+ ut_ad(upd_get_nth_field(update, 0)->field_no
+ > index->n_core_fields);
+
+ if (thr->graph->trx->in_rollback) {
+ /* This rollback can occur either as part of
+ ha_innobase::commit_inplace_alter_table() rolling
+ back after a failed innobase_add_instant_try(),
+ or as part of crash recovery. Either way, the
+ table will be in the data dictionary cache, with
+ the instantly added columns going to be removed
+ later in the rollback. */
+ ut_ad(index->table->cached);
+ /* The DB_TRX_ID,DB_ROLL_PTR are always last,
+ and there should be some change to roll back.
+ The first field in the update vector is the
+ first instantly added column logged by
+ innobase_add_instant_try(). */
+ ut_ad(update->n_fields > 2);
+ ulint n_fields = upd_get_nth_field(update, 0)
+ ->field_no;
+ ut_ad(n_fields + 1 >= entry->n_fields);
+ entry->n_fields = n_fields;
+ }
+ } else {
+ entry->trim(*index);
+ }
+}
+
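
btr_cur_trim() above shortens the new entry in two ways: when rolling back the metadata record it truncates n_fields to the field number of the first updated (instantly added) column, and for ordinary records it relies on entry->trim() to drop trailing instantly added fields that still hold their initial defaults. The sketch below illustrates only the second idea over plain vectors; the value/default representation is purely illustrative.

#include <cassert>
#include <string>
#include <vector>

// Drop trailing fields (beyond the core ones) that still equal their
// instant-ADD default, so the stored record keeps its pre-ALTER format.
static void trim_sketch(std::vector<std::string>& entry,
                        const std::vector<std::string>& defaults,
                        size_t n_core_fields)
{
    while (entry.size() > n_core_fields
           && entry.back() == defaults[entry.size() - 1]) {
        entry.pop_back();
    }
}

int main()
{
    //                 core fields        instantly added (defaults: "0", "x")
    std::vector<std::string> entry    = {"1", "bob", "0", "x"};
    std::vector<std::string> defaults = {"",  "",    "0", "x"};

    trim_sketch(entry, defaults, 2);
    assert(entry.size() == 2);            // both trailing defaults trimmed

    entry = {"1", "bob", "7", "x"};
    trim_sketch(entry, defaults, 2);
    assert(entry.size() == 3);            // "x" trimmed, "7" differs from "0"
}
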
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
@@ -3941,7 +4227,7 @@ btr_cur_optimistic_update(
index = cursor->index;
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page updates */
ut_ad(page_is_leaf(page));
/* The insert buffer tree should never be updated in place. */
@@ -3962,7 +4248,11 @@ btr_cur_optimistic_update(
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
+ const bool is_metadata = update->info_bits == REC_INFO_METADATA;
+
+ if (UNIV_LIKELY(!is_metadata)
+ && !row_upd_changes_field_size_or_external(index, *offsets,
+ update)) {
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
@@ -4015,7 +4305,8 @@ any_extern:
corresponding to new_entry is latched in mtr.
Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
+ *heap);
+ btr_cur_trim(new_entry, index, update, thr);
old_rec_size = rec_offs_size(*offsets);
new_rec_size = rec_get_converted_size(index, new_entry, 0);
@@ -4025,7 +4316,7 @@ any_extern:
#endif /* UNIV_ZIP_DEBUG */
if (page_zip) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page),
dict_index_get_n_fields(index),
@@ -4119,7 +4410,16 @@ any_extern:
lock_rec_store_on_page_infimum(block, rec);
}
- btr_search_update_hash_on_delete(cursor);
+ if (UNIV_UNLIKELY(is_metadata)) {
+ ut_ad(new_entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ /* This can be innobase_add_instant_try() performing a
+ subsequent instant ADD COLUMN, or its rollback by
+ row_undo_mod_clust_low(). */
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ } else {
+ btr_search_update_hash_on_delete(cursor);
+ }
page_cur_delete_rec(page_cursor, index, *offsets, mtr);
@@ -4137,8 +4437,14 @@ any_extern:
cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
- /* Restore the old explicit lock state on the record */
- if (!dict_table_is_locking_disabled(index->table)) {
+ if (UNIV_UNLIKELY(is_metadata)) {
+ /* We must empty the PAGE_FREE list, because if this
+ was a rollback, the shortened metadata record
+ would have too many fields, and we would be unable to
+ know the size of the freed record. */
+ btr_page_reorganize(page_cursor, index, mtr);
+ } else if (!dict_table_is_locking_disabled(index->table)) {
+ /* Restore the old explicit lock state on the record */
lock_rec_restore_from_page_infimum(block, rec, block);
}
@@ -4150,9 +4456,9 @@ func_exit:
&& !dict_index_is_clust(index)) {
/* Update the free bits in the insert buffer. */
if (page_zip) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
- } else if (!dict_table_is_temporary(index->table)) {
+ } else if (!index->table->is_temporary()) {
ibuf_update_free_bits_low(block, max_ins_size, mtr);
}
}
@@ -4272,11 +4578,11 @@ btr_cur_pessimistic_update(
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK |
MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- ut_ad(!page_zip || !dict_table_is_temporary(index->table));
+ ut_ad(!page_zip || !index->table->is_temporary());
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
ut_ad(trx_id > 0
@@ -4308,7 +4614,7 @@ btr_cur_pessimistic_update(
&& optim_err != DB_ZIP_OVERFLOW
&& !dict_index_is_clust(index)
&& page_is_leaf(page)) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
}
@@ -4335,7 +4641,11 @@ btr_cur_pessimistic_update(
purge would also have removed the clustered index record
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, entry_heap);
+ entry_heap);
+ btr_cur_trim(new_entry, index, update, thr);
+
+ const bool is_metadata = new_entry->info_bits
+ & REC_INFO_MIN_REC_FLAG;
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
@@ -4380,11 +4690,7 @@ btr_cur_pessimistic_update(
ut_a(!page_zip
|| page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (n_reserved > 0) {
- fil_space_release_free_extents(
- index->space, n_reserved);
- }
-
+ index->table->space->release_free_extents(n_reserved);
err = DB_TOO_BIG_RECORD;
goto err_exit;
}
@@ -4411,7 +4717,7 @@ btr_cur_pessimistic_update(
ulint n_extents = cursor->tree_height / 16 + 3;
if (!fsp_reserve_free_extents(
- &n_reserved, index->space, n_extents,
+ &n_reserved, index->table->space, n_extents,
flags & BTR_NO_UNDO_LOG_FLAG
? FSP_CLEANING : FSP_NORMAL,
mtr)) {
@@ -4432,19 +4738,30 @@ btr_cur_pessimistic_update(
page, 1);
}
- /* Store state of explicit locks on rec on the page infimum record,
- before deleting rec. The page infimum acts as a dummy carrier of the
- locks, taking care also of lock releases, before we can move the locks
- back on the actual record. There is a special case: if we are
- inserting on the root page and the insert causes a call of
- btr_root_raise_and_insert. Therefore we cannot in the lock system
- delete the lock structs set on the root page even if the root
- page carries just node pointers. */
- if (!dict_table_is_locking_disabled(index->table)) {
- lock_rec_store_on_page_infimum(block, rec);
- }
+ if (UNIV_UNLIKELY(is_metadata)) {
+ ut_ad(new_entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ /* This can be innobase_add_instant_try() performing a
+ subsequent instant ADD COLUMN, or its rollback by
+ row_undo_mod_clust_low(). */
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ } else {
+ btr_search_update_hash_on_delete(cursor);
- btr_search_update_hash_on_delete(cursor);
+ /* Store state of explicit locks on rec on the page
+ infimum record, before deleting rec. The page infimum
+ acts as a dummy carrier of the locks, taking care also
+ of lock releases, before we can move the locks back on
+ the actual record. There is a special case: if we are
+ inserting on the root page and the insert causes a
+ call of btr_root_raise_and_insert. Therefore we cannot
+ in the lock system delete the lock structs set on the
+ root page even if the root page carries just node
+ pointers. */
+ if (!dict_table_is_locking_disabled(index->table)) {
+ lock_rec_store_on_page_infimum(block, rec);
+ }
+ }
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -4461,7 +4778,15 @@ btr_cur_pessimistic_update(
if (rec) {
page_cursor->rec = rec;
- if (!dict_table_is_locking_disabled(index->table)) {
+ if (UNIV_UNLIKELY(is_metadata)) {
+ /* We must empty the PAGE_FREE list, because if this
+ was a rollback, the shortened metadata record
+ would have too many fields, and we would be unable to
+ know the size of the freed record. */
+ btr_page_reorganize(page_cursor, index, mtr);
+ rec = page_cursor->rec;
+ rec_offs_make_valid(rec, index, true, *offsets);
+ } else if (!dict_table_is_locking_disabled(index->table)) {
lock_rec_restore_from_page_infimum(
btr_cur_get_block(cursor), rec, block);
}
@@ -4478,11 +4803,12 @@ btr_cur_pessimistic_update(
}
bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
+ ut_ad(!adjust || page_is_leaf(page));
if (btr_cur_compress_if_useful(cursor, adjust, mtr)) {
if (adjust) {
- rec_offs_make_valid(
- page_cursor->rec, index, *offsets);
+ rec_offs_make_valid(page_cursor->rec, index,
+ true, *offsets);
}
} else if (!dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -4490,9 +4816,9 @@ btr_cur_pessimistic_update(
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
if (page_zip) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
ibuf_update_free_bits_zip(block, mtr);
- } else if (!dict_table_is_temporary(index->table)) {
+ } else if (!index->table->is_temporary()) {
ibuf_update_free_bits_low(block, max_ins_size,
mtr);
}
@@ -4524,7 +4850,7 @@ btr_cur_pessimistic_update(
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
if (!dict_index_is_clust(index)
- && !dict_table_is_temporary(index->table)
+ && !index->table->is_temporary()
&& page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
@@ -4576,7 +4902,7 @@ btr_cur_pessimistic_update(
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
/* Update PAGE_MAX_TRX_ID in the index page header.
It was not updated by btr_cur_pessimistic_insert()
because of BTR_NO_LOCKING_FLAG. */
@@ -4608,7 +4934,14 @@ btr_cur_pessimistic_update(
ut_ad(row_get_rec_trx_id(rec, index, *offsets));
}
- if (!dict_table_is_locking_disabled(index->table)) {
+ if (UNIV_UNLIKELY(is_metadata)) {
+ /* We must empty the PAGE_FREE list, because if this
+ was a rollback, the shortened metadata record
+ would have too many fields, and we would be unable to
+ know the size of the freed record. */
+ btr_page_reorganize(page_cursor, index, mtr);
+ rec = page_cursor->rec;
+ } else if (!dict_table_is_locking_disabled(index->table)) {
lock_rec_restore_from_page_infimum(
btr_cur_get_block(cursor), rec, block);
}
@@ -4628,12 +4961,8 @@ return_after_reservations:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
+ index->table->space->release_free_extents(n_reserved);
*big_rec = big_rec_vec;
-
return(err);
}
@@ -4655,7 +4984,7 @@ btr_cur_del_mark_set_clust_rec_log(
byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec)
@@ -4729,7 +5058,7 @@ btr_cur_parse_del_mark_set_clust_rec(
offset = mach_read_from_2(ptr);
ptr += 2;
- ut_a(offset <= UNIV_PAGE_SIZE);
+ ut_a(offset <= srv_page_size);
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
@@ -4809,7 +5138,7 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(buf_block_get_frame(block) == page_align(rec));
ut_ad(page_rec_is_leaf(rec));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
/* We may already have delete-marked this record
@@ -4923,7 +5252,7 @@ btr_cur_parse_del_mark_set_sec_rec(
offset = mach_read_from_2(ptr);
ptr += 2;
- ut_a(offset <= UNIV_PAGE_SIZE);
+ ut_a(offset <= srv_page_size);
if (page) {
rec = page + offset;
@@ -5034,9 +5363,8 @@ btr_cur_compress_if_useful(
ut_ad(mtr_memo_contains_flagged(
mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(
- mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
if (dict_index_is_spatial(cursor->index)) {
const page_t* page = btr_cur_get_page(cursor);
@@ -5088,9 +5416,9 @@ btr_cur_optimistic_delete_func(
ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_is_block_fix(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
- ut_ad(mtr->is_named_space(cursor->index->space));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr->is_named_space(cursor->index->table->space));
/* This is intended only for leaf page deletions */
@@ -5102,6 +5430,48 @@ btr_cur_optimistic_delete_func(
|| (flags & BTR_CREATE_FLAG));
rec = btr_cur_get_rec(cursor);
+
+ if (UNIV_UNLIKELY(page_is_root(block->frame)
+ && page_get_n_recs(block->frame) == 1
+ + (cursor->index->is_instant()
+ && !rec_is_metadata(rec, cursor->index)))) {
+ /* The whole index (and table) becomes logically empty.
+ Empty the whole page. That is, if we are deleting the
+ only user record, also delete the metadata record
+ if one exists (it exists if and only if is_instant()).
+ If we are deleting the metadata record and the
+ table becomes empty, clean up the whole page. */
+ dict_index_t* index = cursor->index;
+ ut_ad(!index->is_instant()
+ || rec_is_metadata(
+ page_rec_get_next_const(
+ page_get_infimum_rec(block->frame)),
+ index));
+ if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG)) {
+ /* This should be rolling back instant ADD COLUMN.
+ If this is a recovered transaction, then
+ index->is_instant() will hold until the
+ insert into SYS_COLUMNS is rolled back. */
+ ut_ad(index->table->supports_instant());
+ ut_ad(index->is_primary());
+ } else {
+ lock_update_delete(block, rec);
+ }
+ btr_page_empty(block, buf_block_get_page_zip(block),
+ index, 0, mtr);
+ page_cur_set_after_last(block, btr_cur_get_page_cur(cursor));
+
+ if (index->is_primary()) {
+ /* Concurrent access is prevented by
+ root_block->lock X-latch, so this should be
+ safe. */
+ index->remove_instant();
+ }
+
+ return true;
+ }
+
offsets = rec_get_offsets(rec, cursor->index, offsets, true,
ULINT_UNDEFINED, &heap);
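
The early-exit added above declares the tree logically empty when the victim is the last user record, where an instant-ALTER metadata record does not count as a user record. A standalone sketch of that counting rule (hypothetical helper name, not InnoDB's API):

#include <cassert>

// Mirrors the condition used above:
//   page_get_n_recs() == 1 + (index->is_instant() && !rec_is_metadata(rec))
static bool becomes_logically_empty(unsigned n_recs,
                                    bool     index_is_instant,
                                    bool     deleting_metadata)
{
    // An instant index carries one hidden metadata record that does not
    // count as a user record unless it is itself the one being deleted.
    return n_recs == 1u + (index_is_instant && !deleting_metadata);
}

int main()
{
    assert( becomes_logically_empty(1, false, false)); // plain index, last user record
    assert( becomes_logically_empty(2, true,  false)); // metadata + last user record
    assert( becomes_logically_empty(1, true,  true )); // rolling back instant ADD COLUMN
    assert(!becomes_logically_empty(3, true,  false)); // other user records remain
}
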
@@ -5114,9 +5484,29 @@ btr_cur_optimistic_delete_func(
page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
- lock_update_delete(block, rec);
+ if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG)) {
+ /* This should be rolling back instant ADD COLUMN.
+ If this is a recovered transaction, then
+ index->is_instant() will hold until the
+ insert into SYS_COLUMNS is rolled back. */
+ ut_ad(cursor->index->table->supports_instant());
+ ut_ad(cursor->index->is_primary());
+ ut_ad(!page_zip);
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ cursor->index, offsets, mtr);
+ /* We must empty the PAGE_FREE list, because
+ after rollback, this deleted metadata record
+ would have too many fields, and we would be
+ unable to know the size of the freed record. */
+ btr_page_reorganize(btr_cur_get_page_cur(cursor),
+ cursor->index, mtr);
+ goto func_exit;
+ } else {
+ lock_update_delete(block, rec);
- btr_search_update_hash_on_delete(cursor);
+ btr_search_update_hash_on_delete(cursor);
+ }
if (page_zip) {
#ifdef UNIV_ZIP_DEBUG
@@ -5145,7 +5535,7 @@ btr_cur_optimistic_delete_func(
into non-leaf pages, into clustered indexes,
or into the change buffer. */
if (!dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)
+ && !cursor->index->table->is_temporary()
&& !dict_index_is_ibuf(cursor->index)) {
ibuf_update_free_bits_low(block, max_ins, mtr);
}
@@ -5156,6 +5546,7 @@ btr_cur_optimistic_delete_func(
btr_cur_prefetch_siblings(block);
}
+func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -5217,8 +5608,8 @@ btr_cur_pessimistic_delete(
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr->is_named_space(index->table->space));
if (!has_reserved_extents) {
/* First reserve enough free space for the file segments
@@ -5228,7 +5619,7 @@ btr_cur_pessimistic_delete(
ulint n_extents = cursor->tree_height / 32 + 1;
success = fsp_reserve_free_extents(&n_reserved,
- index->space,
+ index->table->space,
n_extents,
FSP_CLEANING, mtr);
if (!success) {
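
The unchanged context above sizes the file-segment reservation as tree_height / 32 + 1 extents before a pessimistic delete. A trivial worked example of that formula, outside any InnoDB types:

#include <cstdio>

int main()
{
    // n_extents = tree_height / 32 + 1, as in the reservation above
    for (unsigned tree_height : {1u, 2u, 31u, 32u, 65u}) {
        unsigned n_extents = tree_height / 32 + 1;
        std::printf("tree height %2u -> reserve %u extent(s)\n",
                    tree_height, n_extents);
    }
}
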
@@ -5257,29 +5648,82 @@ btr_cur_pessimistic_delete(
#endif /* UNIV_ZIP_DEBUG */
}
- if (flags == 0) {
- lock_update_delete(block, rec);
- }
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
- && UNIV_UNLIKELY(dict_index_get_page(index)
- != block->page.id.page_no())) {
+ if (page_is_leaf(page)) {
+ const bool is_metadata = rec_get_info_bits(
+ rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG;
+ if (UNIV_UNLIKELY(is_metadata)) {
+ /* This should be rolling back instant ADD COLUMN.
+ If this is a recovered transaction, then
+ index->is_instant() will hold until the
+ insert into SYS_COLUMNS is rolled back. */
+ ut_ad(rollback);
+ ut_ad(index->table->supports_instant());
+ ut_ad(index->is_primary());
+ } else if (flags == 0) {
+ lock_update_delete(block, rec);
+ }
+
+ if (!page_is_root(page)) {
+ if (page_get_n_recs(page) < 2) {
+ goto discard_page;
+ }
+ } else if (page_get_n_recs(page) == 1
+ + (index->is_instant()
+ && !rec_is_metadata(rec, index))) {
+ /* The whole index (and table) becomes logically empty.
+ Empty the whole page. That is, if we are deleting the
+ only user record, also delete the metadata record
+ if one exists (it exists if and only if is_instant()).
+ If we are deleting the metadata record and the
+ table becomes empty, clean up the whole page. */
+ ut_ad(!index->is_instant()
+ || rec_is_metadata(
+ page_rec_get_next_const(
+ page_get_infimum_rec(page)),
+ index));
+ btr_page_empty(block, page_zip, index, 0, mtr);
+ page_cur_set_after_last(block,
+ btr_cur_get_page_cur(cursor));
+ if (index->is_primary()) {
+ /* Concurrent access is prevented by
+ index->lock and root_block->lock
+ X-latch, so this should be safe. */
+ index->remove_instant();
+ }
+ ret = TRUE;
+ goto return_after_reservations;
+ }
- /* If there is only one record, drop the whole page in
- btr_discard_page, if this is not the root page */
+ if (UNIV_LIKELY(!is_metadata)) {
+ btr_search_update_hash_on_delete(cursor);
+ } else {
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ index, offsets, mtr);
+ /* We must empty the PAGE_FREE list, because
+ after rollback, this deleted metadata record
+ would carry too many fields, and we would be
+ unable to know the size of the freed record. */
+ btr_page_reorganize(btr_cur_get_page_cur(cursor),
+ index, mtr);
+ ut_ad(!ret);
+ goto return_after_reservations;
+ }
+ } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) {
+ if (page_rec_is_last(rec, page)) {
+discard_page:
+ ut_ad(page_get_n_recs(page) == 1);
+ /* If there is only one record, drop
+ the whole page. */
- btr_discard_page(cursor, mtr);
+ btr_discard_page(cursor, mtr);
- ret = TRUE;
- goto return_after_reservations;
- }
+ ret = TRUE;
+ goto return_after_reservations;
+ }
- if (page_is_leaf(page)) {
- btr_search_update_hash_on_delete(cursor);
- } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) {
rec_t* next_rec = page_rec_get_next(rec);
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
+ if (!page_has_prev(page)) {
/* If we delete the leftmost node pointer on a
non-leaf level, we must mark the new leftmost node
@@ -5329,9 +5773,9 @@ btr_cur_pessimistic_delete(
on a page, we have to change the parent node pointer
so that it is equal to the new leftmost node pointer
on the page */
- ulint level = btr_page_get_level(page, mtr);
btr_node_ptr_delete(index, block, mtr);
+ const ulint level = btr_page_get_level(page);
dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, block->page.id.page_no(),
@@ -5394,10 +5838,7 @@ return_after_reservations:
has a segment header and is already modified in most cases.*/
}
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
+ index->table->space->release_free_extents(n_reserved);
return(ret);
}
@@ -5443,7 +5884,7 @@ btr_cur_add_path_info(
slot->nth_rec = page_rec_get_n_recs_before(rec);
slot->n_recs = page_get_n_recs(page);
slot->page_no = page_get_page_no(page);
- slot->page_level = btr_page_get_level_low(page);
+ slot->page_level = btr_page_get_level(page);
}
/*******************************************************************//**
@@ -5459,31 +5900,28 @@ the number of pages between slot1->page and slot2->page (which is
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
@return number of rows, not including the borders (exact or estimated) */
static
-int64_t
+ha_rows
btr_estimate_n_rows_in_range_on_level(
/*==================================*/
dict_index_t* index, /*!< in: index */
btr_path_t* slot1, /*!< in: left border */
btr_path_t* slot2, /*!< in: right border */
- int64_t n_rows_on_prev_level, /*!< in: number of rows
+ ha_rows n_rows_on_prev_level, /*!< in: number of rows
on the previous level for the
same descend paths; used to
determine the number of pages
on this level */
- ibool* is_n_rows_exact) /*!< out: TRUE if the returned
+ bool* is_n_rows_exact) /*!< out: TRUE if the returned
value is exact i.e. not an
estimation */
{
- int64_t n_rows;
- ulint n_pages_read;
+ ha_rows n_rows = 0;
+ uint n_pages_read = 0;
ulint level;
- n_rows = 0;
- n_pages_read = 0;
-
/* Assume by default that we will scan all pages between
slot1->page_no and slot2->page_no. */
- *is_n_rows_exact = TRUE;
+ *is_n_rows_exact = true;
/* Add records from slot1->page_no which are to the right of
the record which serves as a left border of the range, if any
@@ -5508,10 +5946,8 @@ btr_estimate_n_rows_in_range_on_level(
average from the pages scanned so far. */
# define N_PAGES_READ_LIMIT 10
- page_id_t page_id(
- dict_index_get_space(index), slot1->page_no);
- const fil_space_t* space = fil_space_get(index->space);
- ut_ad(space);
+ const fil_space_t* space = index->table->space;
+ page_id_t page_id(space->id, slot1->page_no);
const page_size_t page_size(space->flags);
level = slot1->page_level;
@@ -5560,7 +5996,7 @@ btr_estimate_n_rows_in_range_on_level(
reuses them. */
if (!fil_page_index_page_check(page)
|| btr_page_get_index_id(page) != index->id
- || btr_page_get_level_low(page) != level) {
+ || btr_page_get_level(page) != level) {
/* The page got reused for something else */
mtr_commit(&mtr);
@@ -5601,7 +6037,7 @@ btr_estimate_n_rows_in_range_on_level(
inexact:
- *is_n_rows_exact = FALSE;
+ *is_n_rows_exact = false;
/* We did interrupt before reaching slot2->page */
@@ -5609,8 +6045,7 @@ inexact:
/* The number of pages on this level is
n_rows_on_prev_level, multiply it by the
average number of recs per page so far */
- n_rows = n_rows_on_prev_level
- * n_rows / n_pages_read;
+ n_rows = n_rows_on_prev_level * n_rows / n_pages_read;
} else {
/* The tree changed before we could even
start with slot1->page_no */
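
A worked instance of the extrapolation above, with made-up numbers: the partial row count from the pages actually read is scaled by the page count implied by the previous level.

#include <cassert>
#include <cstdint>

int main()
{
    // Hypothetical numbers, only to illustrate the formula above.
    uint64_t n_rows_on_prev_level = 50;   // == number of pages on this level
    uint64_t n_rows               = 2000; // rows counted so far on this level
    uint64_t n_pages_read         = 10;   // pages visited before interruption

    // Average ~200 rows per page, extrapolated to all 50 pages.
    n_rows = n_rows_on_prev_level * n_rows / n_pages_read;
    assert(n_rows == 10000);
}
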
@@ -5629,7 +6064,7 @@ static const unsigned rows_in_range_max_retries = 4;
/** We pretend that a range has that many records if the tree keeps changing
for rows_in_range_max_retries retries while we try to estimate the records
in a given range. */
-static const int64_t rows_in_range_arbitrary_ret_val = 10;
+static const ha_rows rows_in_range_arbitrary_ret_val = 10;
/** Estimates the number of rows in a given index range.
@param[in] index index
@@ -5646,7 +6081,7 @@ rows_in_range_arbitrary_ret_val as a result (if
nth_attempt >= rows_in_range_max_retries and the tree is modified between
the two dives). */
static
-int64_t
+ha_rows
btr_estimate_n_rows_in_range_low(
dict_index_t* index,
const dtuple_t* tuple1,
@@ -5660,14 +6095,14 @@ btr_estimate_n_rows_in_range_low(
btr_cur_t cursor;
btr_path_t* slot1;
btr_path_t* slot2;
- ibool diverged;
- ibool diverged_lot;
+ bool diverged;
+ bool diverged_lot;
ulint divergence_level;
- int64_t n_rows;
- ibool is_n_rows_exact;
+ ha_rows n_rows;
+ bool is_n_rows_exact;
ulint i;
mtr_t mtr;
- int64_t table_n_rows;
+ ha_rows table_n_rows;
table_n_rows = dict_table_get_n_rows(index->table);
@@ -5803,16 +6238,16 @@ btr_estimate_n_rows_in_range_low(
/* We have the path information for the range in path1 and path2 */
n_rows = 0;
- is_n_rows_exact = TRUE;
+ is_n_rows_exact = true;
/* This becomes true when the two paths do not pass through the
same pages anymore. */
- diverged = FALSE;
+ diverged = false;
/* This becomes true when the paths are not the same or adjacent
any more. This means that they pass through the same or
neighboring-on-the-same-level pages only. */
- diverged_lot = FALSE;
+ diverged_lot = false;
/* This is the level where paths diverged a lot. */
divergence_level = 1000000;
@@ -5935,15 +6370,12 @@ btr_estimate_n_rows_in_range_low(
return(rows_in_range_arbitrary_ret_val);
}
- const int64_t ret =
- btr_estimate_n_rows_in_range_low(
- index, tuple1, mode1,
- tuple2, mode2, nth_attempt + 1);
-
- return(ret);
+ return btr_estimate_n_rows_in_range_low(
+ index, tuple1, mode1,
+ tuple2, mode2, nth_attempt + 1);
}
- diverged = TRUE;
+ diverged = true;
if (slot1->nth_rec < slot2->nth_rec) {
/* We do not count the borders (nor the left
@@ -5956,7 +6388,7 @@ btr_estimate_n_rows_in_range_low(
and slot2, so on the level below the
slots will point to non-adjacent
pages. */
- diverged_lot = TRUE;
+ diverged_lot = true;
divergence_level = i;
}
} else {
@@ -5978,7 +6410,7 @@ btr_estimate_n_rows_in_range_low(
if (slot1->nth_rec < slot1->n_recs
|| slot2->nth_rec > 1) {
- diverged_lot = TRUE;
+ diverged_lot = true;
divergence_level = i;
n_rows = 0;
@@ -6008,7 +6440,7 @@ btr_estimate_n_rows_in_range_low(
@param[in] tuple2 range end, may also be empty tuple
@param[in] mode2 search mode for range end
@return estimated number of rows */
-int64_t
+ha_rows
btr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple1,
@@ -6016,10 +6448,8 @@ btr_estimate_n_rows_in_range(
const dtuple_t* tuple2,
page_cur_mode_t mode2)
{
- const int64_t ret = btr_estimate_n_rows_in_range_low(
- index, tuple1, mode1, tuple2, mode2, 1 /* first attempt */);
-
- return(ret);
+ return btr_estimate_n_rows_in_range_low(
+ index, tuple1, mode1, tuple2, mode2, 1);
}
/*******************************************************************//**
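
btr_estimate_n_rows_in_range() above starts the estimation at attempt 1; per the surrounding comments, the low-level routine re-dives with nth_attempt + 1 when the tree changed between the two dives and, after rows_in_range_max_retries attempts, falls back to rows_in_range_arbitrary_ret_val. A compact standalone sketch of that bounded-retry shape (hypothetical names, constants mirroring the values above):

#include <cassert>
#include <functional>

static const unsigned max_retries   = 4;   // cf. rows_in_range_max_retries
static const unsigned arbitrary_ret = 10;  // cf. rows_in_range_arbitrary_ret_val

// estimate() returns true and fills *rows, or returns false when the
// tree changed between the two dives and the estimate must be redone.
unsigned estimate_with_retries(const std::function<bool(unsigned*)>& estimate,
                               unsigned nth_attempt = 1)
{
    unsigned rows;
    if (estimate(&rows)) {
        return rows;
    }
    if (nth_attempt >= max_retries) {
        return arbitrary_ret;          // give up with a pretend value
    }
    return estimate_with_retries(estimate, nth_attempt + 1);
}

int main()
{
    int failures_left = 2;
    auto flaky = [&](unsigned* rows) {
        if (failures_left-- > 0) return false;   // tree "changed"
        *rows = 1234;
        return true;
    };
    assert(estimate_with_retries(flaky) == 1234);

    auto always_changing = [](unsigned*) { return false; };
    assert(estimate_with_retries(always_changing) == arbitrary_ret);
}
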
@@ -6221,7 +6651,7 @@ btr_estimate_number_of_different_key_vals(
page = btr_cur_get_page(&cursor);
rec = page_rec_get_next(page_get_infimum_rec(page));
- ut_d(const bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
if (!page_rec_is_supremum(rec)) {
not_empty_flag = 1;
@@ -6283,7 +6713,8 @@ btr_estimate_number_of_different_key_vals(
}
}
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
+ if (n_cols == dict_index_get_n_unique_in_tree(index)
+ && page_has_siblings(page)) {
/* If there is more than one leaf page in the tree,
we add one because we know that the first record
@@ -6294,11 +6725,7 @@ btr_estimate_number_of_different_key_vals(
algorithm grossly underestimated the number of rows
in the table. */
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols - 1]++;
- }
+ n_diff[n_cols - 1]++;
}
mtr_commit(&mtr);
@@ -6373,7 +6800,7 @@ btr_rec_get_field_ref_offs(
ut_a(rec_offs_nth_extern(offsets, n));
field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len);
- ut_a(local_len != UNIV_SQL_NULL);
+ ut_a(len_is_stored(local_len));
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE);
@@ -6415,12 +6842,12 @@ btr_rec_get_externally_stored_len(
btr_rec_get_field_ref(rec, offsets, i)
+ BTR_EXTERN_LEN + 4);
- total_extern_len += ut_calc_align(extern_len,
- UNIV_PAGE_SIZE);
+ total_extern_len += ut_calc_align(
+ extern_len, ulint(srv_page_size));
}
}
- return(total_extern_len / UNIV_PAGE_SIZE);
+ return total_extern_len >> srv_page_size_shift;
}
/*******************************************************************//**
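
The function above returns a page count: each externally stored field length is rounded up to a whole page (ut_calc_align) and the running total is converted to pages by shifting right by srv_page_size_shift. A worked example assuming 16 KiB pages (shift 14) and a hypothetical align_up helper:

#include <cassert>
#include <cstdint>

static const uint32_t page_size  = 16384;  // assume 16 KiB pages
static const uint32_t page_shift = 14;     // 1 << 14 == 16384

// Round len up to a multiple of the page size (same idea as ut_calc_align).
static uint64_t align_up(uint64_t len, uint64_t align)
{
    return (len + align - 1) & ~(align - 1);
}

int main()
{
    uint64_t total = 0;
    for (uint64_t extern_len : {40000ull, 100ull, 16384ull}) {
        total += align_up(extern_len, page_size);
    }
    // 40000 bytes -> 3 pages, 100 bytes -> 1 page, 16384 bytes -> 1 page
    assert((total >> page_shift) == 5);
}
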
@@ -6598,10 +7025,10 @@ btr_push_update_extern_fields(
uf->orig_len);
/* Copy the locally stored prefix. */
memcpy(buf, data,
- uf->orig_len
+ unsigned(uf->orig_len)
- BTR_EXTERN_FIELD_REF_SIZE);
/* Copy the BLOB pointer. */
- memcpy(buf + uf->orig_len
+ memcpy(buf + unsigned(uf->orig_len)
- BTR_EXTERN_FIELD_REF_SIZE,
data + len - BTR_EXTERN_FIELD_REF_SIZE,
BTR_EXTERN_FIELD_REF_SIZE);
@@ -6645,7 +7072,6 @@ static
void
btr_blob_free(
/*==========*/
- dict_index_t* index, /*!< in: index */
buf_block_t* block, /*!< in: buffer block */
ibool all, /*!< in: TRUE=remove also the compressed page
if there is one */
@@ -6655,7 +7081,7 @@ btr_blob_free(
ulint space = block->page.id.space();
ulint page_no = block->page.id.page_no();
- ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
mtr_commit(mtr);
@@ -6753,23 +7179,20 @@ struct btr_blob_log_check_t {
const mtr_log_t log_mode = m_mtr->get_log_mode();
m_mtr->start();
m_mtr->set_log_mode(log_mode);
- m_mtr->set_named_space(index->space);
+ index->set_modified(*m_mtr);
m_mtr->set_flush_observer(observer);
if (m_op == BTR_STORE_INSERT_BULK) {
- page_id_t page_id(dict_index_get_space(index),
- page_no);
- page_size_t page_size(dict_table_page_size(
- index->table));
- page_cur_t* page_cur = &m_pcur->btr_cur.page_cur;
-
mtr_x_lock(dict_index_get_lock(index), m_mtr);
- page_cur->block = btr_block_get(
- page_id, page_size, RW_X_LATCH, index, m_mtr);
- page_cur->rec = buf_block_get_frame(page_cur->block)
+ m_pcur->btr_cur.page_cur.block = btr_block_get(
+ page_id_t(index->table->space_id, page_no),
+ page_size_t(index->table->space->flags),
+ RW_X_LATCH, index, m_mtr);
+ m_pcur->btr_cur.page_cur.rec
+ = m_pcur->btr_cur.page_cur.block->frame
+ offs;
- buf_block_buf_fix_dec(page_cur->block);
+ buf_block_buf_fix_dec(m_pcur->btr_cur.page_cur.block);
} else {
ut_ad(m_pcur->rel_pos == BTR_PCUR_ON);
bool ret = btr_pcur_restore_position(
@@ -6782,8 +7205,8 @@ struct btr_blob_log_check_t {
*m_block = btr_pcur_get_block(m_pcur);
*m_rec = btr_pcur_get_rec(m_pcur);
- ut_d(rec_offs_make_valid(
- *m_rec, index, const_cast<ulint*>(m_offsets)));
+ rec_offs_make_valid(*m_rec, index, true,
+ const_cast<ulint*>(m_offsets));
ut_ad(m_mtr->memo_contains_page_flagged(
*m_rec,
@@ -6847,8 +7270,7 @@ btr_store_big_rec_extern_fields(
ut_ad(rec_offs_any_extern(offsets));
ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_block_fix(
- btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
ut_a(dict_index_is_clust(index));
@@ -6874,7 +7296,7 @@ btr_store_big_rec_extern_fields(
heap = mem_heap_create(250000);
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, page_zip_level,
+ err = deflateInit2(&c_stream, int(page_zip_level),
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
}
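
The int() cast added above is needed because page_zip_level is an unsigned server variable while zlib's deflateInit2() takes a plain int compression level. A self-contained zlib sketch with the same window bits (15), memLevel (7) and strategy as the call above; level 6 stands in for page_zip_level (link with -lz):

#include <cassert>
#include <cstring>
#include <zlib.h>

int main()
{
    const char input[] = "example payload for the BLOB compression path";
    unsigned char out[256];

    z_stream strm;
    std::memset(&strm, 0, sizeof strm);  // use zlib's default allocators

    unsigned long level = 6;             // stands in for page_zip_level (ulong)
    int err = deflateInit2(&strm, int(level), Z_DEFLATED,
                           15 /* windowBits */, 7 /* memLevel */,
                           Z_DEFAULT_STRATEGY);
    assert(err == Z_OK);

    strm.next_in   = reinterpret_cast<Bytef*>(const_cast<char*>(input));
    strm.avail_in  = sizeof input;
    strm.next_out  = out;
    strm.avail_out = sizeof out;

    err = deflate(&strm, Z_FINISH);      // compress everything in one call
    assert(err == Z_STREAM_END);

    deflateEnd(&strm);
    return 0;
}
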
@@ -6956,8 +7378,8 @@ btr_store_big_rec_extern_fields(
rec_page_no = rec_block->page.id.page_no();
}
- mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ mtr.start();
+ index->set_modified(mtr);
mtr.set_log_mode(btr_mtr->get_log_mode());
mtr.set_flush_observer(btr_mtr->get_flush_observer());
@@ -6973,18 +7395,19 @@ btr_store_big_rec_extern_fields(
mtr_t *alloc_mtr;
if (op == BTR_STORE_INSERT_BULK) {
- mtr_start(&mtr_bulk);
+ mtr_bulk.start();
mtr_bulk.set_spaces(mtr);
alloc_mtr = &mtr_bulk;
} else {
alloc_mtr = &mtr;
}
- if (!fsp_reserve_free_extents(&r_extents, space_id, 1,
+ if (!fsp_reserve_free_extents(&r_extents,
+ index->table->space, 1,
FSP_BLOB, alloc_mtr,
1)) {
- mtr_commit(alloc_mtr);
+ alloc_mtr->commit();
error = DB_OUT_OF_FILE_SPACE;
goto func_exit;
}
@@ -6992,10 +7415,10 @@ btr_store_big_rec_extern_fields(
block = btr_page_alloc(index, hint_page_no, FSP_NO_DIR,
0, alloc_mtr, &mtr);
- alloc_mtr->release_free_extents(r_extents);
+ index->table->space->release_free_extents(r_extents);
if (op == BTR_STORE_INSERT_BULK) {
- mtr_commit(&mtr_bulk);
+ mtr_bulk.commit();
}
ut_a(block != NULL);
@@ -7161,7 +7584,7 @@ next_zip_page:
/* Commit mtr and release the
uncompressed page frame to save memory. */
- btr_blob_free(index, block, FALSE, &mtr);
+ btr_blob_free(block, FALSE, &mtr);
if (err == Z_STREAM_END) {
break;
@@ -7222,7 +7645,7 @@ next_zip_page:
prev_page_no = page_no;
- mtr_commit(&mtr);
+ mtr.commit();
if (extern_len == 0) {
break;
@@ -7288,11 +7711,11 @@ btr_check_blob_fil_page_type(
ulint flags = fil_space_get_flags(space_id);
#ifndef UNIV_DEBUG /* Improve debug test coverage */
- if (dict_tf_get_format(flags) == UNIV_FORMAT_A) {
+ if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
/* Old versions of InnoDB did not initialize
FIL_PAGE_TYPE on BLOB pages. Do not print
anything about the type mismatch when reading
- a BLOB page that is in Antelope format.*/
+ a BLOB page that may be from old versions. */
return;
}
#endif /* !UNIV_DEBUG */
@@ -7343,11 +7766,11 @@ btr_free_externally_stored_field(
ulint next_page_no;
mtr_t mtr;
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(mtr_memo_contains_flagged(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
- ut_ad(mtr_is_page_fix(
- local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
+ MTR_MEMO_PAGE_X_FIX));
ut_ad(!rec || rec_offs_validate(rec, index, offsets));
ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
ut_ad(local_mtr->is_named_space(
@@ -7365,7 +7788,8 @@ btr_free_externally_stored_field(
ut_ad(!(mach_read_from_4(field_ref + BTR_EXTERN_LEN)
& ~((BTR_EXTERN_OWNER_FLAG
| BTR_EXTERN_INHERITED_FLAG) << 24)));
- ut_ad(space_id == index->space);
+ ut_ad(space_id == index->table->space->id);
+ ut_ad(space_id == index->table->space_id);
const page_size_t ext_page_size(dict_table_page_size(index->table));
const page_size_t& rec_page_size(rec == NULL
@@ -7386,7 +7810,7 @@ btr_free_externally_stored_field(
mtr.set_spaces(*local_mtr);
mtr.set_log_mode(local_mtr->get_log_mode());
- ut_ad(!dict_table_is_temporary(index->table)
+ ut_ad(!index->table->is_temporary()
|| local_mtr->get_log_mode() == MTR_LOG_NO_REDO);
const page_t* p = page_align(field_ref);
@@ -7480,7 +7904,7 @@ btr_free_externally_stored_field(
MLOG_4BYTES, &mtr);
/* Zero out the BLOB length. If the server
crashes during the execution of this function,
- trx_rollback_or_clean_all_recovered() could
+ trx_rollback_all_recovered() could
dereference the half-deleted BLOB, fetching a
wrong prefix for the BLOB. */
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
@@ -7489,7 +7913,7 @@ btr_free_externally_stored_field(
}
/* Commit mtr and release the BLOB block to save memory. */
- btr_blob_free(index, ext_block, TRUE, &mtr);
+ btr_blob_free(ext_block, TRUE, &mtr);
}
}
@@ -7514,8 +7938,8 @@ btr_rec_free_externally_stored_fields(
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
- ut_ad(dict_index_is_clust(index));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(index->is_primary());
ut_ad(page_rec_is_leaf(rec));
/* Free possible externally stored fields in the record */
@@ -7553,7 +7977,7 @@ btr_rec_free_updated_extern_fields(
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
/* Free possible externally stored fields in the record */
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 036e2ea776c..e3c7ef8a768 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
-Copyright (C) 2014, 2017, MariaDB Corporation.
+Copyright (C) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -161,14 +161,14 @@ btr_defragment_add_index(
dberr_t* err) /*!< out: error code */
{
mtr_t mtr;
- ulint page_no = dict_index_get_page(index);
*err = DB_SUCCESS;
mtr_start(&mtr);
// Load index root page.
- const page_id_t page_id(dict_index_get_space(index), page_no);
- const page_size_t page_size(dict_table_page_size(index->table));
- buf_block_t* block = btr_block_get(page_id, page_size, RW_NO_LATCH, index, &mtr);
+ buf_block_t* block = btr_block_get(
+ page_id_t(index->table->space_id, index->page),
+ page_size_t(index->table->space->flags),
+ RW_NO_LATCH, index, &mtr);
page_t* page = NULL;
if (block) {
@@ -314,7 +314,7 @@ btr_defragment_save_defrag_stats_if_needed(
dict_index_t* index) /*!< in: index */
{
if (srv_defragment_stats_accuracy != 0 // stats tracking disabled
- && dict_index_get_space(index) != 0 // do not track system tables
+ && index->table->space_id != 0 // do not track system tables
&& index->stat_defrag_modified_counter
>= srv_defragment_stats_accuracy) {
dict_stats_defrag_pool_add(index);
@@ -386,8 +386,7 @@ btr_defragment_merge_pages(
{
page_t* from_page = buf_block_get_frame(from_block);
page_t* to_page = buf_block_get_frame(to_block);
- ulint space = dict_index_get_space(index);
- ulint level = btr_page_get_level(from_page, mtr);
+ ulint level = btr_page_get_level(from_page);
ulint n_recs = page_get_n_recs(from_page);
ulint new_data_size = page_get_data_size(to_page);
ulint max_ins_size =
@@ -406,7 +405,7 @@ btr_defragment_merge_pages(
// Estimate how many records can be moved from the from_page to
// the to_page.
if (page_size.is_compressed()) {
- ulint page_diff = UNIV_PAGE_SIZE - *max_data_size;
+ ulint page_diff = srv_page_size - *max_data_size;
max_ins_size_to_use = (max_ins_size_to_use > page_diff)
? max_ins_size_to_use - page_diff : 0;
}
@@ -479,7 +478,7 @@ btr_defragment_merge_pages(
} else {
ibuf_update_free_bits_if_full(
to_block,
- UNIV_PAGE_SIZE,
+ srv_page_size,
ULINT_UNDEFINED);
}
}
@@ -489,7 +488,9 @@ btr_defragment_merge_pages(
lock_update_merge_left(to_block, orig_pred,
from_block);
btr_search_drop_page_hash_index(from_block);
- btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
+ btr_level_list_remove(
+ index->table->space_id,
+ page_size, from_page, index, mtr);
btr_node_ptr_delete(index, from_block, mtr);
/* btr_blob_dbg_remove(from_page, index,
"btr_defragment_n_pages"); */
@@ -540,7 +541,6 @@ btr_defragment_n_pages(
uint n_pages,/*!< in: number of pages to defragment */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint space;
/* We will need to load the n+1 block because if the last page is freed
we need to modify the prev_page_no of that block. */
buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1];
@@ -551,7 +551,6 @@ btr_defragment_n_pages(
ulint data_size_per_rec;
ulint optimal_page_size;
ulint reserved_space;
- ulint level;
ulint max_data_size = 0;
uint n_defragmented = 0;
uint n_new_slots;
@@ -561,8 +560,11 @@ btr_defragment_n_pages(
/* It doesn't make sense to call this function with n_pages = 1. */
ut_ad(n_pages > 1);
- space = dict_index_get_space(index);
- if (space == 0) {
+ if (!page_is_leaf(block->frame)) {
+ return NULL;
+ }
+
+ if (!index->table->space || !index->table->space_id) {
/* Ignore space 0. */
return NULL;
}
@@ -572,12 +574,7 @@ btr_defragment_n_pages(
}
first_page = buf_block_get_frame(block);
- level = btr_page_get_level(first_page, mtr);
- const page_size_t page_size(dict_table_page_size(index->table));
-
- if (level != 0) {
- return NULL;
- }
+ const page_size_t page_size(index->table->space->flags);
/* 1. Load the pages and calculate the total data size. */
blocks[0] = block;
@@ -592,14 +589,13 @@ btr_defragment_n_pages(
break;
}
- const page_id_t page_id(dict_index_get_space(index), page_no);
-
- blocks[i] = btr_block_get(page_id, page_size,
+ blocks[i] = btr_block_get(page_id_t(index->table->space_id,
+ page_no), page_size,
RW_X_LATCH, index, mtr);
}
if (n_pages == 1) {
- if (btr_page_get_prev(first_page, mtr) == FIL_NULL) {
+ if (!page_has_prev(first_page)) {
/* last page in the index */
if (dict_index_get_page(index)
== page_get_page_no(first_page))
@@ -622,7 +618,7 @@ btr_defragment_n_pages(
// For compressed pages, we take compression failures into account.
if (page_size.is_compressed()) {
ulint size = 0;
- int i = 0;
+ uint i = 0;
// We estimate the optimal data size of the index using samples of
// data size. These samples are taken when pages failed to
// compress due to insertion on the page. We use the average
@@ -636,7 +632,7 @@ btr_defragment_n_pages(
size += index->stat_defrag_data_size_sample[i];
}
if (i != 0) {
- size = size / i;
+ size /= i;
optimal_page_size = ut_min(optimal_page_size, size);
}
max_data_size = optimal_page_size;
@@ -748,7 +744,7 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
mtr_start(&mtr);
cursor = btr_pcur_get_btr_cur(pcur);
index = btr_cur_get_index(cursor);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
/* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
This entitles us to acquire page latches in any order for the index. */
mtr_x_lock(&index->lock, &mtr);
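
For compressed pages, the defragmentation code above first discounts page_diff = srv_page_size - max_data_size from the usable insert size, then caps the target data size by the average of the recorded compression-failure samples. A small numeric sketch of those two adjustments with hypothetical values:

#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

int main()
{
    const unsigned long srv_page_size = 16384;   // assumed 16 KiB page

    // 1) Discount the part of the page the compressed format cannot use.
    unsigned long max_data_size       = 15000;   // hypothetical
    unsigned long max_ins_size_to_use = 3000;    // hypothetical
    unsigned long page_diff           = srv_page_size - max_data_size;   // 1384
    max_ins_size_to_use = max_ins_size_to_use > page_diff
                        ? max_ins_size_to_use - page_diff : 0;           // 1616
    assert(max_ins_size_to_use == 1616);

    // 2) Cap the target data size by the average of compression-failure samples.
    std::vector<unsigned long> samples = {14000, 12000, 13000};          // hypothetical
    unsigned long avg = std::accumulate(samples.begin(), samples.end(), 0ul)
                      / samples.size();                                  // 13000
    unsigned long optimal_page_size = std::min(15500ul, avg);
    assert(optimal_page_size == 13000);
}
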
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 2b85c764a3b..41661d226e1 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -127,20 +127,18 @@ btr_pcur_store_position(
mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)));
+ cursor->old_stored = true;
+
if (page_is_empty(page)) {
/* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_a(!page_has_siblings(page));
ut_ad(page_is_leaf(page));
ut_ad(page_get_page_no(page) == index->page);
- cursor->old_stored = true;
-
if (page_rec_is_supremum_low(offs)) {
-
cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
} else {
cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
@@ -150,21 +148,25 @@ btr_pcur_store_position(
}
if (page_rec_is_supremum_low(offs)) {
-
rec = page_rec_get_prev(rec);
- cursor->rel_pos = BTR_PCUR_AFTER;
+ ut_ad(!page_rec_is_infimum(rec));
+ ut_ad(!rec_is_metadata(rec, index));
+ cursor->rel_pos = BTR_PCUR_AFTER;
} else if (page_rec_is_infimum_low(offs)) {
-
rec = page_rec_get_next(rec);
+ if (rec_is_metadata(rec, index)) {
+ rec = page_rec_get_next(rec);
+ ut_ad(!page_rec_is_supremum(rec));
+ }
+
cursor->rel_pos = BTR_PCUR_BEFORE;
} else {
cursor->rel_pos = BTR_PCUR_ON;
}
- cursor->old_stored = true;
cursor->old_rec = dict_index_copy_rec_order_prefix(
index, rec, &cursor->old_n_fields,
&cursor->old_rec_buf, &cursor->buf_size);
@@ -353,7 +355,11 @@ btr_pcur_restore_position_func(
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
- cursor, 0, file, line, mtr);
+ cursor,
+#ifdef BTR_CUR_HASH_ADAPT
+ NULL,
+#endif /* BTR_CUR_HASH_ADAPT */
+ file, line, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
@@ -487,7 +493,7 @@ btr_pcur_move_backward_from_page(
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
ut_ad(btr_pcur_is_before_first_on_page(cursor));
- ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
+ ut_ad(!btr_pcur_is_before_first_in_tree(cursor));
latch_mode = cursor->latch_mode;
@@ -559,7 +565,7 @@ btr_pcur_move_to_prev(
if (btr_pcur_is_before_first_on_page(cursor)) {
- if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
+ if (btr_pcur_is_before_first_in_tree(cursor)) {
return(FALSE);
}
diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc
index 376a106bf8a..7d8966d4109 100644
--- a/storage/innobase/btr/btr0scrub.cc
+++ b/storage/innobase/btr/btr0scrub.cc
@@ -133,7 +133,7 @@ btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
if (lock_to_close_table) {
} else if (fil_space_t* space = fil_space_acquire(space_id)) {
bool stopping = space->is_stopping();
- fil_space_release(space);
+ space->release();
if (stopping) {
return false;
}
@@ -209,7 +209,7 @@ btr_scrub_table_close_for_thread(
btr_scrub_table_close(scrub_data->current_table);
mutex_exit(&dict_sys->mutex);
}
- fil_space_release(space);
+ space->release();
}
scrub_data->current_table = NULL;
@@ -423,7 +423,7 @@ btr_pessimistic_scrub(
* so that splitting won't fail due to this */
ulint n_extents = 3;
ulint n_reserved = 0;
- if (!fsp_reserve_free_extents(&n_reserved, index->space,
+ if (!fsp_reserve_free_extents(&n_reserved, index->table->space,
n_extents, FSP_NORMAL, mtr)) {
log_scrub_failure(index, scrub_data, block,
DB_OUT_OF_FILE_SPACE);
@@ -432,12 +432,9 @@ btr_pessimistic_scrub(
/* read block variables */
const ulint page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
- const page_id_t page_id(dict_index_get_space(index), page_no);
- const ulint left_page_no = btr_page_get_prev(page, mtr);
- const ulint right_page_no = btr_page_get_next(page, mtr);
- const page_id_t lpage_id(dict_index_get_space(index), left_page_no);
- const page_id_t rpage_id(dict_index_get_space(index), right_page_no);
- const page_size_t page_size(dict_table_page_size(index->table));
+ const ulint left_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
+ const ulint right_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
+ const page_size_t page_size(index->table->space->flags);
/**
* When splitting page, we need X-latches on left/right brothers
@@ -453,15 +450,15 @@ btr_pessimistic_scrub(
mtr->release_block_at_savepoint(scrub_data->savepoint, block);
buf_block_t* get_block __attribute__((unused)) = btr_block_get(
- lpage_id, page_size,
- RW_X_LATCH, index, mtr);
+ page_id_t(index->table->space_id, left_page_no),
+ page_size, RW_X_LATCH, index, mtr);
/**
* Refetch block and re-initialize page
*/
block = btr_block_get(
- page_id, page_size,
- RW_X_LATCH, index, mtr);
+ page_id_t(index->table->space_id, page_no),
+ page_size, RW_X_LATCH, index, mtr);
page = buf_block_get_frame(block);
@@ -474,8 +471,8 @@ btr_pessimistic_scrub(
if (right_page_no != FIL_NULL) {
buf_block_t* get_block __attribute__((unused))= btr_block_get(
- rpage_id, page_size,
- RW_X_LATCH, index, mtr);
+ page_id_t(index->table->space_id, right_page_no),
+ page_size, RW_X_LATCH, index, mtr);
}
/* arguments to btr_page_split_and_insert */
@@ -522,10 +519,7 @@ btr_pessimistic_scrub(
mem_heap_free(heap);
}
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
+ index->table->space->release_free_extents(n_reserved);
scrub_data->scrub_stat.page_splits++;
return DB_SUCCESS;
}
@@ -674,7 +668,7 @@ btr_scrub_free_page(
* it will be found by the scrubbing thread again
*/
memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
- UNIV_PAGE_SIZE - PAGE_HEADER);
+ srv_page_size - PAGE_HEADER);
mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
FIL_PAGE_TYPE_ALLOCATED);
@@ -792,13 +786,14 @@ btr_scrub_page(
/* check that table/index still match now that they are loaded */
- if (scrub_data->current_table->space != scrub_data->space) {
+ if (!scrub_data->current_table->space
+ || scrub_data->current_table->space_id != scrub_data->space) {
/* this is truncate table */
mtr_commit(mtr);
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}
- if (scrub_data->current_index->space != scrub_data->space) {
+ if (scrub_data->current_index->table != scrub_data->current_table) {
/* this is truncate table */
mtr_commit(mtr);
return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index b494565b288..9c2dedeef9e 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -45,10 +45,10 @@ Created 2/17/1996 Heikki Tuuri
/** Is search system enabled.
Search system is protected by array of latches. */
-char btr_search_enabled = true;
+char btr_search_enabled;
/** Number of adaptive hash index partitions. */
-ulong btr_ahi_parts = 8;
+ulong btr_ahi_parts;
#ifdef UNIV_SEARCH_PERF_STAT
/** Number of successful adaptive hash index lookups */
@@ -80,11 +80,78 @@ btr_search_sys_t* btr_search_sys;
/** If the number of records on the page divided by this parameter
would have been successfully accessed using a hash index, the index
is then built on the page, assuming the global limit has been reached */
-#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
+#define BTR_SEARCH_PAGE_BUILD_LIMIT 16U
/** The global limit for consecutive potentially successful hash searches,
before hash index building is started */
-#define BTR_SEARCH_BUILD_LIMIT 100
+#define BTR_SEARCH_BUILD_LIMIT 100U
+
+/** Compute a hash value of a record in a page.
+@param[in] rec index record
+@param[in] offsets return value of rec_get_offsets()
+@param[in] n_fields number of complete fields to fold
+@param[in] n_bytes number of bytes to fold in the last field
+@param[in] index_id index tree ID
+@return the hash value */
+static inline
+ulint
+rec_fold(
+ const rec_t* rec,
+ const ulint* offsets,
+ ulint n_fields,
+ ulint n_bytes,
+ index_id_t tree_id)
+{
+ ulint i;
+ const byte* data;
+ ulint len;
+ ulint fold;
+ ulint n_fields_rec;
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_validate(rec, offsets));
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(!page_rec_is_metadata(rec));
+ ut_ad(n_fields > 0 || n_bytes > 0);
+
+ n_fields_rec = rec_offs_n_fields(offsets);
+ ut_ad(n_fields <= n_fields_rec);
+ ut_ad(n_fields < n_fields_rec || n_bytes == 0);
+
+ if (n_fields > n_fields_rec) {
+ n_fields = n_fields_rec;
+ }
+
+ if (n_fields == n_fields_rec) {
+ n_bytes = 0;
+ }
+
+ fold = ut_fold_ull(tree_id);
+
+ for (i = 0; i < n_fields; i++) {
+ data = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len != UNIV_SQL_NULL) {
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ if (n_bytes > 0) {
+ data = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len != UNIV_SQL_NULL) {
+ if (len > n_bytes) {
+ len = n_bytes;
+ }
+
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ return(fold);
+}
/** Determine the number of accessed key fields.
@param[in] n_fields number of complete fields
@@ -110,23 +177,6 @@ btr_search_get_n_fields(
return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes));
}
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build, or NULL if
- not known */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side);/*!< in: hash for searches from left side? */
-
/** This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
@@ -139,13 +189,13 @@ will not guarantee success.
@param[in] index index handler */
static
void
-btr_search_check_free_space_in_heap(dict_index_t* index)
+btr_search_check_free_space_in_heap(const dict_index_t* index)
{
hash_table_t* table;
mem_heap_t* heap;
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
table = btr_get_search_table(index);
@@ -157,8 +207,9 @@ btr_search_check_free_space_in_heap(dict_index_t* index)
if (heap->free_block == NULL) {
buf_block_t* block = buf_block_alloc(NULL);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
- btr_search_x_lock(index);
+ rw_lock_x_lock(ahi_latch);
if (btr_search_enabled
&& heap->free_block == NULL) {
@@ -167,14 +218,13 @@ btr_search_check_free_space_in_heap(dict_index_t* index)
buf_block_free(block);
}
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
}
}
/** Creates and initializes the adaptive search system at a database start.
@param[in] hash_size hash table size. */
-void
-btr_search_sys_create(ulint hash_size)
+void btr_search_sys_create(ulint hash_size)
{
/* Search System is divided into n parts.
Each part controls access to distinct set of hash buckets from
@@ -215,8 +265,7 @@ btr_search_sys_create(ulint hash_size)
/** Resize hash index hash table.
@param[in] hash_size hash index hash table size */
-void
-btr_search_sys_resize(ulint hash_size)
+void btr_search_sys_resize(ulint hash_size)
{
/* Step-1: Lock all search latches in exclusive mode. */
btr_search_x_lock_all();
@@ -252,10 +301,14 @@ btr_search_sys_resize(ulint hash_size)
}
/** Frees the adaptive search system at a database shutdown. */
-void
-btr_search_sys_free()
+void btr_search_sys_free()
{
- ut_ad(btr_search_sys != NULL && btr_search_latches != NULL);
+ if (!btr_search_sys) {
+ ut_ad(!btr_search_latches);
+ return;
+ }
+
+ ut_ad(btr_search_latches);
/* Step-1: Release the hash tables. */
for (ulint i = 0; i < btr_ahi_parts; ++i) {
@@ -294,18 +347,13 @@ btr_search_disable_ref_count(
for (index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
-
- ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
-
index->search_info->ref_count = 0;
}
}
/** Disable the adaptive hash search system and empty the index.
@param[in] need_mutex need to acquire dict_sys->mutex */
-void
-btr_search_disable(
- bool need_mutex)
+void btr_search_disable(bool need_mutex)
{
dict_table_t* table;
@@ -358,8 +406,7 @@ btr_search_disable(
}
/** Enable the adaptive hash search system. */
-void
-btr_search_enable()
+void btr_search_enable()
{
buf_pool_mutex_enter_all();
if (srv_buf_pool_old_size != srv_buf_pool_size) {
@@ -390,12 +437,10 @@ btr_search_info_get_ref_count(
ut_ad(info);
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
-
- btr_search_s_lock(index);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+ rw_lock_s_lock(ahi_latch);
ret = info->ref_count;
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
return(ret);
}
@@ -415,8 +460,8 @@ btr_search_info_update_hash(
ulint n_unique;
int cmp;
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
if (dict_index_is_ibuf(index)) {
/* So many deletes are performed on an insert buffer tree
@@ -520,17 +565,12 @@ block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent.
@return TRUE if building a (new) hash index on the block is recommended
@param[in,out] info search info
-@param[in,out] block buffer block
-@param[in] cursor cursor */
+@param[in,out] block buffer block */
static
-ibool
-btr_search_update_block_hash_info(
- btr_search_t* info,
- buf_block_t* block,
- const btr_cur_t* cursor)
+bool
+btr_search_update_block_hash_info(btr_search_t* info, buf_block_t* block)
{
- ut_ad(!rw_lock_own_flagged(btr_get_search_latch(cursor->index),
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(!btr_search_own_any());
ut_ad(rw_lock_own_flagged(&block->lock,
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
@@ -570,18 +610,18 @@ btr_search_update_block_hash_info(
if ((!block->index)
|| (block->n_hash_helps
- > 2 * page_get_n_recs(block->frame))
+ > 2U * page_get_n_recs(block->frame))
|| (block->n_fields != block->curr_n_fields)
|| (block->n_bytes != block->curr_n_bytes)
|| (block->left_side != block->curr_left_side)) {
/* Build a new hash index on the page */
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
/** Updates a hash node reference when it has been unsuccessfully used in a
@@ -620,9 +660,9 @@ btr_search_update_hash_ref(
return;
}
- ut_ad(block->page.id.space() == index->space);
- ut_a(index == cursor->index);
- ut_a(!dict_index_is_ibuf(index));
+ ut_ad(block->page.id.space() == index->table->space_id);
+ ut_ad(index == cursor->index);
+ ut_ad(!dict_index_is_ibuf(index));
if ((info->n_hash_potential > 0)
&& (block->curr_n_fields == info->n_fields)
@@ -647,7 +687,6 @@ btr_search_update_hash_ref(
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
ha_insert_for_fold(btr_get_search_table(index), fold,
block, rec);
@@ -656,61 +695,6 @@ btr_search_update_hash_ref(
}
}
-/** Updates the search info.
-@param[in,out] info search info
-@param[in] cursor cursor which was just positioned */
-void
-btr_search_info_update_slow(
- btr_search_t* info,
- btr_cur_t* cursor)
-{
- buf_block_t* block;
- ibool build_index;
-
- ut_ad(!rw_lock_own_flagged(btr_get_search_latch(cursor->index),
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
-
- block = btr_cur_get_block(cursor);
-
- /* NOTE that the following two function calls do NOT protect
- info or block->n_fields etc. with any semaphore, to save CPU time!
- We cannot assume the fields are consistent when we return from
- those functions! */
-
- btr_search_info_update_hash(info, cursor);
-
- build_index = btr_search_update_block_hash_info(info, block, cursor);
-
- if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
-
- btr_search_check_free_space_in_heap(cursor->index);
- }
-
- if (cursor->flag == BTR_CUR_HASH_FAIL) {
- /* Update the hash node reference, if appropriate */
-
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_hash_fail++;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- btr_search_x_lock(cursor->index);
-
- btr_search_update_hash_ref(info, block, cursor);
-
- btr_search_x_unlock(cursor->index);
- }
-
- if (build_index) {
- /* Note that since we did not protect block->n_fields etc.
- with any semaphore, the values can be inconsistent. We have
- to check inside the function call that they make sense. */
- btr_search_build_page_hash_index(cursor->index, block,
- block->n_fields,
- block->n_bytes,
- block->left_side);
- }
-}
-
/** Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values.
@@ -724,16 +708,14 @@ TRUE, then cursor->up_match and cursor->low_match both have sensible values.
previous record to check our guess!
@param[in] tuple data tuple
@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE
-@param[in] mtr mini transaction
-@return TRUE if success */
+@return whether a match was found */
static
-ibool
+bool
btr_search_check_guess(
btr_cur_t* cursor,
- ibool can_only_compare_to_cursor_rec,
+ bool can_only_compare_to_cursor_rec,
const dtuple_t* tuple,
- ulint mode,
- mtr_t* mtr)
+ ulint mode)
{
rec_t* rec;
ulint n_unique;
@@ -795,14 +777,13 @@ btr_search_check_guess(
match = 0;
if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
- rec_t* prev_rec;
-
ut_ad(!page_rec_is_infimum(rec));
- prev_rec = page_rec_get_prev(rec);
+ const rec_t* prev_rec = page_rec_get_prev(rec);
if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(page_align(prev_rec), mtr)
+ success = *reinterpret_cast<const uint32_t*>(
+ page_align(prev_rec) + FIL_PAGE_PREV)
== FIL_NULL;
goto exit_func;
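
The guess check above now reads the FIL_PAGE_PREV / FIL_PAGE_NEXT fields straight from the page frame and compares them with FIL_NULL, without the byte-order conversion that mach_read_from_4() would perform. That is safe only because FIL_NULL is the all-ones pattern, which looks the same in either byte order. A standalone sketch of that property; the header offsets 8 and 12 are written in here as assumptions:

#include <cassert>
#include <cstdint>
#include <cstring>

static const uint32_t FIL_NULL      = 0xFFFFFFFFu;  // "no sibling page"
static const unsigned FIL_PAGE_PREV = 8;            // assumed header offsets
static const unsigned FIL_PAGE_NEXT = 12;

// Big-endian read, the equivalent of mach_read_from_4().
static uint32_t read_be32(const unsigned char* p)
{
    return uint32_t(p[0]) << 24 | uint32_t(p[1]) << 16
         | uint32_t(p[2]) << 8  | uint32_t(p[3]);
}

int main()
{
    unsigned char page[16384] = {};
    std::memset(page + FIL_PAGE_PREV, 0xFF, 4);      // no previous page
    std::memset(page + FIL_PAGE_NEXT, 0xFF, 4);      // no next page

    // Proper big-endian decoding...
    assert(read_be32(page + FIL_PAGE_PREV) == FIL_NULL);

    // ...and a raw native-order load agree, because 0xFFFFFFFF has the same
    // byte pattern regardless of endianness. This would NOT hold for an
    // ordinary page number.
    uint32_t raw;
    std::memcpy(&raw, page + FIL_PAGE_NEXT, 4);
    assert(raw == FIL_NULL);
}
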
@@ -817,17 +798,14 @@ btr_search_check_guess(
} else {
success = cmp >= 0;
}
-
- goto exit_func;
} else {
- rec_t* next_rec;
-
ut_ad(!page_rec_is_supremum(rec));
- next_rec = page_rec_get_next(rec);
+ const rec_t* next_rec = page_rec_get_next(rec);
if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(page_align(next_rec), mtr)
+ if (*reinterpret_cast<const uint32_t*>(
+ page_align(next_rec) + FIL_PAGE_NEXT)
== FIL_NULL) {
cursor->up_match = 0;
@@ -886,12 +864,11 @@ both have sensible values.
we assume the caller uses his search latch
to protect the record!
@param[out] cursor tree cursor
-@param[in] has_search_latch
- latch mode the caller currently has on
- search system: RW_S/X_LATCH or 0
+@param[in] ahi_latch the adaptive hash index latch being held,
+ or NULL
@param[in] mtr mini transaction
-@return TRUE if succeeded */
-ibool
+@return whether the search succeeded */
+bool
btr_search_guess_on_hash(
dict_index_t* index,
btr_search_t* info,
@@ -899,7 +876,7 @@ btr_search_guess_on_hash(
ulint mode,
ulint latch_mode,
btr_cur_t* cursor,
- ulint has_search_latch,
+ rw_lock_t* ahi_latch,
mtr_t* mtr)
{
const rec_t* rec;
@@ -909,6 +886,8 @@ btr_search_guess_on_hash(
btr_cur_t cursor2;
btr_pcur_t pcur;
#endif
+ ut_ad(!ahi_latch || rw_lock_own_flagged(
+ ahi_latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
if (!btr_search_enabled) {
return(FALSE);
@@ -916,6 +895,7 @@ btr_search_guess_on_hash(
ut_ad(index && info && tuple && cursor && mtr);
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(!ahi_latch || ahi_latch == btr_get_search_latch(index));
ut_ad((latch_mode == BTR_SEARCH_LEAF)
|| (latch_mode == BTR_MODIFY_LEAF));
@@ -948,28 +928,26 @@ btr_search_guess_on_hash(
cursor->fold = fold;
cursor->flag = BTR_CUR_HASH;
- if (!has_search_latch) {
- btr_search_s_lock(index);
-
- if (!btr_search_enabled) {
- btr_search_s_unlock(index);
+ rw_lock_t* use_latch = ahi_latch ? NULL : btr_get_search_latch(index);
- btr_search_failure(info, cursor);
+ if (use_latch) {
+ rw_lock_s_lock(use_latch);
- return(FALSE);
+ if (!btr_search_enabled) {
+ goto fail;
}
+ } else {
+ ut_ad(btr_search_enabled);
+ ut_ad(rw_lock_own(ahi_latch, RW_LOCK_S));
}
- ut_ad(rw_lock_get_writer(btr_get_search_latch(index)) != RW_LOCK_X);
- ut_ad(rw_lock_get_reader_count(btr_get_search_latch(index)) > 0);
-
rec = (rec_t*) ha_search_and_get_data(
- btr_get_search_table(index), fold);
+ btr_get_search_table(index), fold);
if (rec == NULL) {
-
- if (!has_search_latch) {
- btr_search_s_unlock(index);
+ if (use_latch) {
+fail:
+ rw_lock_s_unlock(use_latch);
}
btr_search_failure(info, cursor);
@@ -979,22 +957,15 @@ btr_search_guess_on_hash(
buf_block_t* block = buf_block_from_ahi(rec);
- if (!has_search_latch) {
+ if (use_latch) {
if (!buf_page_get_known_nowait(
latch_mode, block, BUF_MAKE_YOUNG,
__FILE__, __LINE__, mtr)) {
-
- if (!has_search_latch) {
- btr_search_s_unlock(index);
- }
-
- btr_search_failure(info, cursor);
-
- return(FALSE);
+ goto fail;
}
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(use_latch);
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
@@ -1003,7 +974,7 @@ btr_search_guess_on_hash(
ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
- if (!has_search_latch) {
+ if (!ahi_latch) {
btr_leaf_page_release(block, latch_mode, mtr);
}
@@ -1025,11 +996,9 @@ btr_search_guess_on_hash(
record to determine if our guess for the cursor position is
right. */
if (index_id != btr_page_get_index_id(block->frame)
- || !btr_search_check_guess(cursor,
- has_search_latch,
- tuple, mode, mtr)) {
+ || !btr_search_check_guess(cursor, !!ahi_latch, tuple, mode)) {
- if (!has_search_latch) {
+ if (!ahi_latch) {
btr_leaf_page_release(block, latch_mode, mtr);
}
@@ -1050,7 +1019,7 @@ btr_search_guess_on_hash(
info->last_hash_succ = FALSE;
/* Currently, does not work if the following fails: */
- ut_ad(!has_search_latch);
+ ut_ad(!ahi_latch);
btr_leaf_page_release(block, latch_mode, mtr);
@@ -1083,7 +1052,7 @@ btr_search_guess_on_hash(
#ifdef UNIV_SEARCH_PERF_STAT
btr_search_n_succ++;
#endif
- if (!has_search_latch && buf_page_peek_if_too_old(&block->page)) {
+ if (!ahi_latch && buf_page_peek_if_too_old(&block->page)) {
buf_page_make_young(&block->page);
}
@@ -1105,8 +1074,7 @@ btr_search_guess_on_hash(
block->buf_fix_count == 0 or it is an index page which
has already been removed from the buf_pool->page_hash
i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
-void
-btr_search_drop_page_hash_index(buf_block_t* block)
+void btr_search_drop_page_hash_index(buf_block_t* block)
{
ulint n_fields;
ulint n_bytes;
@@ -1131,6 +1099,8 @@ retry:
/* This debug check uses a dirty read that could theoretically cause
false positives while buf_pool_clear_hash_index() is executing. */
assert_block_ahi_valid(block);
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
if (index == NULL) {
return;
@@ -1154,9 +1124,6 @@ retry:
% btr_ahi_parts;
latch = btr_search_latches[ahi_slot];
- ut_ad(!btr_search_own_any(RW_LOCK_S));
- ut_ad(!btr_search_own_any(RW_LOCK_X));
-
rw_lock_s_lock(latch);
assert_block_ahi_valid(block);
@@ -1177,8 +1144,7 @@ retry:
#endif
ut_ad(btr_search_enabled);
- ut_ad(index->space == FIL_NULL
- || block->page.id.space() == index->space);
+ ut_ad(block->page.id.space() == index->table->space_id);
ut_a(index_id == index->id);
ut_a(!dict_index_is_ibuf(index));
#ifdef UNIV_DEBUG
@@ -1224,6 +1190,9 @@ retry:
rec = page_get_infimum_rec(page);
rec = page_rec_get_next_low(rec, page_is_comp(page));
+ if (rec_is_metadata(rec, index)) {
+ rec = page_rec_get_next_low(rec, page_is_comp(page));
+ }
prev_fold = 0;
@@ -1350,6 +1319,7 @@ If index is non-NULL, this function checks if n_fields and n_bytes are
sensible, and does not build a hash index if not.
@param[in,out] index index for which to build.
@param[in,out] block index page, s-/x- latched.
+@param[in,out] ahi_latch the adaptive search latch
@param[in] n_fields hash this many full fields
@param[in] n_bytes hash this many bytes of the next field
@param[in] left_side hash for searches from left side */
@@ -1358,20 +1328,19 @@ void
btr_search_build_page_hash_index(
dict_index_t* index,
buf_block_t* block,
+ rw_lock_t* ahi_latch,
ulint n_fields,
ulint n_bytes,
ibool left_side)
{
- hash_table_t* table;
- page_t* page;
- rec_t* rec;
- rec_t* next_rec;
+ const rec_t* rec;
+ const rec_t* next_rec;
ulint fold;
ulint next_fold;
ulint n_cached;
ulint n_recs;
ulint* folds;
- rec_t** recs;
+ const rec_t** recs;
ulint i;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
@@ -1385,29 +1354,26 @@ btr_search_build_page_hash_index(
}
rec_offs_init(offsets_);
+ ut_ad(ahi_latch == btr_get_search_latch(index));
ut_ad(index);
- ut_ad(block->page.id.space() == index->space);
+ ut_ad(block->page.id.space() == index->table->space_id);
ut_a(!dict_index_is_ibuf(index));
ut_ad(page_is_leaf(block->frame));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
ut_ad(rw_lock_own_flagged(&block->lock,
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- btr_search_s_lock(index);
+ rw_lock_s_lock(ahi_latch);
- table = btr_get_search_table(index);
- page = buf_block_get_frame(block);
+ const bool rebuild = block->index
+ && (block->curr_n_fields != n_fields
+ || block->curr_n_bytes != n_bytes
+ || block->curr_left_side != left_side);
- if (block->index && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
-
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
+ if (rebuild) {
btr_search_drop_page_hash_index(block);
- } else {
- btr_search_s_unlock(index);
}
/* Check that the values for hash index build are sensible */
@@ -1422,6 +1388,7 @@ btr_search_build_page_hash_index(
return;
}
+ page_t* page = buf_block_get_frame(block);
n_recs = page_get_n_recs(page);
if (n_recs == 0) {
@@ -1429,18 +1396,24 @@ btr_search_build_page_hash_index(
return;
}
+ rec = page_rec_get_next_const(page_get_infimum_rec(page));
+
+ if (rec_is_metadata(rec, index)) {
+ rec = page_rec_get_next_const(rec);
+ if (!--n_recs) return;
+ }
+
/* Calculate and cache fold values and corresponding records into
an array for fast insertion to the hash index */
- folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));
- recs = (rec_t**) ut_malloc_nokey(n_recs * sizeof(rec_t*));
+ folds = static_cast<ulint*>(ut_malloc_nokey(n_recs * sizeof *folds));
+ recs = static_cast<const rec_t**>(
+ ut_malloc_nokey(n_recs * sizeof *recs));
n_cached = 0;
ut_a(index->id == btr_page_get_index_id(page));
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
offsets = rec_get_offsets(
rec, index, offsets, true,
btr_search_get_n_fields(n_fields, n_bytes),
@@ -1458,7 +1431,7 @@ btr_search_build_page_hash_index(
}
for (;;) {
- next_rec = page_rec_get_next(rec);
+ next_rec = page_rec_get_next_const(rec);
if (page_rec_is_supremum(next_rec)) {
@@ -1499,7 +1472,8 @@ btr_search_build_page_hash_index(
btr_search_check_free_space_in_heap(index);
- btr_search_x_lock(index);
+ hash_table_t* table = btr_get_search_table(index);
+ rw_lock_x_lock(ahi_latch);
if (!btr_search_enabled) {
goto exit_func;
@@ -1537,7 +1511,7 @@ btr_search_build_page_hash_index(
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
exit_func:
assert_block_ahi_valid(block);
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
ut_free(folds);
ut_free(recs);
@@ -1546,48 +1520,99 @@ exit_func:
}
}
-/** Moves or deletes hash entries for moved records. If new_page is already
-hashed, then the hash index for page, if any, is dropped. If new_page is not
-hashed, and page is hashed, then a new hash index is built to new_page with the
-same parameters as page (this often happens when a page is split).
-@param[in,out] new_block records are copied to this page.
-@param[in,out] block index page from which record are copied, and the
- copied records will be deleted from this page.
-@param[in,out] index record descriptor */
+/** Updates the search info.
+@param[in,out] info search info
+@param[in,out] cursor cursor which was just positioned */
void
-btr_search_move_or_delete_hash_entries(
- buf_block_t* new_block,
- buf_block_t* block,
- dict_index_t* index)
+btr_search_info_update_slow(btr_search_t* info, btr_cur_t* cursor)
{
-#ifdef MYSQL_INDEX_DISABLE_AHI
- if (index->disable_ahi) return;
-#endif
- if (!btr_search_enabled) {
- return;
+ rw_lock_t* ahi_latch = btr_get_search_latch(cursor->index);
+
+ ut_ad(!rw_lock_own_flagged(ahi_latch,
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+
+ buf_block_t* block = btr_cur_get_block(cursor);
+
+ /* NOTE that the following two function calls do NOT protect
+ info or block->n_fields etc. with any semaphore, to save CPU time!
+ We cannot assume the fields are consistent when we return from
+ those functions! */
+
+ btr_search_info_update_hash(info, cursor);
+
+ bool build_index = btr_search_update_block_hash_info(info, block);
+
+ if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
+
+ btr_search_check_free_space_in_heap(cursor->index);
}
+ if (cursor->flag == BTR_CUR_HASH_FAIL) {
+ /* Update the hash node reference, if appropriate */
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ btr_search_n_hash_fail++;
+#endif /* UNIV_SEARCH_PERF_STAT */
+
+ rw_lock_x_lock(ahi_latch);
+
+ btr_search_update_hash_ref(info, block, cursor);
+
+ rw_lock_x_unlock(ahi_latch);
+ }
+
+ if (build_index) {
+ /* Note that since we did not protect block->n_fields etc.
+ with any semaphore, the values can be inconsistent. We have
+ to check inside the function call that they make sense. */
+ btr_search_build_page_hash_index(cursor->index, block,
+ ahi_latch,
+ block->n_fields,
+ block->n_bytes,
+ block->left_side);
+ }
+}
+
+/** Move or delete hash entries for moved records, usually in a page split.
+If new_block is already hashed, then any hash index for block is dropped.
+If new_block is not hashed, and block is hashed, then a new hash index is
+built for new_block with the same parameters as block.
+@param[in,out] new_block destination page
+@param[in,out] block source page (subject to deletion later) */
+void
+btr_search_move_or_delete_hash_entries(
+ buf_block_t* new_block,
+ buf_block_t* block)
+{
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_X));
- btr_search_s_lock(index);
+ if (!btr_search_enabled) {
+ return;
+ }
- ut_a(!new_block->index || new_block->index == index);
- ut_a(!block->index || block->index == index);
- ut_a(!(new_block->index || block->index)
- || !dict_index_is_ibuf(index));
+ dict_index_t* index = block->index;
+ if (!index) {
+ index = new_block->index;
+ } else {
+ ut_ad(!new_block->index || index == new_block->index);
+ }
assert_block_ahi_valid(block);
assert_block_ahi_valid(new_block);
- if (new_block->index) {
-
- btr_search_s_unlock(index);
+ rw_lock_t* ahi_latch = index ? btr_get_search_latch(index) : NULL;
+ if (new_block->index) {
btr_search_drop_page_hash_index(block);
+ return;
+ }
+ if (!index) {
return;
}
+ rw_lock_s_lock(ahi_latch);
+
if (block->index) {
ulint n_fields = block->curr_n_fields;
ulint n_bytes = block->curr_n_bytes;
@@ -1597,26 +1622,26 @@ btr_search_move_or_delete_hash_entries(
new_block->n_bytes = block->curr_n_bytes;
new_block->left_side = left_side;
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
ut_a(n_fields > 0 || n_bytes > 0);
btr_search_build_page_hash_index(
- index, new_block, n_fields, n_bytes, left_side);
+ index, new_block, ahi_latch,
+ n_fields, n_bytes, left_side);
ut_ad(n_fields == block->curr_n_fields);
ut_ad(n_bytes == block->curr_n_bytes);
ut_ad(left_side == block->curr_left_side);
return;
}
- btr_search_s_unlock(index);
+ rw_lock_s_unlock(ahi_latch);
}
/** Updates the page hash index when a single record is deleted from a page.
@param[in] cursor cursor which was positioned on the record to delete
using btr_cur_search_, the record is not yet deleted.*/
-void
-btr_search_update_hash_on_delete(btr_cur_t* cursor)
+void btr_search_update_hash_on_delete(btr_cur_t* cursor)
{
hash_table_t* table;
buf_block_t* block;
@@ -1648,7 +1673,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
return;
}
- ut_ad(block->page.id.space() == index->space);
+ ut_ad(block->page.id.space() == index->table->space_id);
ut_a(index == cursor->index);
ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
ut_a(!dict_index_is_ibuf(index));
@@ -1664,7 +1689,9 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
mem_heap_free(heap);
}
- btr_search_x_lock(index);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+
+ rw_lock_x_lock(ahi_latch);
assert_block_ahi_valid(block);
if (block->index) {
@@ -1680,21 +1707,25 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
assert_block_ahi_valid(block);
}
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
}
/** Updates the page hash index when a single record is inserted on a page.
@param[in] cursor cursor which was positioned to the place to insert
using btr_cur_search_, and the new record has been
- inserted next to the cursor. */
+ inserted next to the cursor.
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch)
{
hash_table_t* table;
buf_block_t* block;
dict_index_t* index;
rec_t* rec;
+ ut_ad(ahi_latch == btr_get_search_latch(cursor->index));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
#ifdef MYSQL_INDEX_DISABLE_AHI
if (cursor->index->disable_ahi) return;
#endif
@@ -1717,8 +1748,7 @@ btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
ut_a(cursor->index == index);
ut_a(!dict_index_is_ibuf(index));
-
- btr_search_x_lock(index);
+ rw_lock_x_lock(ahi_latch);
if (!block->index) {
@@ -1742,11 +1772,11 @@ btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
func_exit:
assert_block_ahi_valid(block);
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
} else {
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
- btr_search_update_hash_on_insert(cursor);
+ btr_search_update_hash_on_insert(cursor, ahi_latch);
}
}
@@ -1754,9 +1784,10 @@ func_exit:
@param[in,out] cursor cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
- to the cursor */
+ to the cursor
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_on_insert(btr_cur_t* cursor)
+btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch)
{
hash_table_t* table;
buf_block_t* block;
@@ -1770,13 +1801,16 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
ulint n_fields;
ulint n_bytes;
ibool left_side;
- ibool locked = FALSE;
+ bool locked = false;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
rec_offs_init(offsets_);
+ ut_ad(ahi_latch == btr_get_search_latch(cursor->index));
ut_ad(page_is_leaf(btr_cur_get_page(cursor)));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
#ifdef MYSQL_INDEX_DISABLE_AHI
if (cursor->index->disable_ahi) return;
#endif
@@ -1796,7 +1830,7 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
return;
}
- ut_ad(block->page.id.space() == index->space);
+ ut_ad(block->page.id.space() == index->table->space_id);
btr_search_check_free_space_in_heap(index);
table = btr_get_search_table(index);
@@ -1828,17 +1862,15 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
n_bytes, index->id);
}
- if (!page_rec_is_infimum(rec)) {
+ if (!page_rec_is_infimum(rec) && !rec_is_metadata(rec, index)) {
offsets = rec_get_offsets(
rec, index, offsets, true,
btr_search_get_n_fields(n_fields, n_bytes), &heap);
fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
} else {
if (left_side) {
-
- btr_search_x_lock(index);
-
- locked = TRUE;
+ locked = true;
+ rw_lock_x_lock(ahi_latch);
if (!btr_search_enabled) {
goto function_exit;
@@ -1853,10 +1885,8 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
if (fold != ins_fold) {
if (!locked) {
-
- btr_search_x_lock(index);
-
- locked = TRUE;
+ locked = true;
+ rw_lock_x_lock(ahi_latch);
if (!btr_search_enabled) {
goto function_exit;
@@ -1874,11 +1904,9 @@ check_next_rec:
if (page_rec_is_supremum(next_rec)) {
if (!left_side) {
-
if (!locked) {
- btr_search_x_lock(index);
-
- locked = TRUE;
+ locked = true;
+ rw_lock_x_lock(ahi_latch);
if (!btr_search_enabled) {
goto function_exit;
@@ -1894,10 +1922,8 @@ check_next_rec:
if (ins_fold != next_fold) {
if (!locked) {
-
- btr_search_x_lock(index);
-
- locked = TRUE;
+ locked = true;
+ rw_lock_x_lock(ahi_latch);
if (!btr_search_enabled) {
goto function_exit;
@@ -1916,8 +1942,9 @@ function_exit:
mem_heap_free(heap);
}
if (locked) {
- btr_search_x_unlock(index);
+ rw_lock_x_unlock(ahi_latch);
}
+ ut_ad(!rw_lock_own(ahi_latch, RW_LOCK_X));
}
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
@@ -2024,7 +2051,8 @@ btr_search_hash_table_validate(ulint hash_table_id)
}
ut_a(!dict_index_is_ibuf(block->index));
- ut_ad(block->page.id.space() == block->index->space);
+ ut_ad(block->page.id.space()
+ == block->index->table->space_id);
page_index_id = btr_page_get_index_id(block->frame);
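The btr0sea.cc hunks above stop re-deriving the adaptive hash index latch inside every helper; the caller fetches it once with btr_get_search_latch() and passes it down as ahi_latch, so each callee can assert it was handed the latch that really covers cursor->index. The standalone C++17 sketch below is only an analogy for that design choice (std::shared_mutex instead of InnoDB's rw_lock_t; none of these names exist in the source tree):

    // Standalone analogy, not InnoDB code.
    #include <cassert>
    #include <shared_mutex>
    #include <vector>

    struct Index {
        std::shared_mutex* ahi_latch;  // latch protecting this index's hash
        std::vector<int>   hash;       // stand-in for the adaptive hash index
    };

    // The callee receives the latch explicitly and can verify it matches.
    void build_hash(Index& index, std::shared_mutex& ahi_latch, int value) {
        assert(&ahi_latch == index.ahi_latch);
        std::unique_lock<std::shared_mutex> x(ahi_latch);  // like rw_lock_x_lock()
        index.hash.push_back(value);
    }

    int main() {
        std::shared_mutex latch;
        Index index{&latch, {}};
        build_hash(index, latch, 42);  // caller resolves the latch exactly once
    }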
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index 440d64322a4..f932195897c 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -73,10 +73,6 @@ list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */
value by the consumer of the block */
#define BUF_BUDDY_STAMP_NONFREE 0XFFFFFFFFUL
-#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
-# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
-#endif
-
/** Return type of buf_buddy_is_free() */
enum buf_buddy_state_t {
BUF_BUDDY_STATE_FREE, /*!< If the buddy to completely free */
@@ -114,6 +110,7 @@ buf_buddy_stamp_is_free(
/*====================*/
const buf_buddy_free_t* buf) /*!< in: block to check */
{
+ compile_time_assert(BUF_BUDDY_STAMP_FREE < BUF_BUDDY_STAMP_NONFREE);
return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
== BUF_BUDDY_STAMP_FREE);
}
@@ -138,13 +135,12 @@ buf_buddy_stamp_free(
Stamps a buddy nonfree.
@param[in,out] buf block to stamp
@param[in] i block size */
-#define buf_buddy_stamp_nonfree(buf, i) do { \
- buf_buddy_mem_invalid(buf, i); \
- memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); \
-} while (0)
-#if BUF_BUDDY_STAMP_NONFREE != 0xffffffff
-# error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff"
-#endif
+static inline void buf_buddy_stamp_nonfree(buf_buddy_free_t* buf, ulint i)
+{
+ buf_buddy_mem_invalid(buf, i);
+ compile_time_assert(BUF_BUDDY_STAMP_NONFREE == 0xffffffffU);
+ memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4);
+}
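This hunk, like later ones in buf0buf.cc and buf0lru.cc, replaces preprocessor #if/#error guards with compile_time_assert() placed inside the function that depends on the invariant, so the check sits next to the code it protects. A standalone sketch of the same idea using C++11 static_assert (the exact definition of MariaDB's compile_time_assert macro is not part of this diff, so it is not reproduced here; the constants below are illustrative, not the real stamp values):

    #include <cstdint>

    static const uint32_t STAMP_FREE    = 0x01010101U;
    static const uint32_t STAMP_NONFREE = 0xffffffffU;

    inline bool stamp_is_free(uint32_t stamp) {
        // Checked at compile time, right where the ordering matters.
        static_assert(STAMP_FREE < STAMP_NONFREE, "stamp constants must be ordered");
        return stamp == STAMP_FREE;
    }

    int main() { return stamp_is_free(STAMP_FREE) ? 0 : 1; }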
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
@@ -160,7 +156,7 @@ buf_buddy_get(
ut_ad(size >= BUF_BUDDY_LOW);
ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
ut_ad(size < BUF_BUDDY_HIGH);
- ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE);
+ ut_ad(BUF_BUDDY_HIGH == srv_page_size);
ut_ad(!ut_align_offset(page, size));
if (((ulint) page) & size) {
@@ -376,7 +372,7 @@ buf_buddy_alloc_zip(
}
/**********************************************************************//**
-Deallocate a buffer frame of UNIV_PAGE_SIZE. */
+Deallocate a buffer frame of srv_page_size. */
static
void
buf_buddy_block_free(
@@ -390,7 +386,7 @@ buf_buddy_block_free(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+ ut_a(!ut_align_offset(buf, srv_page_size));
HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
@@ -403,8 +399,8 @@ buf_buddy_block_free(
ut_d(bpage->in_zip_hash = FALSE);
HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
- ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
- UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+ ut_d(memset(buf, 0, srv_page_size));
+ UNIV_MEM_INVALID(buf, srv_page_size);
block = (buf_block_t*) bpage;
buf_page_mutex_enter(block);
@@ -432,7 +428,7 @@ buf_buddy_block_register(
buf_block_set_state(block, BUF_BLOCK_MEMORY);
ut_a(block->frame);
- ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
+ ut_a(!ut_align_offset(block->frame, srv_page_size));
ut_ad(!block->page.in_page_hash);
ut_ad(!block->page.in_zip_hash);
@@ -489,8 +485,8 @@ buf_buddy_alloc_low(
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that
- will be assigned TRUE if storage was
+ bool* lru) /*!< in: pointer to a variable that
+ will be assigned true if storage was
allocated from the LRU list and
buf_pool->mutex was temporarily
released */
@@ -522,7 +518,7 @@ buf_buddy_alloc_low(
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit(buf_pool);
block = buf_LRU_get_free_block(buf_pool);
- *lru = TRUE;
+ *lru = true;
buf_pool_mutex_enter(buf_pool);
alloc_big:
@@ -765,7 +761,7 @@ func_exit:
@param[in] buf_pool buffer pool instance
@param[in] buf block to be reallocated, must be pointed
to by the buffer pool
-@param[in] size block size, up to UNIV_PAGE_SIZE
+@param[in] size block size, up to srv_page_size
@retval false if failed because of no free blocks. */
bool
buf_buddy_realloc(
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 03bf76fd299..dd9dca496fb 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -283,8 +283,8 @@ reachable via buf_pool->chunks[].
The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.cc) to keep track of currently unused
-memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
-blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
+memory blocks of size sizeof(buf_page_t)..srv_page_size / 2. These
+blocks are inside the srv_page_size-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.
@@ -482,7 +482,7 @@ buf_pool_register_chunk(
@return whether the operation was successful */
static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
{
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
ut_ad(space->id == bpage->id.space());
byte* dst_frame = bpage->zip.data ? bpage->zip.data :
@@ -516,7 +516,7 @@ decompress_with_slot:
slot->release();
ut_ad(!bpage->write_size || fil_page_type_validate(dst_frame));
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
return bpage->write_size != 0;
}
@@ -562,13 +562,10 @@ decrypt_failed:
goto decompress;
}
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
return true;
}
-/* prototypes for new functions added to ha_innodb.cc */
-trx_t* innobase_get_trx();
-
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@@ -668,7 +665,8 @@ buf_get_total_list_size_in_bytes(
for statistics purpose */
buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
buf_pools_list_size->unzip_LRU_bytes +=
- UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
+ UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ << srv_page_size_shift;
buf_pools_list_size->flush_list_bytes +=
buf_pool->stat.flush_list_bytes;
}
@@ -980,7 +978,7 @@ buf_page_is_corrupted(
ib::info() << "Log sequence number at the start "
<< mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
<< " and the end "
- << mach_read_from_4(read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
+ << mach_read_from_4(read_buf + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
<< " do not match";
#endif /* UNIV_INNOCHECKSUM */
return(true);
@@ -1035,9 +1033,7 @@ buf_page_is_corrupted(
checksum_field2 = mach_read_from_4(
read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM);
-#if FIL_PAGE_LSN % 8
-#error "FIL_PAGE_LSN must be 64 bit aligned"
-#endif
+ compile_time_assert(!(FIL_PAGE_LSN % 8));
/* declare empty pages non-corrupted */
if (checksum_field1 == 0
@@ -1299,6 +1295,56 @@ buf_page_is_corrupted(
}
#ifndef UNIV_INNOCHECKSUM
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
+/** Enable buffers to be dumped to core files
+
+A convenience function, not called anywhere directly; however,
+it is left available for gdb or any debugger to call
+in the event that you want all of the memory to be dumped
+to a core file.
+
+Returns number of errors found in madvise calls. */
+int
+buf_madvise_do_dump()
+{
+ int ret= 0;
+ buf_pool_t* buf_pool;
+ buf_chunk_t* chunk;
+
+ /* mirrors allocation in log_t::create() */
+ if (log_sys.buf) {
+ ret+= madvise(log_sys.first_in_use
+ ? log_sys.buf
+ : log_sys.buf - srv_log_buffer_size,
+ srv_log_buffer_size * 2,
+ MADV_DODUMP);
+ }
+ /* mirrors recv_sys_init() */
+ if (recv_sys->buf)
+ {
+ ret+= madvise(recv_sys->buf, recv_sys->len, MADV_DODUMP);
+ }
+
+ buf_pool_mutex_enter_all();
+
+ for (ulong i= 0; i < srv_buf_pool_instances; i++)
+ {
+ buf_pool = buf_pool_from_array(i);
+ chunk = buf_pool->chunks;
+
+ for (int n = buf_pool->n_chunks; n--; chunk++)
+ {
+ ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP);
+ }
+ }
+
+ buf_pool_mutex_exit_all();
+
+ return ret;
+}
+#endif
+
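As the comment above notes, buf_madvise_do_dump() is meant to be invoked by hand from a debugger (for example gdb's "call buf_madvise_do_dump()") when you want the buffer pool included in a core file. The standalone Linux-only sketch below shows what the underlying madvise(MADV_DODUMP) call does to a mapping that was previously marked MADV_DONTDUMP; it illustrates the system call only and is not InnoDB code:

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdlib>

    int main() {
    #if defined(MADV_DONTDUMP) && defined(MADV_DODUMP)
        const size_t len = 1 << 20;
        void* buf = mmap(nullptr, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) return EXIT_FAILURE;

        madvise(buf, len, MADV_DONTDUMP);  // exclude region from core dumps
        madvise(buf, len, MADV_DODUMP);    // re-include it, as buf_madvise_do_dump() does
        munmap(buf, len);
    #endif
        return 0;
    }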
/** Dump a page to stderr.
@param[in] read_buf database page
@param[in] page_size page size */
@@ -1409,20 +1455,10 @@ buf_page_print(const byte* read_buf, const page_size_t& page_size)
read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
}
- if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT) {
- fprintf(stderr,
- "InnoDB: Page may be an insert undo log page\n");
- } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE) {
- fprintf(stderr,
- "InnoDB: Page may be an update undo log page\n");
- }
-
switch (fil_page_get_type(read_buf)) {
index_id_t index_id;
case FIL_PAGE_INDEX:
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_RTREE:
index_id = btr_page_get_index_id(read_buf);
ib::info() << "Page may be an index page where"
@@ -1436,6 +1472,9 @@ buf_page_print(const byte* read_buf, const page_size_t& page_size)
<< " in table " << index->table->name;
}
break;
+ case FIL_PAGE_UNDO_LOG:
+ fputs("InnoDB: Page may be an undo log page\n", stderr);
+ break;
case FIL_PAGE_INODE:
fputs("InnoDB: Page may be an 'inode' page\n", stderr);
break;
@@ -1544,7 +1583,7 @@ buf_block_init(
buf_block_t* block, /*!< in: pointer to control block */
byte* frame) /*!< in: pointer to buffer frame */
{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_DESC(frame, srv_page_size);
/* This function should only be executed at database startup or by
buf_pool_resize(). Either way, adaptive hash index must not exist. */
@@ -1627,15 +1666,17 @@ buf_chunk_init(
/* Round down to a multiple of page size,
although it already should be. */
- mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
+ mem_size = ut_2pow_round(mem_size, ulint(srv_page_size));
/* Reserve space for the block descriptors. */
- mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
- + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
+ mem_size += ut_2pow_round((mem_size >> srv_page_size_shift)
+ * (sizeof *block)
+ + (srv_page_size - 1),
+ ulint(srv_page_size));
DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););
chunk->mem = buf_pool->allocator.allocate_large(mem_size,
- &chunk->mem_pfx);
+ &chunk->mem_pfx, true);
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
@@ -1664,12 +1705,12 @@ buf_chunk_init(
chunk->blocks = (buf_block_t*) chunk->mem;
/* Align a pointer to the first frame. Note that when
- os_large_page_size is smaller than UNIV_PAGE_SIZE,
+ os_large_page_size is smaller than srv_page_size,
we may allocate one fewer block than requested. When
it is bigger, we may allocate more blocks than requested. */
- frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE);
- chunk->size = chunk->mem_pfx.m_size / UNIV_PAGE_SIZE
+ frame = (byte*) ut_align(chunk->mem, srv_page_size);
+ chunk->size = (chunk->mem_pfx.m_size >> srv_page_size_shift)
- (frame != chunk->mem);
/* Subtract the space needed for block descriptors. */
@@ -1677,7 +1718,7 @@ buf_chunk_init(
ulint size = chunk->size;
while (frame < (byte*) (chunk->blocks + size)) {
- frame += UNIV_PAGE_SIZE;
+ frame += srv_page_size;
size--;
}
@@ -1693,7 +1734,7 @@ buf_chunk_init(
for (i = chunk->size; i--; ) {
buf_block_init(buf_pool, block, frame);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_INVALID(block->frame, srv_page_size);
/* Add the block to the free list */
UT_LIST_ADD_LAST(buf_pool->free, &block->page);
@@ -1702,7 +1743,7 @@ buf_chunk_init(
ut_ad(buf_pool_from_block(block) == buf_pool);
block++;
- frame += UNIV_PAGE_SIZE;
+ frame += srv_page_size;
}
buf_pool_register_chunk(chunk);
@@ -1929,7 +1970,8 @@ buf_pool_init_instance(
}
buf_pool->allocator.deallocate_large(
- chunk->mem, &chunk->mem_pfx);
+ chunk->mem, &chunk->mem_pfx, chunk->mem_size(),
+ true);
}
ut_free(buf_pool->chunks);
buf_pool_mutex_exit(buf_pool);
@@ -1945,7 +1987,8 @@ buf_pool_init_instance(
ut_min(BUF_READ_AHEAD_PAGES,
ut_2_power_up(buf_pool->curr_size /
BUF_READ_AHEAD_PORTION));
- buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+ buf_pool->curr_pool_size = buf_pool->curr_size
+ << srv_page_size_shift;
buf_pool->old_size = buf_pool->curr_size;
buf_pool->n_chunks_new = buf_pool->n_chunks;
@@ -2076,7 +2119,7 @@ buf_pool_free_instance(
}
buf_pool->allocator.deallocate_large(
- chunk->mem, &chunk->mem_pfx);
+ chunk->mem, &chunk->mem_pfx, true);
}
for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
@@ -2273,7 +2316,7 @@ buf_page_realloc(
buf_block_modify_clock_inc(block);
memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_INVALID(block->frame, srv_page_size);
buf_block_set_state(block, BUF_BLOCK_REMOVE_HASH);
block->page.id
= page_id_t(ULINT32_UNDEFINED, ULINT32_UNDEFINED);
@@ -2397,7 +2440,7 @@ buf_frame_will_withdrawn(
while (chunk < echunk) {
if (ptr >= chunk->blocks->frame
&& ptr < (chunk->blocks + chunk->size - 1)->frame
- + UNIV_PAGE_SIZE) {
+ + srv_page_size) {
return(true);
}
++chunk;
@@ -2735,7 +2778,7 @@ buf_pool_resize()
ut_ad(srv_buf_pool_chunk_unit > 0);
new_instance_size = srv_buf_pool_size / srv_buf_pool_instances;
- new_instance_size /= UNIV_PAGE_SIZE;
+ new_instance_size >>= srv_page_size_shift;
buf_resize_status("Resizing buffer pool from " ULINTPF " to "
ULINTPF " (unit=" ULINTPF ").",
@@ -2754,7 +2797,8 @@ buf_pool_resize()
buf_pool->curr_size = new_instance_size;
- buf_pool->n_chunks_new = new_instance_size * UNIV_PAGE_SIZE
+ buf_pool->n_chunks_new =
+ (new_instance_size << srv_page_size_shift)
/ srv_buf_pool_chunk_unit;
buf_pool_mutex_exit(buf_pool);
@@ -2840,11 +2884,11 @@ withdraw_retry:
}
lock_mutex_enter();
- trx_sys_mutex_enter();
+ mutex_enter(&trx_sys.mutex);
bool found = false;
- for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys.trx_list);
trx != NULL;
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
if (trx->state != TRX_STATE_NOT_STARTED
&& trx->mysql_thd != NULL
&& ut_difftime(withdraw_started,
@@ -2864,7 +2908,7 @@ withdraw_retry:
stderr, trx);
}
}
- trx_sys_mutex_exit();
+ mutex_exit(&trx_sys.mutex);
lock_mutex_exit();
withdraw_started = ut_time();
@@ -2953,7 +2997,7 @@ withdraw_retry:
}
buf_pool->allocator.deallocate_large(
- chunk->mem, &chunk->mem_pfx);
+ chunk->mem, &chunk->mem_pfx, true);
sum_freed += chunk->size;
@@ -3090,7 +3134,7 @@ calc_buf_pool_size:
ut_2_power_up(buf_pool->curr_size /
BUF_READ_AHEAD_PORTION));
buf_pool->curr_pool_size
- = buf_pool->curr_size * UNIV_PAGE_SIZE;
+ = buf_pool->curr_size << srv_page_size_shift;
curr_size += buf_pool->curr_pool_size;
buf_pool->old_size = buf_pool->curr_size;
}
@@ -3142,8 +3186,9 @@ calc_buf_pool_size:
buf_resize_status("Resizing also other hash tables.");
/* normalize lock_sys */
- srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
- lock_sys_resize(srv_lock_table_size);
+ srv_lock_table_size = 5
+ * (srv_buf_pool_size >> srv_page_size_shift);
+ lock_sys.resize(srv_lock_table_size);
/* normalize btr_search_sys */
btr_search_sys_resize(
@@ -4043,7 +4088,7 @@ buf_zip_decompress(
if (page_zip_decompress(&block->page.zip,
block->frame, TRUE)) {
if (space) {
- fil_space_release_for_io(space);
+ space->release_for_io();
}
return(TRUE);
}
@@ -4062,7 +4107,7 @@ buf_zip_decompress(
/* Copy to uncompressed storage. */
memcpy(block->frame, frame, block->page.size.physical());
if (space) {
- fil_space_release_for_io(space);
+ space->release_for_io();
}
return(TRUE);
@@ -4078,13 +4123,16 @@ err_exit:
ib::info() << "Row compressed page could be encrypted"
" with key_version " << key_version;
block->page.encrypted = true;
- dict_set_encrypted_by_space(block->page.id.space());
- } else {
- dict_set_corrupted_by_space(block->page.id.space());
}
if (space) {
- fil_space_release_for_io(space);
+ if (encrypted) {
+ dict_set_encrypted_by_space(space);
+ } else {
+ dict_set_corrupted_by_space(space);
+ }
+
+ space->release_for_io();
}
return(FALSE);
@@ -4115,16 +4163,16 @@ buf_block_from_ahi(const byte* ptr)
chunk = (--it)->second;
}
- ulint offs = ptr - chunk->blocks->frame;
+ ulint offs = ulint(ptr - chunk->blocks->frame);
- offs >>= UNIV_PAGE_SIZE_SHIFT;
+ offs >>= srv_page_size_shift;
ut_a(offs < chunk->size);
buf_block_t* block = &chunk->blocks[offs];
/* The function buf_chunk_init() invokes buf_block_init() so that
- block[n].frame == block->frame + n * UNIV_PAGE_SIZE. Check it. */
+ block[n].frame == block->frame + n * srv_page_size. Check it. */
ut_ad(block->frame == page_align(ptr));
/* Read the state of the block without holding a mutex.
A state transition from BUF_BLOCK_FILE_PAGE to
@@ -4315,12 +4363,14 @@ buf_page_get_gen(
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_EVICT_IF_IN_POOL:
- case BUF_PEEK_IF_IN_POOL:
/* After DISCARD TABLESPACE, the tablespace would not exist,
but in IMPORT TABLESPACE, PageConverter::operator() must
replace any old pages, which were not evicted during DISCARD.
- Similarly, btr_search_drop_page_hash_when_freed() must
- remove any old pages. Skip the assertion on page_size. */
+ Skip the assertion on space_page_size. */
+ break;
+ case BUF_PEEK_IF_IN_POOL:
+ /* In this mode, the caller may pass a dummy page size,
+ because it does not really matter. */
break;
default:
ut_error;
@@ -4486,9 +4536,16 @@ loop:
/* Try to set table as corrupted instead of
asserting. */
- if (page_id.space() != TRX_SYS_SPACE &&
- dict_set_corrupted_by_space(page_id.space())) {
- return (NULL);
+ if (page_id.space() == TRX_SYS_SPACE) {
+ } else if (page_id.space() == SRV_TMP_SPACE_ID) {
+ } else if (fil_space_t* space
+ = fil_space_acquire_for_io(
+ page_id.space())) {
+ bool set = dict_set_corrupted_by_space(space);
+ space->release_for_io();
+ if (set) {
+ return NULL;
+ }
}
ib::fatal() << "Unable to read page " << page_id
@@ -4501,9 +4558,7 @@ loop:
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(fsp_skip_sanity_check(page_id.space())
- || ++buf_dbg_counter % 5771
- || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
} else {
@@ -4895,9 +4950,7 @@ evict_from_pool:
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(fsp_skip_sanity_check(page_id.space())
- || ++buf_dbg_counter % 5771
- || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -5044,9 +5097,7 @@ buf_page_optimistic_get(
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(fsp_skip_sanity_check(block->page.id.space())
- || ++buf_dbg_counter % 5771
- || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -5249,9 +5300,7 @@ buf_page_try_get_func(
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(fsp_skip_sanity_check(block->page.id.space())
- || ++buf_dbg_counter % 5771
- || buf_validate());
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -5328,7 +5377,7 @@ buf_page_init(
/* Silence valid Valgrind warnings about uninitialized
data being written to data files. There are some unused
bytes on some pages that InnoDB does not initialize. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_VALID(block->frame, srv_page_size);
}
#endif /* UNIV_DEBUG_VALGRIND */
@@ -5412,7 +5461,7 @@ buf_page_init_for_read(
buf_page_t* watch_page;
rw_lock_t* hash_lock;
mtr_t mtr;
- ibool lru = FALSE;
+ bool lru = false;
void* data;
buf_pool_t* buf_pool = buf_pool_get(page_id);
@@ -5703,7 +5752,7 @@ buf_page_create(
if (page_size.is_compressed()) {
void* data;
- ibool lru;
+ bool lru;
/* Prevent race conditions during buf_buddy_alloc(),
which may release and reacquire buf_pool->mutex,
@@ -5797,13 +5846,14 @@ buf_page_monitor(
switch (fil_page_get_type(frame)) {
ulint level;
-
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
- level = btr_page_get_level_low(frame);
+ level = btr_page_get_level(frame);
/* Check if it is an index page for insert buffer */
- if (btr_page_get_index_id(frame)
+ if (fil_page_get_type(frame) == FIL_PAGE_INDEX
+ && btr_page_get_index_id(frame)
== (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
if (level == 0) {
counter = MONITOR_RW_COUNTER(
@@ -5877,24 +5927,22 @@ buf_page_monitor(
MONITOR_INC_NOCHECK(counter);
}
-/********************************************************************//**
-Mark a table with the specified space pointed by bpage->id.space() corrupted.
-Also remove the bpage from LRU list.
-@param[in,out] bpage Block */
+/** Mark a table corrupted.
+Also remove the bpage from LRU list. */
static
void
-buf_mark_space_corrupt(buf_page_t* bpage)
+buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t* space)
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
- uint32_t space = bpage->id.space();
/* First unfix and release lock on the bpage */
buf_pool_mutex_enter(buf_pool);
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
ut_ad(bpage->buf_fix_count == 0);
+ ut_ad(bpage->id.space() == space->id);
/* Set BUF_IO_NONE before we remove the block from LRU list */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
@@ -5940,7 +5988,7 @@ static
dberr_t
buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
{
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
byte* dst_frame = (bpage->zip.data) ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
@@ -6065,7 +6113,7 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
my_atomic_addlint(&buf_pool->n_pend_unzip, 1);
ibool ok = buf_zip_decompress((buf_block_t*) bpage,
FALSE);
- my_atomic_addlint(&buf_pool->n_pend_unzip, -1);
+ my_atomic_addlint(&buf_pool->n_pend_unzip, ulint(-1));
if (!ok) {
ib::info() << "Page "
@@ -6119,10 +6167,10 @@ database_corrupted:
"buf_page_import_corrupt_failure",
if (!is_predefined_tablespace(
bpage->id.space())) {
- buf_mark_space_corrupt(bpage);
+ buf_mark_space_corrupt(bpage, space);
ib::info() << "Simulated IMPORT "
"corruption";
- fil_space_release_for_io(space);
+ space->release_for_io();
return(err);
}
err = DB_SUCCESS;
@@ -6163,8 +6211,8 @@ database_corrupted:
" a corrupt database page.";
}
- buf_mark_space_corrupt(bpage);
- fil_space_release_for_io(space);
+ buf_mark_space_corrupt(bpage, space);
+ space->release_for_io();
return(err);
}
}
@@ -6207,7 +6255,7 @@ database_corrupted:
}
- fil_space_release_for_io(space);
+ space->release_for_io();
} else {
/* io_type == BUF_IO_WRITE */
if (bpage->slot) {
@@ -7398,7 +7446,7 @@ buf_page_encrypt_before_write(
byte* src_frame)
{
ut_ad(space->id == bpage->id.space());
- bpage->real_size = UNIV_PAGE_SIZE;
+ bpage->real_size = srv_page_size;
fil_page_type_validate(src_frame);
@@ -7450,7 +7498,7 @@ not_compressed:
src_frame,
dst_frame);
- bpage->real_size = UNIV_PAGE_SIZE;
+ bpage->real_size = srv_page_size;
slot->out_buf = dst_frame = tmp;
ut_d(fil_page_type_validate(tmp));
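A pattern that repeats through buf0buf.cc above and buf0checksum.cc / buf0dblwr.cc below is replacing the compile-time UNIV_PAGE_SIZE constant with the run-time pair srv_page_size / srv_page_size_shift, turning multiplications and divisions by the page size into shifts. The rewrite is valid only because the page size is a power of two; a standalone sketch of the identity being relied on (the local variables merely mimic the global names):

    #include <cassert>

    int main() {
        const unsigned long srv_page_size_shift = 14;  // e.g. 16KiB pages
        const unsigned long srv_page_size = 1UL << srv_page_size_shift;

        const unsigned long n_pages = 123;
        const unsigned long bytes   = n_pages * srv_page_size;

        // n * page_size == n << shift, and bytes / page_size == bytes >> shift
        assert((n_pages * srv_page_size) == (n_pages << srv_page_size_shift));
        assert((bytes / srv_page_size)   == (bytes >> srv_page_size_shift));
        return 0;
    }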
diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc
index 78b49e49690..74b182affbf 100644
--- a/storage/innobase/buf/buf0checksum.cc
+++ b/storage/innobase/buf/buf0checksum.cc
@@ -46,7 +46,7 @@ when it is written to a file and also checked for a match when reading from
the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
variants. Note that we must be careful to calculate the same value on 32-bit
and 64-bit architectures.
-@param[in] page buffer page (UNIV_PAGE_SIZE bytes)
+@param[in] page buffer page (srv_page_size bytes)
@param[in] use_legacy_big_endian if true then use big endian
byteorder when converting byte strings to integers
@return checksum */
@@ -73,7 +73,7 @@ buf_calc_page_crc32(
const uint32_t c2 = crc32_func(
page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ srv_page_size - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM);
return(c1 ^ c2);
}
@@ -100,7 +100,7 @@ buf_calc_page_new_checksum(const byte* page)
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- FIL_PAGE_OFFSET)
+ ut_fold_binary(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
+ srv_page_size - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
return(static_cast<uint32_t>(checksum));
}
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index f7b31eefa41..099a3752f7f 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -148,11 +148,11 @@ buf_dblwr_init(
ut_zalloc_nokey(buf_size * sizeof(bool)));
buf_dblwr->write_buf_unaligned = static_cast<byte*>(
- ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
+ ut_malloc_nokey((1 + buf_size) << srv_page_size_shift));
buf_dblwr->write_buf = static_cast<byte*>(
ut_align(buf_dblwr->write_buf_unaligned,
- UNIV_PAGE_SIZE));
+ srv_page_size));
buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
ut_zalloc_nokey(buf_size * sizeof(void*)));
@@ -197,17 +197,13 @@ start_again:
buf_dblwr_being_created = FALSE;
return(true);
} else {
- fil_space_t* space = fil_space_acquire(TRX_SYS_SPACE);
- const bool fail = UT_LIST_GET_FIRST(space->chain)->size
- < 3 * FSP_EXTENT_SIZE;
- fil_space_release(space);
-
- if (fail) {
+ if (UT_LIST_GET_FIRST(fil_system.sys_space->chain)->size
+ < 3 * FSP_EXTENT_SIZE) {
goto too_small;
}
}
- block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+ block2 = fseg_create(fil_system.sys_space, TRX_SYS_PAGE_NO,
TRX_SYS_DOUBLEWRITE
+ TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
@@ -217,7 +213,8 @@ too_small:
<< "Cannot create doublewrite buffer: "
"the first file in innodb_data_file_path"
" must be at least "
- << (3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20)
+ << (3 * (FSP_EXTENT_SIZE
+ >> (20U - srv_page_size_shift)))
<< "M.";
mtr.commit();
return(false);
@@ -366,10 +363,10 @@ buf_dblwr_init_or_load_pages(
/* We do the file i/o past the buffer pool */
unaligned_read_buf = static_cast<byte*>(
- ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
+ ut_malloc_nokey(3U << srv_page_size_shift));
read_buf = static_cast<byte*>(
- ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
+ ut_align(unaligned_read_buf, srv_page_size));
/* Read the trx sys header to check if we are using the doublewrite
buffer */
@@ -379,8 +376,8 @@ buf_dblwr_init_or_load_pages(
err = os_file_read(
read_request,
- file, read_buf, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE,
- UNIV_PAGE_SIZE);
+ file, read_buf, TRX_SYS_PAGE_NO << srv_page_size_shift,
+ srv_page_size);
if (err != DB_SUCCESS) {
@@ -428,8 +425,8 @@ buf_dblwr_init_or_load_pages(
/* Read the pages from the doublewrite buffer to memory */
err = os_file_read(
read_request,
- file, buf, block1 * UNIV_PAGE_SIZE,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
+ file, buf, block1 << srv_page_size_shift,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE << srv_page_size_shift);
if (err != DB_SUCCESS) {
@@ -445,9 +442,9 @@ buf_dblwr_init_or_load_pages(
err = os_file_read(
read_request,
file,
- buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- block2 * UNIV_PAGE_SIZE,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
+ buf + (TRX_SYS_DOUBLEWRITE_BLOCK_SIZE << srv_page_size_shift),
+ block2 << srv_page_size_shift,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE << srv_page_size_shift);
if (err != DB_SUCCESS) {
@@ -487,8 +484,8 @@ buf_dblwr_init_or_load_pages(
err = os_file_write(
write_request, path, file, page,
- source_page_no * UNIV_PAGE_SIZE,
- UNIV_PAGE_SIZE);
+ source_page_no << srv_page_size_shift,
+ srv_page_size);
if (err != DB_SUCCESS) {
ib::error()
@@ -506,7 +503,7 @@ buf_dblwr_init_or_load_pages(
recv_dblwr.add(page);
}
- page += univ_page_size.physical();
+ page += srv_page_size;
}
if (reset_space_ids) {
@@ -532,11 +529,11 @@ buf_dblwr_process()
}
unaligned_read_buf = static_cast<byte*>(
- ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
+ ut_malloc_nokey(3U << srv_page_size_shift));
read_buf = static_cast<byte*>(
- ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
- byte* const buf = read_buf + UNIV_PAGE_SIZE;
+ ut_align(unaligned_read_buf, srv_page_size));
+ byte* const buf = read_buf + srv_page_size;
for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin();
i != recv_dblwr.pages.end();
@@ -793,14 +790,14 @@ buf_dblwr_check_page_lsn(
}
if (memcmp(page + (FIL_PAGE_LSN + 4),
- page + (UNIV_PAGE_SIZE
+ page + (srv_page_size
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
4)) {
const ulint lsn1 = mach_read_from_4(
page + FIL_PAGE_LSN + 4);
const ulint lsn2 = mach_read_from_4(
- page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
+ page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM
+ 4);
ib::error() << "The page to be written seems corrupt!"
@@ -845,6 +842,7 @@ buf_dblwr_check_block(
switch (fil_page_get_type(block->frame)) {
case FIL_PAGE_INDEX:
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_RTREE:
if (page_is_comp(block->frame)) {
if (page_simple_validate_new(block->frame)) {
@@ -877,7 +875,6 @@ buf_dblwr_check_block(
case FIL_PAGE_TYPE_ALLOCATED:
/* empty pages should never be flushed */
return;
- break;
}
buf_dblwr_assert_on_corrupt_block(block);
@@ -1003,7 +1000,7 @@ try_again:
for (ulint len2 = 0, i = 0;
i < buf_dblwr->first_free;
- len2 += UNIV_PAGE_SIZE, i++) {
+ len2 += srv_page_size, i++) {
const buf_block_t* block;
@@ -1026,8 +1023,8 @@ try_again:
}
/* Write out the first block of the doublewrite buffer */
- len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
- buf_dblwr->first_free) * UNIV_PAGE_SIZE;
+ len = std::min<ulint>(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
+ buf_dblwr->first_free) << srv_page_size_shift;
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
@@ -1040,10 +1037,10 @@ try_again:
/* Write out the second block of the doublewrite buffer. */
len = (buf_dblwr->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- * UNIV_PAGE_SIZE;
+ << srv_page_size_shift;
write_buf = buf_dblwr->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+ + (TRX_SYS_DOUBLEWRITE_BLOCK_SIZE << srv_page_size_shift);
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
@@ -1125,7 +1122,7 @@ try_again:
}
byte* p = buf_dblwr->write_buf
- + univ_page_size.physical() * buf_dblwr->first_free;
+ + srv_page_size * buf_dblwr->first_free;
/* We request frame here to get correct buffer in case of
encryption and/or page compression */
@@ -1138,7 +1135,7 @@ try_again:
memcpy(p, frame, bpage->size.physical());
memset(p + bpage->size.physical(), 0x0,
- univ_page_size.physical() - bpage->size.physical());
+ srv_page_size - bpage->size.physical());
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
@@ -1268,20 +1265,20 @@ retry:
void * frame = buf_page_get_frame(bpage);
if (bpage->size.is_compressed()) {
- memcpy(buf_dblwr->write_buf + univ_page_size.physical() * i,
+ memcpy(buf_dblwr->write_buf + srv_page_size * i,
frame, bpage->size.physical());
- memset(buf_dblwr->write_buf + univ_page_size.physical() * i
+ memset(buf_dblwr->write_buf + srv_page_size * i
+ bpage->size.physical(), 0x0,
- univ_page_size.physical() - bpage->size.physical());
+ srv_page_size - bpage->size.physical());
fil_io(IORequestWrite,
true,
page_id_t(TRX_SYS_SPACE, offset),
univ_page_size,
0,
- univ_page_size.physical(),
- (void *)(buf_dblwr->write_buf + univ_page_size.physical() * i),
+ srv_page_size,
+ (void *)(buf_dblwr->write_buf + srv_page_size * i),
NULL);
} else {
/* It is a regular page. Write it directly to the
@@ -1291,7 +1288,7 @@ retry:
page_id_t(TRX_SYS_SPACE, offset),
univ_page_size,
0,
- univ_page_size.physical(),
+ srv_page_size,
(void*) frame,
NULL);
}
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index ed2131bdcee..c8e7236dc93 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -438,6 +438,11 @@ buf_dump(
buf_dump_status(STATUS_INFO,
"Buffer pool(s) dump completed at %s", now);
+
+ /* Though dumping isn't related to an incomplete load,
+ we reset this to 0 here to indicate that a shutdown can also perform
+ a dump */
+ export_vars.innodb_buffer_pool_load_incomplete = 0;
}
/*****************************************************************//**
@@ -536,7 +541,7 @@ buf_load()
buf_load_status(STATUS_INFO,
"Loading buffer pool(s) from %s", full_filename);
- f = fopen(full_filename, "r");
+ f = fopen(full_filename, "r" STR_O_CLOEXEC);
if (f == NULL) {
buf_load_status(STATUS_INFO,
"Cannot open '%s' for reading: %s",
@@ -601,6 +606,8 @@ buf_load()
rewind(f);
+ export_vars.innodb_buffer_pool_load_incomplete = 1;
+
for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
fscanf_ret = fscanf(f, ULINTPF "," ULINTPF,
&space_id, &page_no);
@@ -649,7 +656,7 @@ buf_load()
ut_sprintf_timestamp(now);
buf_load_status(STATUS_INFO,
"Buffer pool(s) load completed at %s"
- " (%s was empty)", now, full_filename);
+ " (%s was empty or had errors)", now, full_filename);
return;
}
@@ -689,7 +696,7 @@ buf_load()
if (this_space_id != cur_space_id) {
if (space != NULL) {
- fil_space_release(space);
+ space->release();
}
cur_space_id = this_space_id;
@@ -721,7 +728,7 @@ buf_load()
if (buf_load_abort_flag) {
if (space != NULL) {
- fil_space_release(space);
+ space->release();
}
buf_load_abort_flag = FALSE;
ut_free(dump);
@@ -743,18 +750,39 @@ buf_load()
buf_load_throttle_if_needed(
&last_check_time, &last_activity_cnt, i);
+
+#ifdef UNIV_DEBUG
+ if ((i+1) >= srv_buf_pool_load_pages_abort) {
+ buf_load_abort_flag = 1;
+ }
+#endif
}
if (space != NULL) {
- fil_space_release(space);
+ space->release();
}
ut_free(dump);
ut_sprintf_timestamp(now);
- buf_load_status(STATUS_INFO,
+ if (i == dump_n) {
+ buf_load_status(STATUS_INFO,
"Buffer pool(s) load completed at %s", now);
+ export_vars.innodb_buffer_pool_load_incomplete = 0;
+ } else if (!buf_load_abort_flag) {
+ buf_load_status(STATUS_INFO,
+ "Buffer pool(s) load aborted due to user instigated abort at %s",
+ now);
+ /* intentionally don't reset innodb_buffer_pool_load_incomplete
+ as we don't want a shutdown to save the buffer pool */
+ } else {
+ buf_load_status(STATUS_INFO,
+ "Buffer pool(s) load aborted due to shutdown at %s",
+ now);
+ /* intentionally don't reset innodb_buffer_pool_load_incomplete
+ as we want to abort without saving the buffer pool */
+ }
/* Make sure that estimated = completed when we end. */
/* mysql_stage_set_work_completed(pfs_stage_progress, dump_n); */
@@ -823,15 +851,16 @@ DECLARE_THREAD(buf_dump_thread)(void*)
}
if (srv_buffer_pool_dump_at_shutdown && srv_fast_shutdown != 2) {
+ if (export_vars.innodb_buffer_pool_load_incomplete) {
+ buf_dump_status(STATUS_INFO,
+ "Dumping of buffer pool not started"
+ " as load was incomplete");
#ifdef WITH_WSREP
- if (!wsrep_recovery) {
+ } else if (wsrep_recovery) {
#endif /* WITH_WSREP */
-
- buf_dump(FALSE /* ignore shutdown down flag,
- keep going even if we are in a shutdown state */);
-#ifdef WITH_WSREP
+ } else {
+ buf_dump(FALSE/* do complete dump at shutdown */);
}
-#endif /* WITH_WSREP */
}
srv_buf_dump_thread_active = false;
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index a84a79669a7..b1c84e1d74a 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -31,7 +31,6 @@ Created 11/11/1995 Heikki Tuuri
#include "buf0flu.h"
#include "buf0buf.h"
-#include "buf0mtflu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
@@ -151,6 +150,8 @@ struct page_cleaner_t {
threads. */
os_event_t is_finished; /*!< event to signal that all
slots were finished. */
+ os_event_t is_started; /*!< event to signal that
+ thread is started/exiting */
volatile ulint n_workers; /*!< number of worker threads
in existence */
bool requested; /*!< true if requested pages
@@ -899,7 +900,7 @@ buf_flush_init_for_writing(
/* Write the newest modification lsn to the page header and trailer */
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ mach_write_to_8(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
newest_lsn);
if (block && srv_page_size == 16384) {
@@ -930,6 +931,7 @@ buf_flush_init_for_writing(
default:
switch (page_type) {
case FIL_PAGE_INDEX:
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_RTREE:
case FIL_PAGE_UNDO_LOG:
case FIL_PAGE_INODE:
@@ -993,7 +995,7 @@ buf_flush_init_for_writing(
new enum is added and not handled here */
}
- mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ mach_write_to_4(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
checksum);
}
@@ -1018,7 +1020,7 @@ buf_flush_write_block_low(
|| space->purpose == FIL_TYPE_IMPORT
|| space->purpose == FIL_TYPE_TABLESPACE);
ut_ad((space->purpose == FIL_TYPE_TEMPORARY)
- == fsp_is_system_temporary(space->id));
+ == (space == fil_system.temp_space));
page_t* frame = NULL;
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -1133,7 +1135,7 @@ buf_flush_write_block_low(
ut_ad(err == DB_SUCCESS);
}
- fil_space_release_for_io(space);
+ space->release_for_io();
/* Increment the counter of I/O operations used
for selecting LRU policy. */
@@ -1853,6 +1855,7 @@ not guaranteed that the actual number is that big, though)
@param[in] lsn_limit in the case of BUF_FLUSH_LIST all blocks whose
oldest_modification is smaller than this should be flushed (if their number
does not exceed min_n), otherwise ignored */
+static
void
buf_flush_batch(
buf_pool_t* buf_pool,
@@ -1892,6 +1895,7 @@ Gather the aggregated stats for both flush list and LRU list flushing.
@param page_count_flush number of pages flushed from the end of the flush_list
@param page_count_LRU number of pages flushed from the end of the LRU list
*/
+static
void
buf_flush_stats(
/*============*/
@@ -1908,6 +1912,7 @@ buf_flush_stats(
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
+static
ibool
buf_flush_start(
/*============*/
@@ -1939,22 +1944,8 @@ buf_flush_start(
}
/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
-void
-buf_flush_common(
-/*=============*/
- buf_flush_t flush_type, /*!< in: type of flush */
- ulint page_count) /*!< in: number of pages flushed */
-{
- buf_dblwr_flush_buffered_writes();
-
- ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
- srv_stats.buf_pool_flushed.add(page_count);
-}
-
-/******************************************************************//**
End a buffer flush batch for LRU or flush list */
+static
void
buf_flush_end(
/*==========*/
@@ -2126,10 +2117,6 @@ buf_flush_lists(
ulint n_flushed = 0;
bool success = true;
- if (buf_mtflu_init_done()) {
- return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed));
- }
-
if (n_processed) {
*n_processed = 0;
}
@@ -2288,11 +2275,6 @@ buf_flush_LRU_list(
memset(&n, 0, sizeof(flush_counters_t));
- if(buf_mtflu_init_done())
- {
- return(buf_mtflu_flush_LRU_tail());
- }
-
ut_ad(buf_pool);
/* srv_LRU_scan_depth can be arbitrarily large value.
We cap it with current LRU size. */
@@ -2454,7 +2436,7 @@ page_cleaner_flush_pages_recommendation(
cur_lsn = log_get_lsn_nowait();
- /* log_get_lsn_nowait tries to get log_sys->mutex with
+ /* log_get_lsn_nowait tries to get log_sys.mutex with
mutex_enter_nowait, if this does not succeed function
returns 0, do not use that value to update stats. */
if (cur_lsn == 0) {
@@ -2718,7 +2700,7 @@ buf_flush_page_cleaner_init(void)
page_cleaner.is_requested = os_event_create("pc_is_requested");
page_cleaner.is_finished = os_event_create("pc_is_finished");
-
+ page_cleaner.is_started = os_event_create("pc_is_started");
page_cleaner.n_slots = static_cast<ulint>(srv_buf_pool_instances);
ut_d(page_cleaner.n_disabled_debug = 0);
@@ -2793,8 +2775,8 @@ pc_flush_slot(void)
{
ulint lru_tm = 0;
ulint list_tm = 0;
- int lru_pass = 0;
- int list_pass = 0;
+ ulint lru_pass = 0;
+ ulint list_pass = 0;
mutex_enter(&page_cleaner.mutex);
@@ -2998,17 +2980,10 @@ buf_flush_page_cleaner_disabled_loop(void)
}
/** Disables page cleaner threads (coordinator and workers).
-It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-buf_flush_page_cleaner_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+void buf_flush_page_cleaner_disabled_debug_update(THD*,
+ st_mysql_sys_var*, void*,
+ const void* save)
{
if (!page_cleaner.is_running) {
return;
@@ -3438,6 +3413,7 @@ thread_exit:
os_event_destroy(page_cleaner.is_finished);
os_event_destroy(page_cleaner.is_requested);
+ os_event_destroy(page_cleaner.is_started);
buf_page_cleaner_is_active = false;
@@ -3449,6 +3425,35 @@ thread_exit:
OS_THREAD_DUMMY_RETURN;
}
+/** Adjust thread count for page cleaner workers.
+@param[in] new_cnt Number of threads to be used */
+void
+buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt)
+{
+ mutex_enter(&page_cleaner.mutex);
+
+ srv_n_page_cleaners = new_cnt;
+ if (new_cnt > page_cleaner.n_workers) {
+ /* User has increased the number of page
+ cleaner threads. */
+ ulint add = new_cnt - page_cleaner.n_workers;
+ for (ulint i = 0; i < add; i++) {
+ os_thread_id_t cleaner_thread_id;
+ os_thread_create(buf_flush_page_cleaner_worker, NULL, &cleaner_thread_id);
+ }
+ }
+
+ mutex_exit(&page_cleaner.mutex);
+
+ /* Wait until defined number of workers has started. */
+ while (page_cleaner.is_running &&
+ page_cleaner.n_workers != (srv_n_page_cleaners - 1)) {
+ os_event_set(page_cleaner.is_requested);
+ os_event_reset(page_cleaner.is_started);
+ os_event_wait_time(page_cleaner.is_started, 1000000);
+ }
+}
+
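buf_flush_set_page_cleaner_thread_cnt() above grows the pool immediately by spawning new workers, but shrinks it cooperatively: surplus workers notice in their loop that thread_no >= srv_n_page_cleaners - 1 and exit, while the caller keeps signalling is_requested and waiting on is_started until n_workers matches. The standalone sketch below mimics that grow-now/shrink-cooperatively shape with std::thread and a condition variable instead of InnoDB's os_event API; all names are invented for the illustration:

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <thread>
    #include <vector>

    std::mutex m;
    std::condition_variable cv;
    std::atomic<unsigned> target{0};
    unsigned n_workers = 0;

    void worker(unsigned thread_no) {
        { std::lock_guard<std::mutex> g(m); ++n_workers; cv.notify_all(); }
        while (thread_no < target.load())            // exit once above the target
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
        { std::lock_guard<std::mutex> g(m); --n_workers; cv.notify_all(); }
    }

    void set_worker_count(unsigned new_cnt, std::vector<std::thread>& pool) {
        unsigned old = target.exchange(new_cnt);
        for (unsigned i = old; i < new_cnt; i++)     // grow: spawn immediately
            pool.emplace_back(worker, i);
        std::unique_lock<std::mutex> g(m);           // shrink: just wait for exits
        cv.wait(g, [&] { return n_workers == new_cnt; });
    }

    int main() {
        std::vector<std::thread> pool;
        set_worker_count(4, pool);
        set_worker_count(2, pool);
        target = 0;
        for (auto& t : pool) t.join();
    }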
/******************************************************************//**
Worker thread of page_cleaner.
@return a dummy parameter */
@@ -3461,9 +3466,18 @@ DECLARE_THREAD(buf_flush_page_cleaner_worker)(
os_thread_create */
{
my_thread_init();
+#ifndef DBUG_OFF
+ os_thread_id_t cleaner_thread_id = os_thread_get_curr_id();
+#endif
mutex_enter(&page_cleaner.mutex);
- page_cleaner.n_workers++;
+ ulint thread_no = page_cleaner.n_workers++;
+
+ DBUG_LOG("ib_buf", "Thread " << cleaner_thread_id
+ << " started; n_workers=" << page_cleaner.n_workers);
+
+ /* Signal that we have started */
+ os_event_set(page_cleaner.is_started);
mutex_exit(&page_cleaner.mutex);
#ifdef UNIV_LINUX
@@ -3486,11 +3500,31 @@ DECLARE_THREAD(buf_flush_page_cleaner_worker)(
break;
}
+ ut_ad(srv_n_page_cleaners >= 1);
+
+ /* If the number of page cleaner threads has been decreased,
+ exit those that are no longer needed. */
+ if (srv_shutdown_state == SRV_SHUTDOWN_NONE &&
+ thread_no >= (srv_n_page_cleaners - 1)) {
+ DBUG_LOG("ib_buf", "Exiting "
+ << thread_no
+ << " page cleaner worker thread_id "
+ << os_thread_pf(cleaner_thread_id)
+ << " total threads " << srv_n_page_cleaners << ".");
+ break;
+ }
+
pc_flush_slot();
}
mutex_enter(&page_cleaner.mutex);
page_cleaner.n_workers--;
+
+ DBUG_LOG("ib_buf", "Thread " << cleaner_thread_id
+ << " exiting; n_workers=" << page_cleaner.n_workers);
+
+ /* Signal that we have stopped */
+ os_event_set(page_cleaner.is_started);
mutex_exit(&page_cleaner.mutex);
my_thread_end();
@@ -3696,17 +3730,17 @@ buf_flush_get_dirty_pages_count(
}
/** FlushObserver constructor
-@param[in] space_id table space id
+@param[in] space tablespace
@param[in] trx trx instance
@param[in] stage performance schema accounting object,
used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
for accounting. */
FlushObserver::FlushObserver(
- ulint space_id,
+ fil_space_t* space,
trx_t* trx,
ut_stage_alter_t* stage)
:
- m_space_id(space_id),
+ m_space(space),
m_trx(trx),
m_stage(stage),
m_interrupted(false)
@@ -3725,7 +3759,7 @@ FlushObserver::FlushObserver(
/** FlushObserver deconstructor */
FlushObserver::~FlushObserver()
{
- ut_ad(buf_flush_get_dirty_pages_count(m_space_id, this) == 0);
+ ut_ad(buf_flush_get_dirty_pages_count(m_space->id, this) == 0);
UT_DELETE(m_flushed);
UT_DELETE(m_removed);
@@ -3783,10 +3817,10 @@ FlushObserver::flush()
if (!m_interrupted && m_stage) {
m_stage->begin_phase_flush(buf_flush_get_dirty_pages_count(
- m_space_id, this));
+ m_space->id, this));
}
- buf_LRU_flush_or_remove_pages(m_space_id, this);
+ buf_LRU_flush_or_remove_pages(m_space->id, this);
/* Wait for all dirty pages were flushed. */
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 9218ea1b141..859d5ece06a 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -58,9 +58,6 @@ static const ulint BUF_LRU_OLD_TOLERANCE = 20;
(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
@see buf_LRU_old_adjust_len */
#define BUF_LRU_NON_OLD_MIN_LEN 5
-#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
-# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
-#endif
/** When dropping the search hash index entries before deleting an ibd
file, we build a local array of pages belonging to that tablespace
@@ -371,7 +368,7 @@ bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
return false;
drop_ahi:
- ulint id = table->space;
+ ulint id = table->space_id;
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i),
id);
@@ -965,7 +962,7 @@ buf_LRU_get_free_only(
assert_block_ahi_empty(block);
buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_ALLOC(block->frame, srv_page_size);
ut_ad(buf_pool_from_block(block) == buf_pool);
@@ -1012,7 +1009,7 @@ buf_LRU_check_size_of_non_data_objects(
" Check that your transactions do not set too many"
" row locks, or review if"
" innodb_buffer_pool_size="
- << (buf_pool->curr_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
+ << (buf_pool->curr_size >> (20U - srv_page_size_shift))
<< "M could be bigger.";
} else if (!recv_recovery_is_on()
&& buf_pool->curr_size == buf_pool->old_size
@@ -1035,7 +1032,7 @@ buf_LRU_check_size_of_non_data_objects(
" set too many row locks."
" innodb_buffer_pool_size="
<< (buf_pool->curr_size >>
- (20 - UNIV_PAGE_SIZE_SHIFT)) << "M."
+ (20U - srv_page_size_shift)) << "M."
" Starting the InnoDB Monitor to print"
" diagnostics.";
@@ -1222,9 +1219,11 @@ buf_LRU_old_adjust_len(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
-#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
-# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
-#endif
+ compile_time_assert(BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN
+ > BUF_LRU_OLD_RATIO_DIV
+ * (BUF_LRU_OLD_TOLERANCE + 5));
+ compile_time_assert(BUF_LRU_NON_OLD_MIN_LEN < BUF_LRU_OLD_MIN_LEN);
+
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
whose "old" flag is set. */
@@ -1772,10 +1771,10 @@ func_exit:
order to avoid bogus Valgrind warnings.*/
UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
+ srv_page_size);
btr_search_drop_page_hash_index((buf_block_t*) bpage);
UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
+ srv_page_size);
if (b != NULL) {
@@ -1841,10 +1840,10 @@ buf_LRU_block_free_non_file_page(
buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_ALLOC(block->frame, srv_page_size);
#ifdef UNIV_DEBUG
/* Wipe contents of page to reveal possible stale pointers to it */
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ memset(block->frame, '\0', srv_page_size);
#else
/* Wipe page_no and space_id */
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
@@ -1885,7 +1884,7 @@ buf_LRU_block_free_non_file_page(
ut_d(block->page.in_free_list = TRUE);
}
- UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE);
+ UNIV_MEM_FREE(block->frame, srv_page_size);
}
/******************************************************************//**
@@ -1934,7 +1933,7 @@ buf_LRU_block_remove_hashed(
case BUF_BLOCK_FILE_PAGE:
UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
+ srv_page_size);
buf_block_modify_clock_inc((buf_block_t*) bpage);
if (bpage->zip.data) {
const page_t* page = ((buf_block_t*) bpage)->frame;
@@ -1963,11 +1962,11 @@ buf_LRU_block_remove_hashed(
break;
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
-#ifdef UNIV_ZIP_DEBUG
+#if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT
ut_a(page_zip_validate(
&bpage->zip, page,
((buf_block_t*) bpage)->index));
-#endif /* UNIV_ZIP_DEBUG */
+#endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */
break;
default:
ib::error() << "The compressed page to be"
@@ -2083,7 +2082,7 @@ buf_LRU_block_remove_hashed(
memset(((buf_block_t*) bpage)->frame
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
+ srv_page_size);
buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
/* Question: If we release bpage and hash mutex here
diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc
deleted file mode 100644
index 7c15b12950e..00000000000
--- a/storage/innobase/buf/buf0mtflu.cc
+++ /dev/null
@@ -1,736 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved.
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file buf/buf0mtflu.cc
-Multi-threaded flush method implementation
-
-Created 06/11/2013 Dhananjoy Das DDas@fusionio.com
-Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com
-Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com
-Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0mtflu.h"
-#include "buf0checksum.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "page0zip.h"
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "trx0sys.h"
-#include "srv0mon.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-#include "fil0pagecompress.h"
-
-#define MT_COMP_WATER_MARK 50
-/** Time to wait for a message. */
-#define MT_WAIT_IN_USECS 5000000
-
-/* Work item status */
-typedef enum wrk_status {
- WRK_ITEM_UNSET=0, /*!< Work item is not set */
- WRK_ITEM_START=1, /*!< Processing of work item has started */
- WRK_ITEM_DONE=2, /*!< Processing is done usually set to
- SUCCESS/FAILED */
- WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */
- WRK_ITEM_FAILED=3, /*!< Work item process failed */
- WRK_ITEM_EXIT=4, /*!< Exiting */
- WRK_ITEM_SET=5, /*!< Work item is set */
- WRK_ITEM_STATUS_UNDEFINED
-} wrk_status_t;
-
-/* Work item task type */
-typedef enum mt_wrk_tsk {
- MT_WRK_NONE=0, /*!< Exit queue-wait */
- MT_WRK_WRITE=1, /*!< Flush operation */
- MT_WRK_READ=2, /*!< Read operation */
- MT_WRK_UNDEFINED
-} mt_wrk_tsk_t;
-
-/* Work thread status */
-typedef enum wthr_status {
- WTHR_NOT_INIT=0, /*!< Work thread not initialized */
- WTHR_INITIALIZED=1, /*!< Work thread initialized */
- WTHR_SIG_WAITING=2, /*!< Work thread waiting for a signal */
- WTHR_RUNNING=3, /*!< Work thread running */
- WTHR_NO_WORK=4, /*!< Work thread has no work */
- WTHR_KILL_IT=5, /*!< Work thread should exit */
- WTHR_STATUS_UNDEFINED
-} wthr_status_t;
-
-/* Write work task */
-typedef struct wr_tsk {
- buf_pool_t *buf_pool; /*!< buffer-pool instance */
- buf_flush_t flush_type; /*!< flush-type for buffer-pool
- flush operation */
- ulint min; /*!< minimum number of pages
- requested to be flushed */
- lsn_t lsn_limit; /*!< lsn limit for the buffer-pool
- flush operation */
-} wr_tsk_t;
-
-/* Read work task */
-typedef struct rd_tsk {
- buf_pool_t *page_pool; /*!< list of pages to decompress; */
-} rd_tsk_t;
-
-/* Work item */
-typedef struct wrk_itm
-{
- mt_wrk_tsk_t tsk; /*!< Task type. Based on task-type
- one of the entries wr_tsk/rd_tsk
- will be used */
- wr_tsk_t wr; /*!< Flush page list */
- rd_tsk_t rd; /*!< Decompress page list */
- ulint n_flushed; /*!< Number of flushed pages */
- ulint n_evicted; /*!< Number of evicted pages */
- os_thread_id_t id_usr; /*!< Thread-id currently working */
- wrk_status_t wi_status; /*!< Work item status */
- mem_heap_t *wheap; /*!< Heap where to allocate memory
- for queue nodes */
- mem_heap_t *rheap;
-} wrk_t;
-
-struct thread_data_t
-{
- os_thread_id_t wthread_id; /*!< Identifier */
- wthr_status_t wt_status; /*!< Worker thread status */
-};
-
-/** Flush dirty pages when multi-threaded flush is used. */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(mtflush_io_thread)(void* arg);
-
-/** Thread synchronization data */
-struct thread_sync_t
-{
- /** Constructor */
- thread_sync_t(ulint n_threads, mem_heap_t* wheap, mem_heap_t* rheap) :
- thread_global_mtx(), n_threads(n_threads),
- wq(ib_wqueue_create()),
- wr_cq(ib_wqueue_create()),
- rd_cq(ib_wqueue_create()),
- wheap(wheap), rheap(rheap), gwt_status(),
- thread_data(static_cast<thread_data_t*>(
- mem_heap_zalloc(wheap, n_threads
- * sizeof *thread_data)))
- {
- ut_a(wq);
- ut_a(wr_cq);
- ut_a(rd_cq);
- ut_a(thread_data);
-
- mutex_create(LATCH_ID_MTFLUSH_THREAD_MUTEX,
- &thread_global_mtx);
-
- /* Create threads for page-compression-flush */
- for(ulint i = 0; i < n_threads; i++) {
- thread_data[i].wt_status = WTHR_INITIALIZED;
- os_thread_create(mtflush_io_thread, this,
- &thread_data[i].wthread_id);
- }
- }
-
- /** Destructor */
- ~thread_sync_t()
- {
- ut_a(ib_wqueue_is_empty(wq));
- ut_a(ib_wqueue_is_empty(wr_cq));
- ut_a(ib_wqueue_is_empty(rd_cq));
-
- /* Free all queues */
- ib_wqueue_free(wq);
- ib_wqueue_free(wr_cq);
- ib_wqueue_free(rd_cq);
-
- mutex_free(&thread_global_mtx);
-
- mem_heap_free(rheap);
- mem_heap_free(wheap);
- }
-
- /* Global variables used by all threads */
- ib_mutex_t thread_global_mtx; /*!< Mutex used protecting below
- variables */
- ulint n_threads; /*!< Number of threads */
- ib_wqueue_t *wq; /*!< Work Queue */
- ib_wqueue_t *wr_cq; /*!< Write Completion Queue */
- ib_wqueue_t *rd_cq; /*!< Read Completion Queue */
- mem_heap_t* wheap; /*!< Work heap where memory
- is allocated */
- mem_heap_t* rheap; /*!< Work heap where memory
- is allocated */
- wthr_status_t gwt_status; /*!< Global thread status */
-
- /* Variables used by only one thread at a time */
- thread_data_t* thread_data; /*!< Thread specific data */
-};
-
-static thread_sync_t* mtflush_ctx;
-static ib_mutex_t mtflush_mtx;
-
-/******************************************************************//**
-Return true if multi-threaded flush is initialized
-@return true if initialized */
-bool
-buf_mtflu_init_done(void)
-/*=====================*/
-{
- return(mtflush_ctx != NULL);
-}
-
-/******************************************************************//**
-Flush buffer pool instance.
-@return number of flushed pages, or 0 if error happened
-*/
-static
-ulint
-buf_mtflu_flush_pool_instance(
-/*==========================*/
- wrk_t *work_item) /*!< inout: work item to be flushed */
-{
- flush_counters_t n;
- ut_a(work_item != NULL);
- ut_a(work_item->wr.buf_pool != NULL);
-
- if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) {
- /* We have two choices here. If lsn_limit was
- specified then skipping an instance of buffer
- pool means we cannot guarantee that all pages
- up to lsn_limit have been flushed. We can
- return right now with failure or we can try
- to flush remaining buffer pools up to the
- lsn_limit. We attempt to flush other buffer
- pools based on the assumption that it will
- help in the retry which will follow the
- failure. */
-#ifdef UNIV_MTFLUSH_DEBUG
- fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n");
-#endif
- return 0;
- }
-
- memset(&n, 0, sizeof(flush_counters_t));
-
- if (work_item->wr.flush_type == BUF_FLUSH_LRU) {
- /* srv_LRU_scan_depth can be arbitrarily large value.
- * We cap it with current LRU size.
- */
- buf_pool_mutex_enter(work_item->wr.buf_pool);
- work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU);
- buf_pool_mutex_exit(work_item->wr.buf_pool);
- work_item->wr.min = ut_min((ulint)srv_LRU_scan_depth,(ulint)work_item->wr.min);
- }
-
- buf_flush_batch(work_item->wr.buf_pool,
- work_item->wr.flush_type,
- work_item->wr.min,
- work_item->wr.lsn_limit,
- &n);
-
- buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type);
- buf_flush_common(work_item->wr.flush_type, n.flushed);
- work_item->n_flushed = n.flushed;
- work_item->n_evicted = n.evicted;
-
- return work_item->n_flushed;
-}
-
-/******************************************************************//**
-Worker function that waits for work items, processes them and
-sends a reply back.
-*/
-static
-void
-mtflush_service_io(
-/*===============*/
- thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush
- synchronization data */
- thread_data_t* thread_data) /* Thread status data */
-{
- wrk_t *work_item = NULL;
- ulint n_flushed=0;
-
- ut_a(mtflush_io != NULL);
- ut_a(thread_data != NULL);
-
- thread_data->wt_status = WTHR_SIG_WAITING;
-
- work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq);
-
- if (work_item == NULL) {
- work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq);
- }
-
- if (work_item) {
- thread_data->wt_status = WTHR_RUNNING;
- } else {
- /* Thread did not get any work */
- thread_data->wt_status = WTHR_NO_WORK;
- return;
- }
-
- if (work_item->wi_status != WRK_ITEM_EXIT) {
- work_item->wi_status = WRK_ITEM_SET;
- }
-
-#ifdef UNIV_MTFLUSH_DEBUG
- ut_a(work_item->id_usr == 0);
-#endif
- work_item->id_usr = os_thread_get_curr_id();
-
- /* This works as a producer/consumer model, wherein tasks are
- * inserted into the work-queue (wq) and completions are based
- * on the type of operations performed and as a result the WRITE/
- * compression/flush operation completions get posted to wr_cq.
- * And READ/decompress operations completions get posted to rd_cq.
- * in future we may have others.
- */
-
- switch(work_item->tsk) {
- case MT_WRK_NONE:
- ut_a(work_item->wi_status == WRK_ITEM_EXIT);
- work_item->wi_status = WRK_ITEM_EXIT;
- ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
- thread_data->wt_status = WTHR_KILL_IT;
- break;
-
- case MT_WRK_WRITE:
- ut_a(work_item->wi_status == WRK_ITEM_SET);
- work_item->wi_status = WRK_ITEM_START;
- /* Process work item */
- if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) {
- work_item->wi_status = WRK_ITEM_FAILED;
- }
- work_item->wi_status = WRK_ITEM_SUCCESS;
- ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
- break;
-
- case MT_WRK_READ:
- ut_a(0);
- break;
-
- default:
- /* None other than Write/Read handling planned */
- ut_a(0);
- break;
- }
-}
-
-/** Flush dirty pages when multi-threaded flush is used. */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(mtflush_io_thread)(void* arg)
-{
- thread_sync_t *mtflush_io = ((thread_sync_t *)arg);
- thread_data_t *this_thread_data = NULL;
- ulint i;
-
- /* Find correct slot for this thread */
- mutex_enter(&(mtflush_io->thread_global_mtx));
- for(i=0; i < mtflush_io->n_threads; i ++) {
- if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) {
- break;
- }
- }
-
- ut_a(i <= mtflush_io->n_threads);
- this_thread_data = &mtflush_io->thread_data[i];
- mutex_exit(&(mtflush_io->thread_global_mtx));
-
- while (TRUE) {
-
-#ifdef UNIV_MTFLUSH_DEBUG
- fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n",
- os_thread_get_curr_id(),
- ib_wqueue_len(mtflush_io->wq),
- ib_wqueue_len(mtflush_io->wr_cq));
-#endif /* UNIV_MTFLUSH_DEBUG */
-
- mtflush_service_io(mtflush_io, this_thread_data);
-
-
- if (this_thread_data->wt_status == WTHR_KILL_IT) {
- break;
- }
- }
-
- os_thread_exit();
- OS_THREAD_DUMMY_RETURN;
-}
-
-/******************************************************************//**
-Add exit work item to work queue to signal multi-threaded flush
-threads that they should exit.
-*/
-void
-buf_mtflu_io_thread_exit(void)
-/*==========================*/
-{
- ulint i;
- thread_sync_t* mtflush_io = mtflush_ctx;
- wrk_t* work_item = NULL;
-
- ut_a(mtflush_io != NULL);
-
- /* Allocate work items for shutdown message */
- work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads);
-
- /* Confirm if the io-thread KILL is in progress, bailout */
- if (mtflush_io->gwt_status == WTHR_KILL_IT) {
- return;
- }
-
- mtflush_io->gwt_status = WTHR_KILL_IT;
-
- /* This lock is to safeguard against a timing bug: flush requests take
- this mutex before sending work items to be processed by flush
- threads. Inside flush thread we assume that work queue contains only
- a constant number of items. Thus, we may not install new work items
- below before all previous ones are processed. This mutex is released
- by flush request after all work items sent to flush threads have
- been processed. Thus, we can get this mutex if and only if work
- queue is empty. */
-
- mutex_enter(&mtflush_mtx);
-
- /* Make sure the work queue is empty */
- ut_a(ib_wqueue_is_empty(mtflush_io->wq));
-
- /* Send one exit work item/thread */
- for (i=0; i < (ulint)srv_mtflush_threads; i++) {
- work_item[i].tsk = MT_WRK_NONE;
- work_item[i].wi_status = WRK_ITEM_EXIT;
- work_item[i].wheap = mtflush_io->wheap;
- work_item[i].rheap = mtflush_io->rheap;
- work_item[i].id_usr = 0;
-
- ib_wqueue_add(mtflush_io->wq,
- (void *)&(work_item[i]),
- mtflush_io->wheap);
- }
-
- /* Requests sent */
- mutex_exit(&mtflush_mtx);
-
- /* Wait until all work items on a work queue are processed */
- while(!ib_wqueue_is_empty(mtflush_io->wq)) {
- /* Wait */
- os_thread_sleep(MT_WAIT_IN_USECS);
- }
-
- ut_a(ib_wqueue_is_empty(mtflush_io->wq));
-
- /* Collect all work done items */
- for (i=0; i < (ulint)srv_mtflush_threads;) {
- wrk_t* work_item = NULL;
-
- work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS);
-
- /* If we receive a reply to a work item and its status is exit,
- the thread has processed this message and exited */
- if (work_item && work_item->wi_status == WRK_ITEM_EXIT) {
- i++;
- }
- }
-
- /* Wait about 1/2 sec to allow the threads to really exit */
- os_thread_sleep(MT_WAIT_IN_USECS);
-
- /* Make sure that work queue is empty */
- while(!ib_wqueue_is_empty(mtflush_io->wq))
- {
- ib_wqueue_nowait(mtflush_io->wq);
- }
-
- mtflush_ctx->~thread_sync_t();
- mtflush_ctx = NULL;
-
- mutex_free(&mtflush_mtx);
-}
-
-/******************************************************************//**
-Initialize multi-threaded flush thread synchronization data.
-@return Initialized multi-threaded flush thread synchronization data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
- ulint n_threads, /*!< in: Number of threads to create */
- ulint wrk_cnt) /*!< in: Number of work items */
-{
- mem_heap_t* mtflush_heap;
- mem_heap_t* mtflush_heap2;
-
- /* Create heap, work queue, write completion queue, read
- completion queue for multi-threaded flush, and init
- handler. */
- mtflush_heap = mem_heap_create(0);
- ut_a(mtflush_heap != NULL);
- mtflush_heap2 = mem_heap_create(0);
- ut_a(mtflush_heap2 != NULL);
-
- mutex_create(LATCH_ID_MTFLUSH_MUTEX, &mtflush_mtx);
-
- mtflush_ctx = new (mem_heap_zalloc(mtflush_heap, sizeof *mtflush_ctx))
- thread_sync_t(n_threads, mtflush_heap, mtflush_heap2);
-
- return((void *)mtflush_ctx);
-}
-
-/******************************************************************//**
-Flush buffer pool instances.
-@return number of pages flushed. */
-ulint
-buf_mtflu_flush_work_items(
-/*=======================*/
- ulint buf_pool_inst, /*!< in: Number of buffer pool instances */
- flush_counters_t *per_pool_cnt, /*!< out: Number of pages
- flushed or evicted /instance */
- buf_flush_t flush_type, /*!< in: Type of flush */
- ulint min_n, /*!< in: Wished minimum number of
- blocks to be flushed */
- lsn_t lsn_limit) /*!< in: All blocks whose
- oldest_modification is smaller than
- this should be flushed (if their
- number does not exceed min_n) */
-{
- ulint n_flushed=0, i;
- mem_heap_t* work_heap;
- mem_heap_t* reply_heap;
- wrk_t work_item[MTFLUSH_MAX_WORKER];
-
- if (mtflush_ctx->gwt_status == WTHR_KILL_IT) {
- return 0;
- }
-
- /* Allocate heap where all work items and queue
- node items are allocated */
- work_heap = mem_heap_create(0);
- reply_heap = mem_heap_create(0);
-
-
- for(i=0;i<buf_pool_inst; i++) {
- work_item[i].tsk = MT_WRK_WRITE;
- work_item[i].wr.buf_pool = buf_pool_from_array(i);
- work_item[i].wr.flush_type = flush_type;
- work_item[i].wr.min = min_n;
- work_item[i].wr.lsn_limit = lsn_limit;
- work_item[i].wi_status = WRK_ITEM_UNSET;
- work_item[i].wheap = work_heap;
- work_item[i].rheap = reply_heap;
- work_item[i].n_flushed = 0;
- work_item[i].n_evicted = 0;
- work_item[i].id_usr = 0;
-
- ib_wqueue_add(mtflush_ctx->wq,
- (void *)(work_item + i),
- work_heap);
- }
-
- /* wait on the completion to arrive */
- for(i=0; i< buf_pool_inst;) {
- wrk_t *done_wi = NULL;
- done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq);
-
- if (done_wi != NULL) {
- per_pool_cnt[i].flushed = done_wi->n_flushed;
- per_pool_cnt[i].evicted = done_wi->n_evicted;
-
-#ifdef UNIV_MTFLUSH_DEBUG
- if((int)done_wi->id_usr == 0 &&
- (done_wi->wi_status == WRK_ITEM_SET ||
- done_wi->wi_status == WRK_ITEM_UNSET)) {
- fprintf(stderr,
- "**Set/Unused work_item[%lu] flush_type=%d\n",
- i,
- done_wi->wr.flush_type);
- ut_a(0);
- }
-#endif
-
- n_flushed+= done_wi->n_flushed+done_wi->n_evicted;
- i++;
- }
- }
-
- /* Release used work_items and queue nodes */
- mem_heap_free(work_heap);
- mem_heap_free(reply_heap);
-
- return(n_flushed);
-}
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list
-*/
-bool
-buf_mtflu_flush_list(
-/*=================*/
- ulint min_n, /*!< in: wished minimum number of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed) /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
-{
- ulint i;
- bool success = true;
- flush_counters_t cnt[MTFLUSH_MAX_WORKER];
-
- if (n_processed) {
- *n_processed = 0;
- }
-
- if (min_n != ULINT_MAX) {
- /* Ensure that flushing is spread evenly amongst the
- buffer pool instances. When min_n is ULINT_MAX
- we need to flush everything up to the lsn limit
- so no limit here. */
- min_n = (min_n + srv_buf_pool_instances - 1)
- / srv_buf_pool_instances;
- }
-
- /* This lock is to safeguard against re-entry, if any. */
- mutex_enter(&mtflush_mtx);
- buf_mtflu_flush_work_items(srv_buf_pool_instances,
- cnt, BUF_FLUSH_LIST,
- min_n, lsn_limit);
- mutex_exit(&mtflush_mtx);
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- if (n_processed) {
- *n_processed += cnt[i].flushed+cnt[i].evicted;
- }
-
- if (cnt[i].flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_PAGES,
- cnt[i].flushed);
- }
-
- if(cnt[i].evicted) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- cnt[i].evicted);
- }
- }
-#ifdef UNIV_MTFLUSH_DEBUG
- fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n",
- __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed);
-#endif
- return(success);
-}
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void)
-/*==========================*/
-{
- ulint total_flushed=0, i;
- flush_counters_t cnt[MTFLUSH_MAX_WORKER];
-
- ut_a(buf_mtflu_init_done());
-
- /* At shutdown do not send requests anymore */
- if (!mtflush_ctx || mtflush_ctx->gwt_status == WTHR_KILL_IT) {
- return (total_flushed);
- }
-
- /* This lock is to safeguard against re-entry if any */
- mutex_enter(&mtflush_mtx);
- buf_mtflu_flush_work_items(srv_buf_pool_instances,
- cnt, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0);
- mutex_exit(&mtflush_mtx);
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- total_flushed += cnt[i].flushed+cnt[i].evicted;
-
- if (cnt[i].flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_COUNT,
- MONITOR_LRU_BATCH_FLUSH_PAGES,
- cnt[i].flushed);
- }
-
- if(cnt[i].evicted) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- cnt[i].evicted);
- }
- }
-
-#if UNIV_MTFLUSH_DEBUG
- fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", (
- srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed);
-#endif
-
- return(total_flushed);
-}
-
-/*********************************************************************//**
-Set correct thread identifiers to io thread array based on
-information we have. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
- ulint n_threads, /*!<in: Number of threads to fill */
- void* ctx, /*!<in: thread context */
- os_thread_id_t* thread_ids) /*!<in: thread id array */
-{
- thread_sync_t *mtflush_io = ((thread_sync_t *)ctx);
- ulint i;
- ut_a(mtflush_io != NULL);
- ut_a(thread_ids != NULL);
-
- for(i = 0; i < n_threads; i++) {
- thread_ids[i] = mtflush_io->thread_data[i].wthread_id;
- }
-}
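
The deleted buf0mtflu.cc implemented multi-threaded flushing as a producer/consumer scheme: work items describing per-buffer-pool flush batches are posted to a work queue, worker threads process them, and completions are posted back on a reply queue. A standalone sketch of that pattern using standard C++ threads instead of InnoDB's ib_wqueue/os_thread primitives; all names and the pretend flush payload are invented for illustration, this is not InnoDB code:

#include <cassert>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

/* A work item describing one per-buffer-pool "flush" request, cf. wrk_t. */
struct work_item {
    int  pool_id;    /* which pretend buffer pool instance to flush */
    bool exit_flag;  /* cf. the MT_WRK_NONE / WRK_ITEM_EXIT message  */
    int  n_flushed;  /* filled in by the worker                      */
};

/* A tiny blocking queue standing in for ib_wqueue_t. */
template <typename T>
class wqueue {
    std::queue<T>           q;
    std::mutex              m;
    std::condition_variable cv;
public:
    void add(T item)
    {
        {
            std::lock_guard<std::mutex> lock(m);
            q.push(item);
        }
        cv.notify_one();
    }
    T wait()
    {
        std::unique_lock<std::mutex> lock(m);
        cv.wait(lock, [this] { return !q.empty(); });
        T item = q.front();
        q.pop();
        return item;
    }
};

int main()
{
    wqueue<work_item*> wq;    /* work queue,       cf. thread_sync_t::wq    */
    wqueue<work_item*> wr_cq; /* completion queue, cf. thread_sync_t::wr_cq */

    const int n_threads = 2;
    const int n_pools = 4;

    std::vector<std::thread> workers;
    for (int t = 0; t < n_threads; t++) {
        workers.emplace_back([&wq, &wr_cq] {
            for (;;) {
                work_item* wi = wq.wait();
                if (wi->exit_flag) {
                    wr_cq.add(wi); /* acknowledge the exit message and stop */
                    return;
                }
                /* pretend to flush this buffer pool instance */
                wi->n_flushed = 10 * (wi->pool_id + 1);
                wr_cq.add(wi);
            }
        });
    }

    /* Post one flush request per buffer pool instance... */
    std::vector<work_item> items(n_pools);
    for (int i = 0; i < n_pools; i++) {
        items[i].pool_id = i;
        items[i].exit_flag = false;
        wq.add(&items[i]);
    }
    /* ...and collect the completions, as buf_mtflu_flush_work_items() did. */
    int total = 0;
    for (int i = 0; i < n_pools; i++) {
        total += wr_cq.wait()->n_flushed;
    }
    assert(total == 10 + 20 + 30 + 40);

    /* Shut down: one exit item per worker, cf. buf_mtflu_io_thread_exit(). */
    std::vector<work_item> exits(n_threads);
    for (int t = 0; t < n_threads; t++) {
        exits[t].exit_flag = true;
        wq.add(&exits[t]);
    }
    for (int t = 0; t < n_threads; t++) {
        wr_cq.wait();
    }
    for (std::thread& t : workers) {
        t.join();
    }
    return 0;
}
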
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 372b1c5e0a0..3edb6c6ee98 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -303,7 +303,7 @@ buf_read_ahead_random(
if (high > space->size) {
high = space->size;
}
- fil_space_release(space);
+ space->release();
} else {
return(0);
}
@@ -586,7 +586,7 @@ buf_read_ahead_linear(
if (fil_space_t* space = fil_space_acquire(page_id.space())) {
space_size = space->size;
- fil_space_release(space);
+ space->release();
if (high > space_size) {
/* The area is not whole */
@@ -814,7 +814,7 @@ buf_read_ibuf_merge_pages(
in the arrays */
{
#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored < UNIV_PAGE_SIZE);
+ ut_a(n_stored < srv_page_size);
#endif
for (ulint i = 0; i < n_stored; i++) {
diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc
index cd5a8024db1..cc14664821f 100644
--- a/storage/innobase/data/data0data.cc
+++ b/storage/innobase/data/data0data.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +40,40 @@ to data_error. */
byte data_error;
#endif /* UNIV_DEBUG */
+/** Trim the tail of an index tuple before insert or update.
+After instant ADD COLUMN, if the last fields of a clustered index tuple
+match the default values that were explicitly specified or implied during
+ADD COLUMN, there will be no need to store them.
+NOTE: A page latch in the index must be held, so that the index
+may not lose 'instantness' before the trimmed tuple has been
+inserted or updated.
+@param[in] index index possibly with instantly added columns */
+void dtuple_t::trim(const dict_index_t& index)
+{
+ ut_ad(n_fields >= index.n_core_fields);
+ ut_ad(n_fields <= index.n_fields);
+ ut_ad(index.is_instant());
+
+ ulint i = n_fields;
+ for (; i > index.n_core_fields; i--) {
+ const dfield_t* dfield = dtuple_get_nth_field(this, i - 1);
+ const dict_col_t* col = dict_index_get_nth_col(&index, i - 1);
+ ut_ad(col->is_instant());
+ ulint len = dfield_get_len(dfield);
+ if (len != col->def_val.len) {
+ break;
+ }
+
+ if (len != 0 && len != UNIV_SQL_NULL
+ && dfield->data != col->def_val.data
+ && memcmp(dfield->data, col->def_val.data, len)) {
+ break;
+ }
+ }
+
+ n_fields = i;
+}
+
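+
+A standalone sketch of the same trailing-default trimming idea as dtuple_t::trim() above, expressed over plain std::vector: "core" fields that existed before instant ADD COLUMN are never trimmed, and instantly added fields are dropped from the tail while they still equal their defaults. The helper and its types are invented for illustration; this is not InnoDB code.
+
+#include <cassert>
+#include <string>
+#include <vector>
+
+static void trim_trailing_defaults(std::vector<std::string>&       row,
+                                   const std::vector<std::string>& defaults,
+                                   size_t                          n_core)
+{
+    size_t i = row.size();
+    while (i > n_core && row[i - 1] == defaults[i - 1]) {
+        i--;            /* field still equals its ADD COLUMN default */
+    }
+    row.resize(i);      /* cf. "n_fields = i" in dtuple_t::trim() */
+}
+
+int main()
+{
+    /* 2 core fields + 3 instantly added fields with defaults d3, d4, d5 */
+    std::vector<std::string> defaults = {"", "", "d3", "d4", "d5"};
+    std::vector<std::string> row      = {"a", "b", "x", "d4", "d5"};
+
+    trim_trailing_defaults(row, defaults, 2);
+    assert(row.size() == 3);   /* d4 and d5 trimmed; "x" differs, so kept */
+    return 0;
+}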
/** Compare two data tuples.
@param[in] tuple1 first data tuple
@param[in] tuple2 second data tuple
@@ -311,7 +345,7 @@ dfield_print_also_hex(
val = mach_read_from_1(data);
if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x80;
+ val &= ~0x80U;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
@@ -322,7 +356,7 @@ dfield_print_also_hex(
val = mach_read_from_2(data);
if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x8000;
+ val &= ~0x8000U;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
@@ -333,7 +367,7 @@ dfield_print_also_hex(
val = mach_read_from_3(data);
if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x800000;
+ val &= ~0x800000U;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
@@ -572,7 +606,7 @@ dtuple_convert_big_rec(
return(NULL);
}
- if (dict_table_get_format(index->table) < UNIV_FORMAT_B) {
+ if (!dict_table_has_atomic_blobs(index->table)) {
/* up to MySQL 5.1: store a 768-byte prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE
+ DICT_ANTELOPE_MAX_INDEX_COL_LEN;
@@ -811,6 +845,7 @@ dfield_t::clone(mem_heap_t* heap) const
dfield_t* obj = static_cast<dfield_t*>(
mem_heap_alloc(heap, sizeof(dfield_t) + size));
+ ut_ad(len != UNIV_SQL_DEFAULT);
obj->ext = ext;
obj->len = len;
obj->type = type;
diff --git a/storage/innobase/data/data0type.cc b/storage/innobase/data/data0type.cc
index 02e79ab6983..84962d097aa 100644
--- a/storage/innobase/data/data0type.cc
+++ b/storage/innobase/data/data0type.cc
@@ -26,6 +26,12 @@ Created 1/16/1996 Heikki Tuuri
#include "data0type.h"
+/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
+const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] = {
+ 0, 0, 0, 0, 0, 0,
+ 0x80, 0, 0, 0, 0, 0, 0
+};
+
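+
+A hedged sketch of what the 13 bytes of reset_trx_id above encode, assuming the usual layout of a 6-byte big-endian DB_TRX_ID followed by a 7-byte big-endian DB_ROLL_PTR whose most significant bit is the insert flag (illustration only, not InnoDB code):
+
+#include <cassert>
+#include <cstdint>
+
+static uint64_t read_be(const unsigned char* p, int n)
+{
+    uint64_t v = 0;
+    for (int i = 0; i < n; i++) v = (v << 8) | p[i];
+    return v;
+}
+
+int main()
+{
+    static const unsigned char reset[6 + 7] = {
+        0, 0, 0, 0, 0, 0,        /* DB_TRX_ID = 0                      */
+        0x80, 0, 0, 0, 0, 0, 0   /* DB_ROLL_PTR: only the insert flag  */
+    };
+
+    uint64_t trx_id   = read_be(reset, 6);
+    uint64_t roll_ptr = read_be(reset + 6, 7);
+
+    assert(trx_id == 0);
+    assert(roll_ptr >> 55);                       /* insert flag is set */
+    assert((roll_ptr & ~(uint64_t(1) << 55)) == 0); /* no undo history  */
+    return 0;
+}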
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
@@ -53,7 +59,7 @@ dtype_get_at_most_n_mbchars(
const char* str) /*!< in: the string whose prefix
length is being determined */
{
- ut_a(data_len != UNIV_SQL_NULL);
+ ut_a(len_is_stored(data_len));
ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
if (mbminlen != mbmaxlen) {
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 43c64c86aec..e5bf33593ed 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -177,10 +177,11 @@ dict_hdr_create(
ulint root_page_no;
ut_ad(mtr);
+ compile_time_assert(DICT_HDR_SPACE == 0);
/* Create the dictionary header file block in a new, allocated file
segment in the system tablespace */
- block = fseg_create(DICT_HDR_SPACE, 0,
+ block = fseg_create(fil_system.sys_space, 0,
DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
ut_a(DICT_HDR_PAGE_NO == block->page.id.page_no());
@@ -209,8 +210,8 @@ dict_hdr_create(
system tables */
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
- univ_page_size, DICT_TABLES_ID,
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+ fil_system.sys_space, DICT_TABLES_ID,
dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
@@ -220,8 +221,8 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
- univ_page_size, DICT_TABLE_IDS_ID,
+ root_page_no = btr_create(DICT_UNIQUE,
+ fil_system.sys_space, DICT_TABLE_IDS_ID,
dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
@@ -231,8 +232,8 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
- univ_page_size, DICT_COLUMNS_ID,
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+ fil_system.sys_space, DICT_COLUMNS_ID,
dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
@@ -242,8 +243,8 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
- univ_page_size, DICT_INDEXES_ID,
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+ fil_system.sys_space, DICT_INDEXES_ID,
dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
@@ -253,8 +254,8 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE,
- univ_page_size, DICT_FIELDS_ID,
+ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+ fil_system.sys_space, DICT_FIELDS_ID,
dict_ind_redundant, NULL, mtr);
if (root_page_no == FIL_NULL) {
@@ -281,7 +282,6 @@ dict_boot(void)
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
- dberr_t error;
/* Be sure these constants do not ever change. To avoid bloat,
only check the *NUM_FIELDS* in each table */
@@ -330,15 +330,16 @@ dict_boot(void)
/* Insert into the dictionary cache the descriptions of the basic
system tables */
/*-------------------------*/
- table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0, 0);
+ table = dict_mem_table_create("SYS_TABLES", fil_system.sys_space,
+ 8, 0, 0, 0);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0,
MAX_FULL_NAME_LEN);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- /* The low order bit of TYPE is always set to 1. If the format
- is UNIV_FORMAT_B or higher, this field matches table->flags. */
+ /* The low order bit of TYPE is always set to 1. If ROW_FORMAT
+ is not REDUNDANT or COMPACT, this field matches table->flags. */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
/* MIX_LEN may contain additional table flags when
@@ -349,40 +350,35 @@ dict_boot(void)
table->id = DICT_TABLES_ID;
- dict_table_add_to_cache(table, FALSE, heap);
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
dict_sys->sys_tables = table;
mem_heap_empty(heap);
- index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
- DICT_HDR_SPACE,
+ index = dict_mem_index_create(table, "CLUST_IND",
DICT_UNIQUE | DICT_CLUSTERED, 1);
dict_mem_index_add_field(index, "NAME", 0);
index->id = DICT_TABLES_ID;
-
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
+ index = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_TABLES));
+ ut_a(index);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(table->indexes.start->n_nullable));
/*-------------------------*/
- index = dict_mem_index_create("SYS_TABLES", "ID_IND",
- DICT_HDR_SPACE, DICT_UNIQUE, 1);
+ index = dict_mem_index_create(table, "ID_IND", DICT_UNIQUE, 1);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_TABLE_IDS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
+ index = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_TABLE_IDS));
+ ut_a(index);
/*-------------------------*/
- table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE,
+ table = dict_mem_table_create("SYS_COLUMNS", fil_system.sys_space,
7, 0, 0, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
@@ -395,27 +391,27 @@ dict_boot(void)
table->id = DICT_COLUMNS_ID;
- dict_table_add_to_cache(table, FALSE, heap);
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
dict_sys->sys_columns = table;
mem_heap_empty(heap);
- index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
- DICT_HDR_SPACE,
+ index = dict_mem_index_create(table, "CLUST_IND",
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_COLUMNS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
+ index = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_COLUMNS));
+ ut_a(index);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(table->indexes.start->n_nullable));
/*-------------------------*/
- table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE,
+ table = dict_mem_table_create("SYS_INDEXES", fil_system.sys_space,
DICT_NUM_COLS__SYS_INDEXES, 0, 0, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
@@ -423,33 +419,43 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
+ /* SYS_INDEXES.SPACE is redundant and not being read;
+ SYS_TABLES.SPACE is being used instead. */
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MERGE_THRESHOLD", DATA_INT, 0, 4);
table->id = DICT_INDEXES_ID;
- dict_table_add_to_cache(table, FALSE, heap);
+ dict_table_add_system_columns(table, heap);
+ /* The column SYS_INDEXES.MERGE_THRESHOLD was "instantly"
+ added in MySQL 5.7 and MariaDB 10.2.2. Assign it DEFAULT NULL.
+ Because of file format compatibility, we must treat SYS_INDEXES
+ as a special case, relaxing some debug assertions
+ for DICT_INDEXES_ID. */
+ dict_table_get_nth_col(table, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD)
+ ->def_val.len = UNIV_SQL_NULL;
+ table->add_to_cache();
dict_sys->sys_indexes = table;
mem_heap_empty(heap);
- index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
- DICT_HDR_SPACE,
+ index = dict_mem_index_create(table, "CLUST_IND",
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_INDEXES_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
+ index = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_INDEXES));
+ ut_a(index);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(table->indexes.start->n_nullable));
/*-------------------------*/
- table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0, 0);
+ table = dict_mem_table_create("SYS_FIELDS", fil_system.sys_space,
+ 3, 0, 0, 0);
dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 8);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
@@ -457,24 +463,24 @@ dict_boot(void)
table->id = DICT_FIELDS_ID;
- dict_table_add_to_cache(table, FALSE, heap);
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
dict_sys->sys_fields = table;
mem_heap_free(heap);
- index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
- DICT_HDR_SPACE,
+ index = dict_mem_index_create(table, "CLUST_IND",
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "INDEX_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_FIELDS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
+ index = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_FIELDS));
+ ut_a(index);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(table->indexes.start->n_nullable));
mtr_commit(&mtr);
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index cc8c03761d8..25a90342f78 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -36,6 +36,7 @@ Created 1/8/1996 Heikki Tuuri
#include "row0mysql.h"
#include "pars0pars.h"
#include "trx0roll.h"
+#include "trx0rseg.h"
#include "trx0undo.h"
#include "ut0vec.h"
#include "dict0priv.h"
@@ -62,7 +63,9 @@ dict_create_sys_tables_tuple(
ulint type;
ut_ad(table);
+ ut_ad(!table->space || table->space->id == table->space_id);
ut_ad(heap);
+ ut_ad(table->n_cols >= DATA_N_SYS_COLS);
sys_tables = dict_sys->sys_tables;
@@ -96,12 +99,11 @@ dict_create_sys_tables_tuple(
/* If there is any virtual column, encode it in N_COLS */
mach_write_to_4(ptr, dict_table_encode_n_col(
- static_cast<ulint>(table->n_def),
- static_cast<ulint>(table->n_v_def))
- | ((table->flags & DICT_TF_COMPACT) << 31));
+ ulint(table->n_cols - DATA_N_SYS_COLS),
+ ulint(table->n_v_def))
+ | (ulint(table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
-
/* 5: TYPE (table flags) -----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__TYPE);
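
The hunk above packs the stored and virtual column counts plus the COMPACT flag into the 4-byte SYS_TABLES.N_COLS value. A hedged sketch of that packing as understood from dict_table_encode_n_col(); the exact bit positions are an assumption here, not taken from this diff:

#include <cassert>
#include <cstdint>

static uint32_t encode_n_cols(uint32_t n_cols, uint32_t n_v_cols, bool compact)
{
    return n_cols | (n_v_cols << 16) | (compact ? 1U << 31 : 0);
}

int main()
{
    uint32_t v = encode_n_cols(5, 2, true);

    assert((v & 0xFFFFU) == 5);          /* stored (non-virtual) columns   */
    assert(((v >> 16) & 0x7FFFU) == 2);  /* virtual columns                */
    assert(v >> 31);                     /* ROW_FORMAT is at least COMPACT */
    return 0;
}
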
@@ -145,7 +147,7 @@ dict_create_sys_tables_tuple(
entry, DICT_COL__SYS_TABLES__SPACE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, table->space);
+ mach_write_to_4(ptr, table->space_id);
dfield_set_data(dfield, ptr, 4);
/*----------------------------------*/
@@ -188,7 +190,7 @@ dict_create_sys_columns_tuple(
v_col_no = column->ind;
} else {
column = dict_table_get_nth_col(table, i);
- ut_ad(!dict_col_is_virtual(column));
+ ut_ad(!column->is_virtual());
}
sys_columns = dict_sys->sys_columns;
@@ -348,15 +350,12 @@ dict_build_table_def_step(
que_thr_t* thr, /*!< in: query thread */
tab_node_t* node) /*!< in: table create node */
{
- dict_table_t* table;
-
- table = node->table;
- ut_ad(!dict_table_is_temporary(table));
-
- trx_t* trx = thr_get_trx(thr);
- dict_table_assign_new_id(table, trx);
-
ut_ad(mutex_own(&dict_sys->mutex));
+ dict_table_t* table = node->table;
+ ut_ad(!table->is_temporary());
+ ut_ad(!table->space);
+ ut_ad(table->space_id == ULINT_UNDEFINED);
+ dict_table_assign_new_id(table, thr_get_trx(thr));
/* Always set this bit for all new created tables */
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
@@ -367,12 +366,12 @@ dict_build_table_def_step(
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
/* This table will need a new tablespace. */
- ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
- || dict_table_get_format(table) >= UNIV_FORMAT_B);
+ || dict_table_has_atomic_blobs(table));
+ trx_t* trx = thr_get_trx(thr);
ut_ad(trx->table_id);
mtr_t mtr;
- trx_undo_t* undo = trx->rsegs.m_redo.insert_undo;
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
if (undo && !undo->table_id
&& trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
/* This must be a TRUNCATE operation where
@@ -381,41 +380,43 @@ dict_build_table_def_step(
associated with the new empty table, so that
we can remove it on recovery. */
mtr.start();
- trx_undo_mark_as_dict(trx, undo, &mtr);
+ undo->table_id = trx->table_id;
+ undo->dict_operation = TRUE;
+ page_t* page = trx_undo_page_get(
+ page_id_t(trx->rsegs.m_redo.rseg->space->id,
+ undo->hdr_page_no),
+ &mtr);
+ mlog_write_ulint(page + undo->hdr_offset
+ + TRX_UNDO_DICT_TRANS,
+ TRUE, MLOG_1BYTE, &mtr);
+ mlog_write_ull(page + undo->hdr_offset
+ + TRX_UNDO_TABLE_ID,
+ trx->table_id, &mtr);
mtr.commit();
log_write_up_to(mtr.commit_lsn(), true);
}
- ulint space;
/* Get a new tablespace ID */
- dict_hdr_get_new_id(NULL, NULL, &space, table, false);
+ ulint space_id;
+ dict_hdr_get_new_id(NULL, NULL, &space_id, table, false);
DBUG_EXECUTE_IF(
"ib_create_table_fail_out_of_space_ids",
- space = ULINT_UNDEFINED;
+ space_id = ULINT_UNDEFINED;
);
- if (space == ULINT_UNDEFINED) {
+ if (space_id == ULINT_UNDEFINED) {
return DB_ERROR;
}
- table->space = unsigned(space);
/* Determine the tablespace flags. */
bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
- char* filepath;
-
- if (has_data_dir) {
- ut_ad(table->data_dir_path);
- filepath = fil_make_filepath(
- table->data_dir_path,
- table->name.m_name, IBD, true);
-
- } else {
- /* Make the tablespace file in the default dir
- using the table name */
- filepath = fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
+ ut_ad(!has_data_dir || table->data_dir_path);
+ char* filepath = has_data_dir
+ ? fil_make_filepath(table->data_dir_path,
+ table->name.m_name, IBD, true)
+ : fil_make_filepath(NULL,
+ table->name.m_name, IBD, false);
/* We create a new single-table tablespace for the table.
We initially let it be 4 pages:
@@ -425,49 +426,36 @@ dict_build_table_def_step(
- page 3 will contain the root of the clustered index of
the table we create here. */
- dberr_t err = fil_ibd_create(
- space, table->name.m_name, filepath, fsp_flags,
- FIL_IBD_FILE_INITIAL_SIZE, node->mode, node->key_id);
+ dberr_t err;
+ table->space = fil_ibd_create(
+ space_id, table->name.m_name, filepath, fsp_flags,
+ FIL_IBD_FILE_INITIAL_SIZE,
+ node->mode, node->key_id, &err);
ut_free(filepath);
- if (err != DB_SUCCESS) {
+ if (!table->space) {
+ ut_ad(err != DB_SUCCESS);
return err;
}
+ table->space_id = space_id;
mtr.start();
mtr.set_named_space(table->space);
-
fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
-
mtr.commit();
} else {
ut_ad(dict_tf_get_rec_format(table->flags)
!= REC_FORMAT_COMPRESSED);
- ut_ad(table->space == srv_sys_space.space_id());
+ table->space = fil_system.sys_space;
+ table->space_id = TRX_SYS_SPACE;
}
ins_node_set_new_row(node->tab_def,
dict_create_sys_tables_tuple(table, node->heap));
-
return DB_SUCCESS;
}
-/***************************************************************//**
-Builds a column definition to insert. */
-static
-void
-dict_build_col_def_step(
-/*====================*/
- tab_node_t* node) /*!< in: table create node */
-{
- dtuple_t* row;
-
- row = dict_create_sys_columns_tuple(node->table, node->col_no,
- node->heap);
- ins_node_set_new_row(node->col_def, row);
-}
-
/** Builds a SYS_VIRTUAL row definition to insert.
@param[in] node table create node */
static
@@ -497,19 +485,19 @@ dict_create_sys_indexes_tuple(
tuple is allocated */
{
dict_table_t* sys_indexes;
- dict_table_t* table;
dtuple_t* entry;
dfield_t* dfield;
byte* ptr;
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(index);
+ ut_ad(index->table->space || index->table->file_unreadable);
+ ut_ad(!index->table->space
+ || index->table->space->id == index->table->space_id);
ut_ad(heap);
sys_indexes = dict_sys->sys_indexes;
- table = dict_table_get_low(index->table_name);
-
entry = dtuple_create(
heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS);
@@ -520,7 +508,7 @@ dict_create_sys_indexes_tuple(
entry, DICT_COL__SYS_INDEXES__TABLE_ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(ptr, table->id);
+ mach_write_to_8(ptr, index->table->id);
dfield_set_data(dfield, ptr, 8);
@@ -574,7 +562,7 @@ dict_create_sys_indexes_tuple(
entry, DICT_COL__SYS_INDEXES__SPACE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, index->space);
+ mach_write_to_4(ptr, index->table->space_id);
dfield_set_data(dfield, ptr, 4);
@@ -742,7 +730,8 @@ dict_build_index_def_step(
index = node->index;
- table = dict_table_get_low(index->table_name);
+ table = index->table = node->table = dict_table_open_on_name(
+ node->table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
if (table == NULL) {
return(DB_TABLE_NOT_FOUND);
@@ -753,8 +742,6 @@ dict_build_index_def_step(
trx->table_id = table->id;
}
- node->table = table;
-
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
|| dict_index_is_clust(index));
@@ -763,7 +750,6 @@ dict_build_index_def_step(
/* Inherit the space id from the table; we store all indexes of a
table in the same tablespace */
- index->space = table->space;
node->page_no = FIL_NULL;
row = dict_create_sys_indexes_tuple(index, node->heap);
node->ind_row = row;
@@ -774,6 +760,7 @@ dict_build_index_def_step(
index->trx_id = trx->id;
ut_ad(table->def_trx_id <= trx->id);
table->def_trx_id = trx->id;
+ dict_table_close(table, true, false);
return(DB_SUCCESS);
}
@@ -800,11 +787,6 @@ dict_build_index_def(
dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);
- /* Inherit the space id from the table; we store all indexes of a
- table in the same tablespace */
-
- index->space = table->space;
-
/* Note that the index was created by this transaction. */
index->trx_id = trx->id;
}
@@ -857,14 +839,7 @@ dict_create_index_tree_step(
the index and its root address is written to the index entry in
sys_indexes */
- mtr_start(&mtr);
-
- const bool missing = !index->is_readable()
- || dict_table_is_discarded(index->table);
-
- if (!missing) {
- mtr.set_named_space(index->space);
- }
+ mtr.start();
search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
@@ -877,12 +852,13 @@ dict_create_index_tree_step(
dberr_t err = DB_SUCCESS;
- if (missing) {
+ if (!index->is_readable()) {
node->page_no = FIL_NULL;
} else {
+ index->set_modified(mtr);
+
node->page_no = btr_create(
- index->type, index->space,
- dict_table_page_size(index->table),
+ index->type, index->table->space,
index->id, index, NULL, &mtr);
if (node->page_no == FIL_NULL) {
@@ -900,7 +876,7 @@ dict_create_index_tree_step(
btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ mtr.commit();
return(err);
}
@@ -916,7 +892,6 @@ dict_create_index_tree_in_mem(
const trx_t* trx) /*!< in: InnoDB transaction handle */
{
mtr_t mtr;
- ulint page_no;
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(!(index->type & DICT_FTS));
@@ -924,28 +899,18 @@ dict_create_index_tree_in_mem(
mtr_start(&mtr);
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- dberr_t err = DB_SUCCESS;
-
/* Currently this function is being used by temp-tables only.
Import/Discard of temp-table is blocked and so this assert. */
ut_ad(index->is_readable());
- ut_ad(!dict_table_is_discarded(index->table));
+ ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED));
- page_no = btr_create(
- index->type, index->space,
- dict_table_page_size(index->table),
- index->id, index, NULL, &mtr);
+ index->page = btr_create(index->type, index->table->space,
+ index->id, index, NULL, &mtr);
+ mtr_commit(&mtr);
- index->page = page_no;
index->trx_id = trx->id;
- if (page_no == FIL_NULL) {
- err = DB_OUT_OF_FILE_SPACE;
- }
-
- mtr_commit(&mtr);
-
- return(err);
+ return index->page == FIL_NULL ? DB_OUT_OF_FILE_SPACE : DB_SUCCESS;
}
/** Drop the index tree associated with a row in SYS_INDEXES table.
@@ -1020,31 +985,6 @@ dict_drop_index_tree(
}
/*******************************************************************//**
-Drops the index tree but don't update SYS_INDEXES table. */
-void
-dict_drop_index_tree_in_mem(
-/*========================*/
- const dict_index_t* index, /*!< in: index */
- ulint page_no) /*!< in: index page-no */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_table_is_temporary(index->table));
-
- ulint root_page_no = page_no;
- ulint space = index->space;
- bool found;
- const page_size_t page_size(fil_space_get_page_size(space,
- &found));
-
- /* If tree has already been freed or it is a single table
- tablespace and the .ibd file is missing do nothing,
- else free the all the pages */
- if (root_page_no != FIL_NULL && found) {
- btr_free(page_id_t(space, root_page_no), page_size);
- }
-}
-
-/*******************************************************************//**
Recreate the index tree associated with a row in SYS_INDEXES table.
@return new root page number, or FIL_NULL on failure */
ulint
@@ -1061,28 +1001,22 @@ dict_recreate_index_tree(
{
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
+ ut_ad(!table->space || table->space->id == table->space_id);
ulint len;
- rec_t* rec = btr_pcur_get_rec(pcur);
+ const rec_t* rec = btr_pcur_get_rec(pcur);
const byte* ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
ut_ad(len == 4);
- ulint root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
+ ut_ad(table->space_id == mach_read_from_4(
+ rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE,
+ &len)));
ut_ad(len == 4);
- ut_a(table->space == mtr_read_ulint(ptr, MLOG_4BYTES, mtr));
-
- ulint space = table->space;
- bool found;
- const page_size_t page_size(fil_space_get_page_size(space,
- &found));
-
- if (!found) {
+ if (!table->space) {
/* It is a single table tablespace and the .ibd file is
missing: do nothing. */
@@ -1108,7 +1042,7 @@ dict_recreate_index_tree(
mtr_commit(mtr);
mtr_start(mtr);
- mtr->set_named_space(space);
+ mtr->set_named_space(table->space);
btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
/* Find the index corresponding to this SYS_INDEXES record. */
@@ -1116,15 +1050,12 @@ dict_recreate_index_tree(
index != NULL;
index = UT_LIST_GET_NEXT(indexes, index)) {
if (index->id == index_id) {
- if (index->type & DICT_FTS) {
- return(FIL_NULL);
- } else {
- root_page_no = btr_create(
- type, space, page_size, index_id,
- index, NULL, mtr);
- index->page = (unsigned int) root_page_no;
- return(root_page_no);
- }
+ ulint root_page_no = (index->type & DICT_FTS)
+ ? FIL_NULL
+ : btr_create(type, table->space,
+ index_id, index, NULL, mtr);
+ index->page = unsigned(root_page_no);
+ return root_page_no;
}
}
@@ -1134,73 +1065,6 @@ dict_recreate_index_tree(
return(FIL_NULL);
}
-/*******************************************************************//**
-Truncates the index tree but don't update SYSTEM TABLES.
-@return DB_SUCCESS or error */
-dberr_t
-dict_truncate_index_tree_in_mem(
-/*============================*/
- dict_index_t* index) /*!< in/out: index */
-{
- mtr_t mtr;
- bool truncate;
- ulint space = index->space;
-
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_table_is_temporary(index->table));
-
- ulint type = index->type;
- ulint root_page_no = index->page;
-
- if (root_page_no == FIL_NULL) {
-
- /* The tree has been freed. */
- ib::warn() << "Trying to TRUNCATE a missing index of table "
- << index->table->name << "!";
-
- truncate = false;
- } else {
- truncate = true;
- }
-
- bool found;
- const page_size_t page_size(fil_space_get_page_size(space,
- &found));
-
- if (!found) {
-
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- ib::warn()
- << "Trying to TRUNCATE a missing .ibd file of table "
- << index->table->name << "!";
- }
-
- /* If the table to truncate resides in its own tablespace that will
- be re-created on truncate then we can ignore freeing of existing
- tablespace objects. */
-
- if (truncate) {
- btr_free(page_id_t(space, root_page_no), page_size);
- }
-
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- root_page_no = btr_create(
- type, space, page_size, index->id, index, NULL, &mtr);
-
- DBUG_EXECUTE_IF("ib_err_trunc_temp_recreate_index",
- root_page_no = FIL_NULL;);
-
- index->page = root_page_no;
-
- mtr_commit(&mtr);
-
- return(index->page == FIL_NULL ? DB_ERROR : DB_SUCCESS);
-}
-
/*********************************************************************//**
Creates a table create graph.
@return own: table create node */
@@ -1244,6 +1108,7 @@ tab_create_graph_create(
/** Creates an index create graph.
@param[in] index index to create, built as a memory data structure
+@param[in] table table name
@param[in,out] heap heap where created
@param[in] add_v new virtual columns added in the same clause with
add index
@@ -1251,6 +1116,7 @@ tab_create_graph_create(
ind_node_t*
ind_create_graph_create(
dict_index_t* index,
+ const char* table,
mem_heap_t* heap,
const dict_add_v_col_t* add_v)
{
@@ -1263,6 +1129,8 @@ ind_create_graph_create(
node->index = index;
+ node->table_name = table;
+
node->add_v = add_v;
node->state = INDEX_BUILD_INDEX_DEF;
@@ -1325,12 +1193,19 @@ dict_create_table_step(
if (node->state == TABLE_BUILD_COL_DEF) {
- if (node->col_no < (static_cast<ulint>(node->table->n_def)
- + static_cast<ulint>(node->table->n_v_def))) {
+ if (node->col_no + DATA_N_SYS_COLS
+ < (static_cast<ulint>(node->table->n_def)
+ + static_cast<ulint>(node->table->n_v_def))) {
- dict_build_col_def_step(node);
+ ulint i = node->col_no++;
+ if (i + DATA_N_SYS_COLS >= node->table->n_def) {
+ i += DATA_N_SYS_COLS;
+ }
- node->col_no++;
+ ins_node_set_new_row(
+ node->col_def,
+ dict_create_sys_columns_tuple(node->table, i,
+ node->heap));
thr->run_node = node->col_def;
@@ -1388,7 +1263,8 @@ dict_create_table_step(
if (node->state == TABLE_ADD_TO_CACHE) {
DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE(););
- dict_table_add_to_cache(node->table, TRUE, node->heap);
+ node->table->can_be_evicted = true;
+ node->table->add_to_cache();
err = DB_SUCCESS;
}
@@ -1473,21 +1349,26 @@ dict_create_index_step(
}
if (node->state == INDEX_ADD_TO_CACHE) {
+ ut_ad(node->index->table == node->table);
+ node->index = dict_index_add_to_cache(
+ node->index, FIL_NULL, trx_is_strict(trx),
+ &err, node->add_v);
- index_id_t index_id = node->index->id;
-
- err = dict_index_add_to_cache_w_vcol(
- node->table, node->index, node->add_v, FIL_NULL,
- trx_is_strict(trx));
-
- node->index = dict_index_get_if_in_cache_low(index_id);
- ut_a((node->index == NULL) == (err != DB_SUCCESS));
-
- if (err != DB_SUCCESS) {
+ ut_ad((node->index == NULL) == (err != DB_SUCCESS));
+ if (!node->index) {
goto function_exit;
}
+ ut_ad(!node->index->is_instant());
+ ut_ad(node->index->n_core_null_bytes
+ == ((dict_index_is_clust(node->index)
+ && node->table->supports_instant())
+ ? dict_index_t::NO_CORE_NULL_BYTES
+ : UT_BITS_IN_BYTES(
+ unsigned(node->index->n_nullable))));
+ node->index->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(node->index->n_nullable));
node->state = INDEX_CREATE_INDEX_TREE;
}
@@ -1639,7 +1520,7 @@ dict_create_or_check_foreign_constraint_tables(void)
return(DB_READ_ONLY);
}
- trx = trx_allocate_for_mysql();
+ trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -1733,7 +1614,7 @@ dict_create_or_check_foreign_constraint_tables(void)
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
@@ -1779,7 +1660,7 @@ dict_create_or_check_sys_virtual()
return(DB_READ_ONLY);
}
- trx = trx_allocate_for_mysql();
+ trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -1835,7 +1716,7 @@ dict_create_or_check_sys_virtual()
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
@@ -2074,7 +1955,8 @@ dict_create_add_foreign_to_dictionary(
foreign->referenced_table_name);
pars_info_add_int4_literal(info, "n_cols",
- foreign->n_fields + (foreign->type << 24));
+ ulint(foreign->n_fields)
+ | (ulint(foreign->type) << 24));
DBUG_PRINT("dict_create_add_foreign_to_dictionary",
("'%s', '%s', '%s', %d", foreign->id, name,
@@ -2306,7 +2188,7 @@ dict_create_or_check_sys_tablespace(void)
return(DB_READ_ONLY);
}
- trx = trx_allocate_for_mysql();
+ trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -2368,7 +2250,7 @@ dict_create_or_check_sys_tablespace(void)
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
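
Note on the dict_create_add_foreign_to_dictionary() hunk above: SYS_FOREIGN.N_COLS is now written as ulint(foreign->n_fields) | (ulint(foreign->type) << 24), casting to a wide unsigned type before shifting instead of adding a shifted narrow value. A minimal standalone sketch of that pack/unpack arithmetic (the helper names below are illustrative, not InnoDB functions):

#include <cassert>

typedef unsigned long ulint;   /* local stand-in for the InnoDB typedef */

/* Pack the field count (low 24 bits) and the foreign key type flags
(high 8 bits) into one value, casting before the shift so the shift
cannot act on a narrow signed operand. */
static ulint pack_n_cols(unsigned n_fields, unsigned type)
{
	return ulint(n_fields) | (ulint(type) << 24);
}

static unsigned unpack_n_fields(ulint n_cols) { return unsigned(n_cols & 0xFFFFFF); }
static unsigned unpack_type(ulint n_cols)     { return unsigned(n_cols >> 24); }

int main()
{
	ulint v = pack_n_cols(3, 0x05);	/* 3 columns plus two type flag bits */
	assert(unpack_n_fields(v) == 3);
	assert(unpack_type(v) == 0x05);
}
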
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 914c9946977..c6f6455be73 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -123,14 +123,12 @@ static bool innodb_index_stats_not_found_reported = false;
/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
index.
-@param[in] table table
@param[in] index index
@param[in] add_v new virtual columns added along with an add index call
-@return TRUE if the column names were found */
+@return whether the column names were found */
static
-ibool
+bool
dict_index_find_cols(
- const dict_table_t* table,
dict_index_t* index,
const dict_add_v_col_t* add_v);
/*******************************************************************//**
@@ -141,7 +139,6 @@ static
dict_index_t*
dict_index_build_internal_clust(
/*============================*/
- const dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of
a clustered index */
/*******************************************************************//**
@@ -152,7 +149,6 @@ static
dict_index_t*
dict_index_build_internal_non_clust(
/*================================*/
- const dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of
a non-clustered index */
/**********************************************************************//**
@@ -162,7 +158,6 @@ static
dict_index_t*
dict_index_build_internal_fts(
/*==========================*/
- dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of an FTS index */
/**********************************************************************//**
@@ -254,7 +249,7 @@ dict_get_db_name_len(
const char* s;
s = strchr(name, '/');
ut_a(s);
- return(s - name);
+ return ulint(s - name);
}
/** Reserve the dictionary system mutex. */
@@ -425,7 +420,7 @@ dict_table_try_drop_aborted(
{
trx_t* trx;
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "try to drop any indexes after an aborted index creation";
row_mysql_lock_data_dictionary(trx);
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
@@ -448,7 +443,7 @@ dict_table_try_drop_aborted(
}
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
}
/**********************************************************************//**
@@ -613,26 +608,28 @@ dict_table_has_column(
return(col_max);
}
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
+/** Retrieve the column name.
+@param[in] table the table of this column */
+const char* dict_col_t::name(const dict_table_t& table) const
{
- ulint i;
- const char* s;
+ ut_ad(table.magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(table);
- ut_ad(col_nr < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ size_t col_nr;
+ const char *s;
+
+ if (is_virtual()) {
+ col_nr = size_t(reinterpret_cast<const dict_v_col_t*>(this)
+ - table.v_cols);
+ ut_ad(col_nr < table.n_v_def);
+ s = table.v_col_names;
+ } else {
+ col_nr = size_t(this - table.cols);
+ ut_ad(col_nr < table.n_def);
+ s = table.col_names;
+ }
- s = table->col_names;
if (s) {
- for (i = 0; i < col_nr; i++) {
+ for (size_t i = 0; i < col_nr; i++) {
s += strlen(s) + 1;
}
}
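
The new dict_col_t::name(const dict_table_t&) above finds the column name by walking the table's packed, NUL-separated name buffer (col_names or v_col_names), skipping strlen(s) + 1 bytes per preceding entry. A self-contained sketch of that walk (the names and data are made up for illustration):

#include <cassert>
#include <cstring>

/* Return the i-th name from a buffer of consecutive NUL-terminated
strings, the same walk dict_col_t::name() performs over table.col_names. */
static const char* nth_name(const char* s, size_t i)
{
	while (i--) {
		s += strlen(s) + 1;	/* skip one name and its terminating NUL */
	}
	return s;
}

int main()
{
	static const char names[] = "id\0name\0created_at\0";
	assert(!strcmp(nth_name(names, 0), "id"));
	assert(!strcmp(nth_name(names, 2), "created_at"));
}
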
@@ -892,7 +889,7 @@ dict_index_get_nth_col_or_prefix_pos(
@param[in] n column number
@param[in] is_virtual whether it is a virtual col
@return TRUE if contains the column or its prefix */
-ibool
+bool
dict_index_contains_col_or_prefix(
const dict_index_t* index,
ulint n,
@@ -923,11 +920,11 @@ dict_index_contains_col_or_prefix(
if (col == field->col) {
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
/********************************************************************//**
@@ -1106,7 +1103,7 @@ dict_init(void)
dict_operation_lock, SYNC_DICT_OPERATION);
if (!srv_read_only_mode) {
- dict_foreign_err_file = os_file_create_tmpfile(NULL);
+ dict_foreign_err_file = os_file_create_tmpfile();
ut_a(dict_foreign_err_file);
}
@@ -1230,8 +1227,7 @@ dict_table_add_system_columns(
mem_heap_t* heap) /*!< in: temporary heap */
{
ut_ad(table);
- ut_ad(table->n_def ==
- (table->n_cols - dict_table_get_n_sys_cols(table)));
+ ut_ad(table->n_def == (table->n_cols - DATA_N_SYS_COLS));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(!table->cached);
@@ -1245,65 +1241,46 @@ dict_table_add_system_columns(
DATA_ROW_ID | DATA_NOT_NULL,
DATA_ROW_ID_LEN);
-#if DATA_ROW_ID != 0
-#error "DATA_ROW_ID != 0"
-#endif
+ compile_time_assert(DATA_ROW_ID == 0);
dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS,
DATA_TRX_ID | DATA_NOT_NULL,
DATA_TRX_ID_LEN);
-#if DATA_TRX_ID != 1
-#error "DATA_TRX_ID != 1"
-#endif
-
+ compile_time_assert(DATA_TRX_ID == 1);
dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
DATA_ROLL_PTR | DATA_NOT_NULL,
DATA_ROLL_PTR_LEN);
-#if DATA_ROLL_PTR != 2
-#error "DATA_ROLL_PTR != 2"
-#endif
+ compile_time_assert(DATA_ROLL_PTR == 2);
/* This check is a reminder that if a new system column is added to
the program, it should be dealt with here */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
+ compile_time_assert(DATA_N_SYS_COLS == 3);
}
-/**********************************************************************//**
-Adds a table object to the dictionary cache. */
+/** Add the table definition to the data dictionary cache */
void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- bool can_be_evicted, /*!< in: whether can be evicted */
- mem_heap_t* heap) /*!< in: temporary heap */
+dict_table_t::add_to_cache()
{
- ulint fold;
- ulint id_fold;
-
ut_ad(dict_lru_validate());
ut_ad(mutex_own(&dict_sys->mutex));
- dict_table_add_system_columns(table, heap);
-
- table->cached = TRUE;
+ cached = TRUE;
- fold = ut_fold_string(table->name.m_name);
- id_fold = ut_fold_ull(table->id);
+ ulint fold = ut_fold_string(name.m_name);
+ ulint id_fold = ut_fold_ull(id);
/* Look for a table with the same name: error if such exists */
{
dict_table_t* table2;
HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
dict_table_t*, table2, ut_ad(table2->cached),
- !strcmp(table2->name.m_name, table->name.m_name));
+ !strcmp(table2->name.m_name, name.m_name));
ut_a(table2 == NULL);
#ifdef UNIV_DEBUG
/* Look for the same table pointer with a different name */
HASH_SEARCH_ALL(name_hash, dict_sys->table_hash,
dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
+ table2 == this);
ut_ad(table2 == NULL);
#endif /* UNIV_DEBUG */
}
@@ -1313,32 +1290,30 @@ dict_table_add_to_cache(
dict_table_t* table2;
HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
dict_table_t*, table2, ut_ad(table2->cached),
- table2->id == table->id);
+ table2->id == id);
ut_a(table2 == NULL);
#ifdef UNIV_DEBUG
/* Look for the same table pointer with a different id */
HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash,
dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
+ table2 == this);
ut_ad(table2 == NULL);
#endif /* UNIV_DEBUG */
}
/* Add table to hash table of tables */
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
+ this);
/* Add table to hash table of tables based on table id */
HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
- table);
-
- table->can_be_evicted = can_be_evicted;
+ this);
- if (table->can_be_evicted) {
- UT_LIST_ADD_FIRST(dict_sys->table_LRU, table);
+ if (can_be_evicted) {
+ UT_LIST_ADD_FIRST(dict_sys->table_LRU, this);
} else {
- UT_LIST_ADD_FIRST(dict_sys->table_non_LRU, table);
+ UT_LIST_ADD_FIRST(dict_sys->table_non_LRU, this);
}
ut_ad(dict_lru_validate());
@@ -1590,7 +1565,6 @@ dict_table_rename_in_cache(
{
dberr_t err;
dict_foreign_t* foreign;
- dict_index_t* index;
ulint fold;
char old_name[MAX_FULL_NAME_LEN + 1];
os_file_type_t ftype;
@@ -1627,12 +1601,12 @@ dict_table_rename_in_cache(
/* If the table is stored in a single-table tablespace, rename the
.ibd file and rebuild the .isl file if needed. */
- if (dict_table_is_discarded(table)) {
+ if (!table->space) {
bool exists;
char* filepath;
ut_ad(dict_table_is_file_per_table(table));
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
/* Make sure the data_dir_path is set. */
dict_get_and_save_data_dir_path(table, true);
@@ -1652,7 +1626,7 @@ dict_table_rename_in_cache(
return(DB_OUT_OF_MEMORY);
}
- fil_delete_tablespace(table->space);
+ fil_delete_tablespace(table->space_id);
/* Delete any temp file hanging around. */
if (os_file_status(filepath, &exists, &ftype)
@@ -1665,10 +1639,11 @@ dict_table_rename_in_cache(
ut_free(filepath);
} else if (dict_table_is_file_per_table(table)) {
- char* new_path = NULL;
- char* old_path = fil_space_get_first_path(table->space);
+ char* new_path;
+ const char* old_path = UT_LIST_GET_FIRST(table->space->chain)
+ ->name;
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
new_path = os_file_make_new_pathname(
@@ -1678,7 +1653,6 @@ dict_table_rename_in_cache(
if (err != DB_SUCCESS) {
ut_free(new_path);
- ut_free(old_path);
return(DB_TABLESPACE_EXISTS);
}
} else {
@@ -1687,32 +1661,19 @@ dict_table_rename_in_cache(
}
/* New filepath must not exist. */
- err = fil_rename_tablespace_check(
- table->space, old_path, new_path, false,
- replace_new_file);
- if (err != DB_SUCCESS) {
- ut_free(old_path);
- ut_free(new_path);
- return(err);
- }
-
- fil_name_write_rename(table->space, old_path, new_path);
-
- bool success = fil_rename_tablespace(
- table->space, old_path, new_name, new_path);
-
- ut_free(old_path);
+ err = table->space->rename(new_name, new_path, true,
+ replace_new_file);
ut_free(new_path);
/* If the tablespace is remote, a new .isl file was created
If success, delete the old one. If not, delete the new one. */
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
RemoteDatafile::delete_link_file(
- success ? old_name : new_name);
+ err == DB_SUCCESS ? old_name : new_name);
}
- if (!success) {
- return(DB_ERROR);
+ if (err != DB_SUCCESS) {
+ return err;
}
}
@@ -1735,14 +1696,6 @@ dict_table_rename_in_cache(
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
table);
- /* Update the table_name field in indexes */
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- index->table_name = table->name.m_name;
- }
-
if (!rename_also_foreigns) {
/* In ALTER TABLE we think of the rename table operation
in the direction table -> temporary table (#sql...)
@@ -2080,7 +2033,7 @@ dict_table_remove_from_cache_low(
/* When evicting the table definition,
drop the orphan indexes from the data dictionary
and free the index pages. */
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
@@ -2092,7 +2045,7 @@ dict_table_remove_from_cache_low(
row_merge_drop_indexes_dict(trx, table->id);
trx_commit_for_mysql(trx);
trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
+ trx_free(trx);
}
/* Free virtual column template if any */
@@ -2123,19 +2076,13 @@ dict_col_name_is_reserved(
/*======================*/
const char* name) /*!< in: column name */
{
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here. */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-
static const char* reserved_names[] = {
"DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
};
- ulint i;
+ compile_time_assert(UT_ARR_SIZE(reserved_names) == DATA_N_SYS_COLS);
- for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) {
+ for (ulint i = 0; i < UT_ARR_SIZE(reserved_names); i++) {
if (innobase_strcasecmp(name, reserved_names[i]) == 0) {
return(TRUE);
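
The hunk above replaces the old #if/#error guard with compile_time_assert(UT_ARR_SIZE(reserved_names) == DATA_N_SYS_COLS), keeping the check next to the array it protects. A minimal standalone illustration using plain C++ static_assert, which is assumed to be what such a macro boils down to:

static const char* reserved_names[] = {
	"DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
};

enum { N_SYS_COLS = 3 };	/* stand-in for DATA_N_SYS_COLS */

/* Fails at compile time if a new system column is added to one list but
not the other, replacing the old #if / #error pattern. */
static_assert(sizeof(reserved_names) / sizeof(reserved_names[0]) == N_SYS_COLS,
	      "reserved_names[] must match the number of system columns");

int main() { return 0; }
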
@@ -2180,13 +2127,13 @@ dict_index_too_big_for_tree(
comp = dict_table_is_comp(table);
- const page_size_t page_size(dict_table_page_size(table));
+ const page_size_t page_size(dict_tf_get_page_size(table->flags));
if (page_size.is_compressed()
- && page_size.physical() < univ_page_size.physical()) {
+ && page_size.physical() < srv_page_size) {
/* On a compressed page, two records must fit in the
uncompressed page modification log. On compressed pages
- with size.physical() == univ_page_size.physical(),
+ with size.physical() == srv_page_size,
this limit will never be reached. */
ut_ad(comp);
/* The maximum allowed record size is the size of
@@ -2222,14 +2169,15 @@ dict_index_too_big_for_tree(
if (comp) {
/* Include the "null" flags in the
maximum possible record size. */
- rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
+ rec_max_size += UT_BITS_IN_BYTES(
+ unsigned(new_index->n_nullable));
} else {
/* For each column, include a 2-byte offset and a
"null" flag. The 1-byte format is only used in short
records that do not contain externally stored columns.
Such records could never exceed the page limit, even
when using the 2-byte format. */
- rec_max_size += 2 * new_index->n_fields;
+ rec_max_size += 2 * unsigned(new_index->n_fields);
}
/* Compute the maximum possible record size. */
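
The size estimate above wraps n_nullable in unsigned() before handing it to UT_BITS_IN_BYTES. Assuming UT_BITS_IN_BYTES rounds a bit count up to whole bytes (one null-flag bit per nullable column in the COMPACT format), the bookkeeping is just this:

#include <cassert>

/* Round a bit count up to bytes, the computation UT_BITS_IN_BYTES is
assumed to perform for the per-record null-flag bitmap. */
static unsigned bits_in_bytes(unsigned bits)
{
	return (bits + 7) / 8;
}

int main()
{
	assert(bits_in_bytes(0) == 0);	/* no nullable columns, no null bytes */
	assert(bits_in_bytes(1) == 1);
	assert(bits_in_bytes(8) == 1);
	assert(bits_in_bytes(9) == 2);	/* nine nullable columns need two bytes */
}
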
@@ -2323,31 +2271,11 @@ add_field_size:
return(FALSE);
}
-/** Adds an index to the dictionary cache.
-@param[in,out] table table on which the index is
-@param[in,out] index index; NOTE! The index memory
- object is freed in this function!
-@param[in] page_no root page number of the index
-@param[in] strict TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache(
- dict_table_t* table,
- dict_index_t* index,
- ulint page_no,
- ibool strict)
-{
- return(dict_index_add_to_cache_w_vcol(
- table, index, NULL, page_no, strict));
-}
-
/** Clears the virtual column's index list before index is
being freed.
@param[in] index Index being freed */
-void
-dict_index_remove_from_v_col_list(dict_index_t* index) {
+void dict_index_remove_from_v_col_list(dict_index_t* index)
+{
/* Index is not completely formed */
if (!index->cached) {
return;
@@ -2358,7 +2286,7 @@ dict_index_remove_from_v_col_list(dict_index_t* index) {
for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
col = dict_index_get_nth_col(index, i);
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
vcol = reinterpret_cast<const dict_v_col_t*>(
col);
/* This could be NULL, when we do add
@@ -2383,23 +2311,24 @@ dict_index_remove_from_v_col_list(dict_index_t* index) {
/** Adds an index to the dictionary cache, possibly indexing a newly
added virtual column.
-@param[in,out] table table on which the index is
-@param[in,out] index index; NOTE! The index memory
+@param[in] index index; NOTE! The index memory
object is freed in this function!
-@param[in] add_v new virtual column that being added along with
- an add index call
@param[in] page_no root page number of the index
@param[in] strict TRUE=refuse to create the index
if records could be too big to fit in
a B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache_w_vcol(
- dict_table_t* table,
+@param[out] err DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION
+@param[in]	add_v		new virtual column that is being added along with
+ an add index call
+@return the added index
+@retval NULL on error */
+dict_index_t*
+dict_index_add_to_cache(
dict_index_t* index,
- const dict_add_v_col_t* add_v,
ulint page_no,
- ibool strict)
+ bool strict,
+ dberr_t* err,
+ const dict_add_v_col_t* add_v)
{
dict_index_t* new_index;
ulint n_ord;
@@ -2414,23 +2343,27 @@ dict_index_add_to_cache_w_vcol(
ut_d(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
- || UT_LIST_GET_LEN(table->indexes) == 0);
+ || UT_LIST_GET_LEN(index->table->indexes) == 0);
+ ut_ad(dict_index_is_clust(index) || !index->table->no_rollback());
- if (!dict_index_find_cols(table, index, add_v)) {
+ if (!dict_index_find_cols(index, add_v)) {
dict_mem_index_free(index);
- return(DB_CORRUPTION);
+ if (err) *err = DB_CORRUPTION;
+ return NULL;
}
/* Build the cache internal representation of the index,
containing also the added system fields */
- if (index->type == DICT_FTS) {
- new_index = dict_index_build_internal_fts(table, index);
- } else if (dict_index_is_clust(index)) {
- new_index = dict_index_build_internal_clust(table, index);
+ if (dict_index_is_clust(index)) {
+ new_index = dict_index_build_internal_clust(index);
} else {
- new_index = dict_index_build_internal_non_clust(table, index);
+ new_index = (index->type & DICT_FTS)
+ ? dict_index_build_internal_fts(index)
+ : dict_index_build_internal_non_clust(index);
+ new_index->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(new_index->n_nullable));
}
/* Set the n_fields value in new_index to the actual defined
@@ -2444,16 +2377,17 @@ dict_index_add_to_cache_w_vcol(
new_index->disable_ahi = index->disable_ahi;
#endif
- if (dict_index_too_big_for_tree(table, new_index, strict)) {
+ if (dict_index_too_big_for_tree(index->table, new_index, strict)) {
if (strict) {
dict_mem_index_free(new_index);
dict_mem_index_free(index);
- return(DB_TOO_BIG_RECORD);
+ if (err) *err = DB_TOO_BIG_RECORD;
+ return NULL;
} else if (current_thd != NULL) {
/* Avoid the warning to be printed
during recovery. */
- ib_warn_row_too_big((const dict_table_t*)table);
+ ib_warn_row_too_big(index->table);
}
}
@@ -2515,9 +2449,7 @@ dict_index_add_to_cache_w_vcol(
/* Add the new index as the last index for the table */
- UT_LIST_ADD_LAST(table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name.m_name;
+ UT_LIST_ADD_LAST(new_index->table->indexes, new_index);
#ifdef BTR_CUR_ADAPT
new_index->search_info = btr_search_info_create(new_index->heap);
#endif /* BTR_CUR_ADAPT */
@@ -2526,9 +2458,11 @@ dict_index_add_to_cache_w_vcol(
rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
SYNC_INDEX_TREE);
- dict_mem_index_free(index);
+ new_index->n_core_fields = new_index->n_fields;
- return(DB_SUCCESS);
+ dict_mem_index_free(index);
+ if (err) *err = DB_SUCCESS;
+ return new_index;
}
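
As the hunks above show, dict_index_add_to_cache() now returns the cached index (or NULL) and reports the reason through an optional dberr_t* out-parameter that is written only as "if (err) *err = ...", so callers that do not need the reason can pass NULL. A simplified, self-contained sketch of that calling convention (the types below are stand-ins, not the InnoDB definitions):

#include <cassert>
#include <cstddef>

enum dberr_t { DB_SUCCESS, DB_TOO_BIG_RECORD, DB_CORRUPTION };

struct index_t { int id; };

/* Return the object on success, NULL on failure; the error code is only
written when the caller supplied somewhere to put it. */
static index_t* add_to_cache(index_t* index, bool too_big, dberr_t* err)
{
	if (too_big) {
		if (err) *err = DB_TOO_BIG_RECORD;
		return NULL;
	}
	if (err) *err = DB_SUCCESS;
	return index;
}

int main()
{
	index_t i = {42};
	dberr_t err;
	assert(add_to_cache(&i, false, &err) == &i && err == DB_SUCCESS);
	assert(add_to_cache(&i, true, NULL) == NULL);	/* caller ignores the reason */
}
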
/**********************************************************************//**
@@ -2615,18 +2549,17 @@ index.
@param[in] table table
@param[in,out] index index
@param[in] add_v new virtual columns added along with an add index call
-@return TRUE if the column names were found */
+@return whether the column names were found */
static
-ibool
+bool
dict_index_find_cols(
- const dict_table_t* table,
dict_index_t* index,
const dict_add_v_col_t* add_v)
{
std::vector<ulint, ut_allocator<ulint> > col_added;
std::vector<ulint, ut_allocator<ulint> > v_col_added;
- ut_ad(table != NULL && index != NULL);
+ const dict_table_t* table = index->table;
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(mutex_own(&dict_sys->mutex));
@@ -2724,7 +2657,7 @@ dict_index_add_col(
dict_field_t* field;
const char* col_name;
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
dict_v_col_t* v_col = reinterpret_cast<dict_v_col_t*>(col);
/* When v_col->v_indexes==NULL,
@@ -2734,11 +2667,8 @@ dict_index_add_col(
if (v_col->v_indexes != NULL) {
/* Register the index with the virtual column index
list */
- struct dict_v_idx_t new_idx
- = {index, index->n_def};
-
- v_col->v_indexes->push_back(new_idx);
-
+ v_col->v_indexes->push_back(
+ dict_v_idx_t(index, index->n_def));
}
col_name = dict_table_get_v_col_name_mysql(
@@ -2749,7 +2679,7 @@ dict_index_add_col(
dict_mem_index_add_field(index, col_name, prefix_len);
- field = dict_index_get_nth_field(index, index->n_def - 1);
+ field = dict_index_get_nth_field(index, unsigned(index->n_def) - 1);
field->col = col;
field->fixed_len = static_cast<unsigned int>(
@@ -2767,12 +2697,11 @@ dict_index_add_col(
if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) {
field->fixed_len = 0;
}
-#if DICT_MAX_FIXED_COL_LEN != 768
+
/* The comparison limit above must be constant. If it were
changed, the disk format of some fixed-length columns would
change, which would be a disaster. */
-# error "DICT_MAX_FIXED_COL_LEN != 768"
-#endif
+ compile_time_assert(DICT_MAX_FIXED_COL_LEN == 768);
if (!(col->prtype & DATA_NOT_NULL)) {
index->n_nullable++;
@@ -2786,8 +2715,7 @@ void
dict_index_copy(
/*============*/
dict_index_t* index1, /*!< in: index to copy to */
- dict_index_t* index2, /*!< in: index to copy from */
- const dict_table_t* table, /*!< in: table */
+ const dict_index_t* index2, /*!< in: index to copy from */
ulint start, /*!< in: first position to copy */
ulint end) /*!< in: last position to copy */
{
@@ -2800,7 +2728,7 @@ dict_index_copy(
field = dict_index_get_nth_field(index2, i);
- dict_index_add_col(index1, table, field->col,
+ dict_index_add_col(index1, index2->table, field->col,
field->prefix_len);
}
}
@@ -2921,28 +2849,26 @@ static
dict_index_t*
dict_index_build_internal_clust(
/*============================*/
- const dict_table_t* table, /*!< in: table */
dict_index_t* index) /*!< in: user representation of
a clustered index */
{
+ dict_table_t* table = index->table;
dict_index_t* new_index;
dict_field_t* field;
ulint trx_id_pos;
ulint i;
ibool* indexed;
- ut_ad(table && index);
ut_ad(dict_index_is_clust(index));
ut_ad(!dict_index_is_ibuf(index));
ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* Create a new index object with certainly enough fields */
- new_index = dict_mem_index_create(table->name.m_name,
- index->name, table->space,
+ new_index = dict_mem_index_create(index->table, index->name,
index->type,
- index->n_fields + table->n_cols);
+ unsigned(index->n_fields
+ + table->n_cols));
/* Copy other relevant data from the old index struct to the new
struct: it inherits the values */
@@ -2952,7 +2878,7 @@ dict_index_build_internal_clust(
new_index->id = index->id;
/* Copy the fields of index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
+ dict_index_copy(new_index, index, 0, index->n_fields);
if (dict_index_is_unique(index)) {
/* Only the fields defined so far are needed to identify
@@ -2961,7 +2887,7 @@ dict_index_build_internal_clust(
new_index->n_uniq = new_index->n_def;
} else {
/* Also the row id is needed to identify the entry */
- new_index->n_uniq = 1 + new_index->n_def;
+ new_index->n_uniq = 1 + unsigned(new_index->n_def);
}
new_index->trx_id_offset = 0;
@@ -2970,15 +2896,9 @@ dict_index_build_internal_clust(
trx_id_pos = new_index->n_def;
-#if DATA_ROW_ID != 0
-# error "DATA_ROW_ID != 0"
-#endif
-#if DATA_TRX_ID != 1
-# error "DATA_TRX_ID != 1"
-#endif
-#if DATA_ROLL_PTR != 2
-# error "DATA_ROLL_PTR != 2"
-#endif
+ compile_time_assert(DATA_ROW_ID == 0);
+ compile_time_assert(DATA_TRX_ID == 1);
+ compile_time_assert(DATA_ROLL_PTR == 2);
if (!dict_index_is_unique(index)) {
dict_index_add_col(new_index, table,
@@ -3052,8 +2972,7 @@ dict_index_build_internal_clust(
/* Add to new_index non-system columns of table not yet included
there */
- ulint n_sys_cols = dict_table_get_n_sys_cols(table);
- for (i = 0; i + n_sys_cols < (ulint) table->n_cols; i++) {
+ for (i = 0; i + DATA_N_SYS_COLS < ulint(table->n_cols); i++) {
dict_col_t* col = dict_table_get_nth_col(table, i);
ut_ad(col->mtype != DATA_SYS);
@@ -3067,6 +2986,9 @@ dict_index_build_internal_clust(
ut_ad(UT_LIST_GET_LEN(table->indexes) == 0);
+ new_index->n_core_null_bytes = table->supports_instant()
+ ? dict_index_t::NO_CORE_NULL_BYTES
+ : UT_BITS_IN_BYTES(unsigned(new_index->n_nullable));
new_index->cached = TRUE;
return(new_index);
@@ -3080,13 +3002,13 @@ static
dict_index_t*
dict_index_build_internal_non_clust(
/*================================*/
- const dict_table_t* table, /*!< in: table */
dict_index_t* index) /*!< in: user representation of
a non-clustered index */
{
dict_field_t* field;
dict_index_t* new_index;
dict_index_t* clust_index;
+ dict_table_t* table = index->table;
ulint i;
ibool* indexed;
@@ -3094,7 +3016,6 @@ dict_index_build_internal_non_clust(
ut_ad(!dict_index_is_clust(index));
ut_ad(!dict_index_is_ibuf(index));
ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* The clustered index should be the first in the list of indexes */
clust_index = UT_LIST_GET_FIRST(table->indexes);
@@ -3105,8 +3026,8 @@ dict_index_build_internal_non_clust(
/* Create a new index */
new_index = dict_mem_index_create(
- table->name.m_name, index->name, index->space, index->type,
- index->n_fields + 1 + clust_index->n_uniq);
+ index->table, index->name, index->type,
+ ulint(index->n_fields + 1 + clust_index->n_uniq));
/* Copy other relevant data from the old index
struct to the new struct: it inherits the values */
@@ -3116,7 +3037,7 @@ dict_index_build_internal_non_clust(
new_index->id = index->id;
/* Copy fields from index to new_index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
+ dict_index_copy(new_index, index, 0, index->n_fields);
/* Remember the table columns already contained in new_index */
indexed = static_cast<ibool*>(
@@ -3127,7 +3048,7 @@ dict_index_build_internal_non_clust(
field = dict_index_get_nth_field(new_index, i);
- if (dict_col_is_virtual(field->col)) {
+ if (field->col->is_virtual()) {
continue;
}
@@ -3183,20 +3104,16 @@ static
dict_index_t*
dict_index_build_internal_fts(
/*==========================*/
- dict_table_t* table, /*!< in: table */
dict_index_t* index) /*!< in: user representation of an FTS index */
{
dict_index_t* new_index;
- ut_ad(table && index);
ut_ad(index->type == DICT_FTS);
ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
/* Create a new index */
- new_index = dict_mem_index_create(
- table->name.m_name, index->name, index->space, index->type,
- index->n_fields);
+ new_index = dict_mem_index_create(index->table, index->name,
+ index->type, index->n_fields);
/* Copy other relevant data from the old index struct to the new
struct: it inherits the values */
@@ -3206,11 +3123,13 @@ dict_index_build_internal_fts(
new_index->id = index->id;
/* Copy fields from index to new_index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
+ dict_index_copy(new_index, index, 0, index->n_fields);
new_index->n_uniq = 0;
new_index->cached = TRUE;
+ dict_table_t* table = index->table;
+
if (table->fts->cache == NULL) {
table->fts->cache = fts_cache_create(table);
}
@@ -3762,7 +3681,7 @@ dict_scan_id(
ptr++;
}
- len = ptr - s;
+ len = ulint(ptr - s);
}
if (heap == NULL) {
@@ -3783,7 +3702,7 @@ dict_scan_id(
}
}
*d++ = 0;
- len = d - str;
+ len = ulint(d - str);
ut_ad(*s == quote);
ut_ad(s + 1 == ptr);
} else {
@@ -4002,7 +3921,7 @@ dict_scan_table_name(
for (s = scan_name; *s; s++) {
if (*s == '.') {
database_name = scan_name;
- database_name_len = s - scan_name;
+ database_name_len = ulint(s - scan_name);
scan_name = ++s;
break;/* to do: multiple dots? */
}
@@ -4314,7 +4233,7 @@ dict_foreign_push_index_error(
const char* col_name;
field = dict_index_get_nth_field(err_index, err_col);
- col_name = dict_col_is_virtual(field->col)
+ col_name = field->col->is_virtual()
? "(null)"
: dict_table_get_col_name(
table, dict_col_get_no(field->col));
@@ -4581,6 +4500,11 @@ loop:
/**********************************************************/
/* The following call adds the foreign key constraints
to the data dictionary system tables on disk */
+ trx->op_info = "adding foreign keys";
+
+ trx_start_if_not_started_xa(trx, true);
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
error = dict_create_add_foreigns_to_dictionary(
local_fk_set, table, trx);
@@ -4795,23 +4719,6 @@ col_loop1:
return(DB_CANNOT_ADD_CONSTRAINT);
}
- /* Don't allow foreign keys on partitioned tables yet. */
- ptr1 = dict_scan_to(ptr, "PARTITION");
- if (ptr1) {
- ptr1 = dict_accept(cs, ptr1, "PARTITION", &success);
- if (success && my_isspace(cs, *ptr1)) {
- ptr2 = dict_accept(cs, ptr1, "BY", &success);
- if (success) {
- my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0));
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
- }
- }
- if (dict_table_is_partition(table)) {
- my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0));
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
/* Let us create a constraint struct */
foreign = dict_mem_foreign_create();
@@ -5566,19 +5473,19 @@ dict_index_copy_rec_order_prefix(
UNIV_PREFETCH_R(rec);
if (dict_index_is_ibuf(index)) {
- ut_a(!dict_table_is_comp(index->table));
+ ut_ad(!dict_table_is_comp(index->table));
n = rec_get_n_fields_old(rec);
} else {
if (page_rec_is_leaf(rec)) {
n = dict_index_get_n_unique_in_tree(index);
+ } else if (dict_index_is_spatial(index)) {
+ ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index)
+ == DICT_INDEX_SPATIAL_NODEPTR_SIZE);
+ /* For R-tree, we have to compare
+ the child page numbers as well. */
+ n = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
} else {
- n = dict_index_get_n_unique_in_tree_nonleaf(index);
- /* For internal node of R-tree, since we need to
- compare the page no field, so, we need to copy this
- field as well. */
- if (dict_index_is_spatial(index)) {
- n++;
- }
+ n = dict_index_get_n_unique_in_tree(index);
}
}
@@ -5594,21 +5501,14 @@ dict_index_copy_rec_order_prefix(
@param[in,out] heap memory heap for allocation
@return own: data tuple */
dtuple_t*
-dict_index_build_data_tuple_func(
+dict_index_build_data_tuple(
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
{
- dtuple_t* tuple;
-
- ut_ad(dict_table_is_comp(index->table)
- || n_fields <= rec_get_n_fields_old(rec));
-
- tuple = dtuple_create(heap, n_fields);
+ dtuple_t* tuple = dtuple_create(heap, n_fields);
dict_index_copy_types(tuple, index, n_fields);
@@ -5858,18 +5758,17 @@ dict_print_info_on_foreign_keys(
/** Given a space_id of a file-per-table tablespace, search the
dict_sys->table_LRU list and return the dict_table_t* pointer for it.
-@param space_id Tablespace ID
+@param space tablespace
@return table if found, NULL if not */
static
dict_table_t*
-dict_find_single_table_by_space(
- ulint space_id)
+dict_find_single_table_by_space(const fil_space_t* space)
{
dict_table_t* table;
ulint num_item;
ulint count = 0;
- ut_ad(space_id > 0);
+ ut_ad(space->id > 0);
if (dict_sys == NULL) {
/* This could happen when it's in redo processing. */
@@ -5884,7 +5783,7 @@ dict_find_single_table_by_space(
killing the server, so it is worth risking some consequences of
the action. */
while (table && count < num_item) {
- if (table->space == space_id) {
+ if (table->space == space) {
if (dict_table_is_file_per_table(table)) {
return(table);
}
@@ -5901,41 +5800,28 @@ dict_find_single_table_by_space(
/**********************************************************************//**
Flags a table with specified space_id corrupted in the data dictionary
cache
-@return TRUE if successful */
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
- ulint space_id) /*!< in: space ID */
+@return true if successful */
+bool dict_set_corrupted_by_space(const fil_space_t* space)
{
dict_table_t* table;
- table = dict_find_single_table_by_space(space_id);
+ table = dict_find_single_table_by_space(space);
if (!table) {
- return(FALSE);
+ return false;
}
/* mark the table->corrupted bit only, since the caller
could be too deep in the stack for SYS_INDEXES update */
table->corrupted = true;
table->file_unreadable = true;
-
- return(TRUE);
+ return true;
}
-
-/** Flag a table with specified space_id encrypted in the data dictionary
-cache
-@param[in] space_id Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(ulint space_id)
+/** Flag a table encrypted in the data dictionary cache. */
+void dict_set_encrypted_by_space(const fil_space_t* space)
{
- dict_table_t* table;
-
- table = dict_find_single_table_by_space(space_id);
-
- if (table) {
+ if (dict_table_t* table = dict_find_single_table_by_space(space)) {
table->file_unreadable = true;
}
}
@@ -6187,15 +6073,13 @@ dict_ind_init()
dict_table_t* table;
/* create dummy table and index for REDUNDANT infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0, 0);
+ table = dict_mem_table_create("SYS_DUMMY1", NULL, 1, 0, 0, 0);
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
DATA_ENGLISH | DATA_NOT_NULL, 8);
- dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1",
- DICT_HDR_SPACE, 0, 1);
+ dict_ind_redundant = dict_mem_index_create(table, "SYS_DUMMY1", 0, 1);
dict_index_add_col(dict_ind_redundant, table,
dict_table_get_nth_col(table, 0), 0);
- dict_ind_redundant->table = table;
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
dict_ind_redundant->cached = TRUE;
}
@@ -6442,8 +6326,7 @@ dict_table_schema_check(
}
}
- if (!table->is_readable() &&
- fil_space_get(table->space) == NULL) {
+ if (!table->is_readable() && !table->space) {
/* missing tablespace */
snprintf(errstr, errstr_sz,
@@ -6454,15 +6337,13 @@ dict_table_schema_check(
return(DB_TABLE_NOT_FOUND);
}
- ulint n_sys_cols = dict_table_get_n_sys_cols(table);
- if ((ulint) table->n_def - n_sys_cols != req_schema->n_cols) {
+ if (ulint(table->n_def - DATA_N_SYS_COLS) != req_schema->n_cols) {
/* the table has a different number of columns than required */
snprintf(errstr, errstr_sz,
- "%s has " ULINTPF " columns but should have "
- ULINTPF ".",
+ "%s has %d columns but should have " ULINTPF ".",
ut_format_name(req_schema->table_name, buf,
sizeof buf),
- table->n_def - n_sys_cols,
+ table->n_def - DATA_N_SYS_COLS,
req_schema->n_cols);
return(DB_ERROR);
@@ -6707,11 +6588,18 @@ void
dict_close(void)
/*============*/
{
- ulint i;
+ if (dict_sys == NULL) {
+ /* This should only happen if a failure occurred
+ during redo log processing. */
+ return;
+ }
+
+ /* Acquire only because it's a pre-condition. */
+ mutex_enter(&dict_sys->mutex);
/* Free the hash elements. We don't remove them from the table
because we are going to destroy the table anyway. */
- for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
+ for (ulint i = 0; i < hash_get_n_cells(dict_sys->table_id_hash); i++) {
dict_table_t* table;
table = static_cast<dict_table_t*>(
@@ -6723,12 +6611,7 @@ dict_close(void)
table = static_cast<dict_table_t*>(
HASH_GET_NEXT(name_hash, prev_table));
ut_ad(prev_table->magic_n == DICT_TABLE_MAGIC_N);
- /* Acquire only because it's a pre-condition. */
- mutex_enter(&dict_sys->mutex);
-
dict_table_remove_from_cache(prev_table);
-
- mutex_exit(&dict_sys->mutex);
}
}
@@ -6738,6 +6621,7 @@ dict_close(void)
therefore we don't delete the individual elements. */
hash_table_free(dict_sys->table_id_hash);
+ mutex_exit(&dict_sys->mutex);
mutex_free(&dict_sys->mutex);
rw_lock_free(dict_operation_lock);
@@ -6747,6 +6631,11 @@ dict_close(void)
mutex_free(&dict_foreign_err_mutex);
+ if (dict_foreign_err_file) {
+ fclose(dict_foreign_err_file);
+ dict_foreign_err_file = NULL;
+ }
+
ut_free(dict_sys);
dict_sys = NULL;
@@ -6910,7 +6799,7 @@ dict_foreign_qualify_index(
return(false);
}
- if (dict_col_is_virtual(field->col)) {
+ if (field->col->is_virtual()) {
col_name = "";
for (ulint j = 0; j < table->n_v_def; j++) {
col_name = dict_table_get_v_col_name(table, j);
@@ -6990,7 +6879,7 @@ dict_index_zip_pad_update(
/* Only do increment if it won't increase padding
beyond max pad size. */
if (info->pad + ZIP_PAD_INCR
- < (UNIV_PAGE_SIZE * zip_pad_max) / 100) {
+ < (srv_page_size * zip_pad_max) / 100) {
/* Use atomics even though we have the mutex.
This is to ensure that we are able to read
info->pad atomically. */
@@ -7016,7 +6905,7 @@ dict_index_zip_pad_update(
/* Use atomics even though we have the mutex.
This is to ensure that we are able to read
info->pad atomically. */
- my_atomic_addlint(&info->pad, -ZIP_PAD_INCR);
+ my_atomic_addlint(&info->pad, ulint(-ZIP_PAD_INCR));
info->n_rounds = 0;
@@ -7086,17 +6975,17 @@ dict_index_zip_pad_optimal_page_size(
if (!zip_failure_threshold_pct) {
/* Disabled by user. */
- return(UNIV_PAGE_SIZE);
+ return(srv_page_size);
}
pad = my_atomic_loadlint(&index->zip_pad.pad);
- ut_ad(pad < UNIV_PAGE_SIZE);
- sz = UNIV_PAGE_SIZE - pad;
+ ut_ad(pad < srv_page_size);
+ sz = srv_page_size - pad;
/* Min size allowed by user. */
ut_ad(zip_pad_max < 100);
- min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100;
+ min_sz = (srv_page_size * (100 - zip_pad_max)) / 100;
return(ut_max(sz, min_sz));
}
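
dict_index_zip_pad_optimal_page_size() above now derives its limits from srv_page_size rather than the UNIV_PAGE_SIZE constant, but the clamp is unchanged: subtract the current padding from the page size, never dropping below (100 - zip_pad_max) per cent of the page. A standalone sketch of that arithmetic:

#include <algorithm>
#include <cassert>

/* Target uncompressed size for a compressed page: the page size minus the
current padding, clamped to a minimum percentage of the page. */
static unsigned long optimal_page_size(unsigned long page_size,
				       unsigned long pad,
				       unsigned long zip_pad_max_pct)
{
	unsigned long sz     = page_size - pad;
	unsigned long min_sz = (page_size * (100 - zip_pad_max_pct)) / 100;
	return std::max(sz, min_sz);
}

int main()
{
	/* With a 16KiB page, 2KiB of padding and a 50% cap, padding wins. */
	assert(optimal_page_size(16384, 2048, 50) == 14336);
	/* Padding beyond the cap is clamped to 50% of the page. */
	assert(optimal_page_size(16384, 12288, 50) == 8192);
}
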
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 2fe34886df2..1540f7e53bc 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -88,9 +88,9 @@ dict_load_table_one(
/** Load a table definition from a SYS_TABLES record to dict_table_t.
Do not load any columns or indexes.
-@param[in] name Table name
-@param[in] rec SYS_TABLES record
-@param[out,own] table table, or NULL
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table table, or NULL
@return error message
@retval NULL on success */
static
@@ -110,7 +110,6 @@ dict_load_index_low(
byte* table_id, /*!< in/out: table id (8 bytes),
an "in" value if allocate=TRUE
and "out" when allocate=FALSE */
- const char* table_name, /*!< in: table name */
mem_heap_t* heap, /*!< in/out: temporary memory heap */
const rec_t* rec, /*!< in: SYS_INDEXES record */
ibool allocate, /*!< in: TRUE=allocate *index,
@@ -142,7 +141,6 @@ dict_load_column_low(
/** Load virtual column "mapping" (to base columns) information
from a SYS_VIRTUAL record
@param[in,out] table table
-@param[in,out] heap memory heap
@param[in,out] column mapped base column's dict_column_t
@param[in,out] table_id table id
@param[in,out] pos virtual column position
@@ -154,7 +152,6 @@ static
const char*
dict_load_virtual_low(
dict_table_t* table,
- mem_heap_t* heap,
dict_col_t** column,
table_id_t* table_id,
ulint* pos,
@@ -382,16 +379,12 @@ dict_process_sys_tables_rec_and_mtr_commit(
mem_heap_t* heap, /*!< in/out: temporary memory heap */
const rec_t* rec, /*!< in: SYS_TABLES record */
dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status, /*!< in: status bit controls
- options such as whether we shall
- look for dict_table_t from cache
- first */
+ bool cached, /*!< in: whether to load from cache */
mtr_t* mtr) /*!< in/out: mini-transaction,
will be committed */
{
ulint len;
const char* field;
- const char* err_msg = NULL;
table_name_t table_name;
field = (const char*) rec_get_nth_field_old(
@@ -404,28 +397,17 @@ dict_process_sys_tables_rec_and_mtr_commit(
/* Get the table name */
table_name.m_name = mem_heap_strdupl(heap, field, len);
- /* If DICT_TABLE_LOAD_FROM_CACHE is set, first check
- whether there is cached dict_table_t struct */
- if (status & DICT_TABLE_LOAD_FROM_CACHE) {
-
+ if (cached) {
/* Commit before load the table again */
mtr_commit(mtr);
*table = dict_table_get_low(table_name.m_name);
-
- if (!(*table)) {
- err_msg = "Table not found in cache";
- }
+ return *table ? NULL : "Table not found in cache";
} else {
- err_msg = dict_load_table_low(table_name, rec, table);
+ const char* err = dict_load_table_low(table_name, rec, table);
mtr_commit(mtr);
+ return err;
}
-
- if (err_msg) {
- return(err_msg);
- }
-
- return(NULL);
}
/********************************************************************//**
@@ -447,8 +429,7 @@ dict_process_sys_indexes_rec(
buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
/* Parse the record, and get "dict_index_t" struct filled */
- err_msg = dict_load_index_low(buf, NULL,
- heap, rec, FALSE, &index);
+ err_msg = dict_load_index_low(buf, heap, rec, FALSE, &index);
*table_id = mach_read_from_8(buf);
@@ -481,7 +462,6 @@ dict_process_sys_columns_rec(
/** This function parses a SYS_VIRTUAL record and extracts virtual column
information
-@param[in,out] heap heap memory
@param[in] rec current SYS_COLUMNS rec
@param[in,out] table_id table id
@param[in,out] pos virtual column position
@@ -489,7 +469,6 @@ information
@return error message, or NULL on success */
const char*
dict_process_sys_virtual_rec(
- mem_heap_t* heap,
const rec_t* rec,
table_id_t* table_id,
ulint* pos,
@@ -498,7 +477,7 @@ dict_process_sys_virtual_rec(
const char* err_msg;
/* Parse the record, and get "dict_col_t" struct filled */
- err_msg = dict_load_virtual_low(NULL, heap, NULL, table_id,
+ err_msg = dict_load_virtual_low(NULL, NULL, table_id,
pos, base_pos, rec);
return(err_msg);
@@ -908,7 +887,7 @@ dict_update_filepath(
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "update filepath";
trx->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
@@ -928,7 +907,7 @@ dict_update_filepath(
trx_commit_for_mysql(trx);
trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
+ trx_free(trx);
if (err == DB_SUCCESS) {
/* We just updated SYS_DATAFILES due to the contents in
@@ -977,7 +956,7 @@ dict_replace_tablespace_and_filepath(
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(filepath);
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "insert tablespace and filepath";
trx->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
@@ -990,7 +969,7 @@ dict_replace_tablespace_and_filepath(
trx_commit_for_mysql(trx);
trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
+ trx_free(trx);
return(err);
}
@@ -1150,8 +1129,9 @@ dict_sys_tables_type_valid(ulint type, bool not_redundant)
}
if (!not_redundant) {
- /* SYS_TABLES.TYPE must be 1 for ROW_FORMAT=REDUNDANT. */
- return(false);
+ /* SYS_TABLES.TYPE must be 1 or 1|DICT_TF_MASK_NO_ROLLBACK
+ for ROW_FORMAT=REDUNDANT. */
+ return !(type & ~(1U | DICT_TF_MASK_NO_ROLLBACK));
}
if (type >= 1U << DICT_TF_POS_UNUSED) {
@@ -1159,11 +1139,6 @@ dict_sys_tables_type_valid(ulint type, bool not_redundant)
return(false);
}
- /* ATOMIC_WRITES cannot be 3; it is the 10.3 NO_ROLLBACK flag. */
- if (!(~type & DICT_TF_MASK_ATOMIC_WRITES)) {
- return(false);
- }
-
return(dict_tf_is_valid_not_redundant(type));
}
@@ -1184,7 +1159,8 @@ dict_sys_tables_type_to_tf(ulint type, bool not_redundant)
| DICT_TF_MASK_ATOMIC_BLOBS
| DICT_TF_MASK_DATA_DIR
| DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_NO_ROLLBACK);
ut_ad(dict_tf_is_valid(flags));
return(flags);
@@ -1236,7 +1212,8 @@ dict_sys_tables_rec_read(
MariaDB 10.2.2 introduced the SHARED_SPACE flag from MySQL 5.7,
shifting the flags PAGE_COMPRESSION, PAGE_COMPRESSION_LEVEL,
- ATOMIC_WRITES by one bit. The SHARED_SPACE flag would always
+ ATOMIC_WRITES (repurposed to NO_ROLLBACK in 10.3.1) by one bit.
+ The SHARED_SPACE flag would always
be written as 0 by MariaDB, because MariaDB does not support
CREATE TABLESPACE or CREATE TABLE...TABLESPACE for InnoDB.
@@ -1305,7 +1282,7 @@ dict_sys_tables_rec_read(
/* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
dict_table_t::flags the low order bit is used to determine if the
- row format is Redundant (0) or Compact (1) when the format is Antelope.
+ ROW_FORMAT=REDUNDANT (0) or anything else (1).
Read the 4 byte N_COLS field and look at the high order bit. It
should be set for COMPACT and later. It should not be set for
REDUNDANT. */
@@ -1443,20 +1420,19 @@ dict_check_sys_tables(
/* Now that we have the proper name for this tablespace,
look to see if it is already in the tablespace cache. */
- if (fil_space_for_table_exists_in_mem(
- space_id, table_name.m_name,
- false, NULL, flags)) {
+ if (const fil_space_t* space
+ = fil_space_for_table_exists_in_mem(
+ space_id, table_name.m_name, false, flags)) {
/* Recovery can open a datafile that does not
match SYS_DATAFILES. If they don't match, update
SYS_DATAFILES. */
char *dict_path = dict_get_first_path(space_id);
- char *fil_path = fil_space_get_first_path(space_id);
- if (dict_path && fil_path
+ const char *fil_path = space->chain.start->name;
+ if (dict_path
&& strcmp(dict_path, fil_path)) {
dict_update_filepath(space_id, fil_path);
}
ut_free(dict_path);
- ut_free(fil_path);
ut_free(table_name.m_name);
continue;
}
@@ -1469,15 +1445,12 @@ dict_check_sys_tables(
char* filepath = dict_get_first_path(space_id);
/* Check that the .ibd file exists. */
- dberr_t err = fil_ibd_open(
- validate,
- !srv_read_only_mode && srv_log_file_size != 0,
- FIL_TYPE_TABLESPACE,
- space_id, dict_tf_to_fsp_flags(flags),
- table_name.m_name,
- filepath);
-
- if (err != DB_SUCCESS) {
+ if (!fil_ibd_open(
+ validate,
+ !srv_read_only_mode && srv_log_file_size != 0,
+ FIL_TYPE_TABLESPACE,
+ space_id, dict_tf_to_fsp_flags(flags),
+ table_name, filepath)) {
ib::warn() << "Ignoring tablespace for "
<< table_name
<< " because it could not be opened.";
@@ -1713,7 +1686,6 @@ static const char* dict_load_virtual_del = "delete-marked record in SYS_VIRTUAL"
/** Load virtual column "mapping" (to base columns) information
from a SYS_VIRTUAL record
@param[in,out] table table
-@param[in,out] heap memory heap
@param[in,out] column mapped base column's dict_column_t
@param[in,out] table_id table id
@param[in,out] pos virtual column position
@@ -1725,7 +1697,6 @@ static
const char*
dict_load_virtual_low(
dict_table_t* table,
- mem_heap_t* heap,
dict_col_t** column,
table_id_t* table_id,
ulint* pos,
@@ -1985,7 +1956,7 @@ dict_load_virtual_one_col(
ut_a(btr_pcur_is_on_user_rec(&pcur));
- err_msg = dict_load_virtual_low(table, heap,
+ err_msg = dict_load_virtual_low(table,
&v_col->base_col[i - skipped],
NULL,
&pos, NULL, rec);
@@ -2052,7 +2023,7 @@ dict_load_field_low(
ulint len;
unsigned pos_and_prefix_len;
unsigned prefix_len;
- ibool first_field;
+ bool first_field;
ulint position;
/* Either index or sys_field is supplied, not both */
@@ -2241,7 +2212,6 @@ dict_load_index_low(
byte* table_id, /*!< in/out: table id (8 bytes),
an "in" value if allocate=TRUE
and "out" when allocate=FALSE */
- const char* table_name, /*!< in: table name */
mem_heap_t* heap, /*!< in/out: temporary memory heap */
const rec_t* rec, /*!< in: SYS_INDEXES record */
ibool allocate, /*!< in: TRUE=allocate *index,
@@ -2256,7 +2226,6 @@ dict_load_index_low(
index_id_t id;
ulint n_fields;
ulint type;
- ulint space;
unsigned merge_threshold;
if (allocate) {
@@ -2355,26 +2324,18 @@ err_len:
}
field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
- if (len != 4) {
- goto err_len;
- }
- space = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
if (len != 4) {
goto err_len;
}
if (allocate) {
- *index = dict_mem_index_create(table_name, name_buf,
- space, type, n_fields);
+ *index = dict_mem_index_create(NULL, name_buf, type, n_fields);
} else {
ut_a(*index);
- dict_mem_fill_index_struct(*index, NULL, NULL, name_buf,
- space, type, n_fields);
+ dict_mem_fill_index_struct(*index, NULL, name_buf,
+ type, n_fields);
}
(*index)->id = id;
@@ -2481,8 +2442,7 @@ dict_load_indexes(
}
}
- err_msg = dict_load_index_low(
- buf, table->name.m_name, heap, rec, TRUE, &index);
+ err_msg = dict_load_index_low(buf, heap, rec, TRUE, &index);
ut_ad((index == NULL && err_msg != NULL)
|| (index != NULL && err_msg == NULL));
@@ -2613,17 +2573,15 @@ corrupted:
dict_mem_index_free(index);
} else {
dict_load_fields(index, heap);
-
- error = dict_index_add_to_cache(
- table, index, index->page, FALSE);
+ index->table = table;
/* The data dictionary tables should never contain
invalid index definitions. If we ignored this error
and simply did not load this index definition, the
.frm file would disagree with the index definitions
inside InnoDB. */
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-
+ if (!dict_index_add_to_cache(
+ index, index->page, false, &error)) {
goto func_exit;
}
}
@@ -2655,9 +2613,9 @@ func_exit:
/** Load a table definition from a SYS_TABLES record to dict_table_t.
Do not load any columns or indexes.
-@param[in] name Table name
-@param[in] rec SYS_TABLES record
-@param[out,own] table table, or NULL
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table table, or NULL
@return error message
@retval NULL on success */
static
@@ -2686,7 +2644,8 @@ dict_load_table_low(table_name_t& name, const rec_t* rec, dict_table_t** table)
dict_table_decode_n_col(t_num, &n_cols, &n_v_col);
*table = dict_mem_table_create(
- name.m_name, space_id, n_cols + n_v_col, n_v_col, flags, flags2);
+ name.m_name, NULL, n_cols + n_v_col, n_v_col, flags, flags2);
+ (*table)->space_id = space_id;
(*table)->id = table_id;
(*table)->file_unreadable = false;
@@ -2704,7 +2663,7 @@ void
dict_save_data_dir_path(
/*====================*/
dict_table_t* table, /*!< in/out: table */
- char* filepath) /*!< in: filepath of tablespace */
+ const char* filepath) /*!< in: filepath of tablespace */
{
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
@@ -2739,20 +2698,19 @@ dict_get_and_save_data_dir_path(
dict_table_t* table,
bool dict_mutex_own)
{
- ut_ad(!dict_table_is_temporary(table));
-
- if (!table->data_dir_path && table->space) {
- char* path = fil_space_get_first_path(table->space);
+ ut_ad(!table->is_temporary());
+ ut_ad(!table->space || table->space->id == table->space_id);
+ if (!table->data_dir_path && table->space_id) {
if (!dict_mutex_own) {
dict_mutex_enter_for_mysql();
}
- if (path == NULL) {
- path = dict_get_first_path(table->space);
- }
-
- if (path != NULL) {
+ if (const char* p = table->space
+ ? table->space->chain.start->name : NULL) {
+ table->flags |= (1 << DICT_TF_POS_DATA_DIR);
+ dict_save_data_dir_path(table, p);
+ } else if (char* path = dict_get_first_path(table->space_id)) {
table->flags |= (1 << DICT_TF_POS_DATA_DIR);
dict_save_data_dir_path(table, path);
ut_free(path);
@@ -2827,19 +2785,20 @@ dict_load_table(
/** Opens a tablespace for dict_load_table_one()
@param[in,out] table A table that refers to the tablespace to open
-@param[in] heap A memory heap
@param[in] ignore_err Whether to ignore an error. */
UNIV_INLINE
void
dict_load_tablespace(
dict_table_t* table,
- mem_heap_t* heap,
dict_err_ignore_t ignore_err)
{
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
+ ut_ad(!table->space);
+ ut_ad(table->space_id < SRV_LOG_SPACE_FIRST_ID);
+ ut_ad(fil_system.sys_space);
- /* The system tablespace is always available. */
- if (is_system_tablespace(table->space)) {
+ if (table->space_id == TRX_SYS_SPACE) {
+ table->space = fil_system.sys_space;
return;
}
@@ -2850,11 +2809,10 @@ dict_load_tablespace(
return;
}
- char* space_name = table->name.m_name;
-
/* The tablespace may already be open. */
- if (fil_space_for_table_exists_in_mem(
- table->space, space_name, false, heap, table->flags)) {
+ table->space = fil_space_for_table_exists_in_mem(
+ table->space_id, table->name.m_name, false, table->flags);
+ if (table->space) {
return;
}
@@ -2862,12 +2820,12 @@ dict_load_tablespace(
ib::error() << "Failed to find tablespace for table "
<< table->name << " in the cache. Attempting"
" to load the tablespace with space id "
- << table->space;
+ << table->space_id;
}
/* Use the remote filepath if needed. This parameter is optional
in the call to fil_ibd_open(). If not supplied, it will be built
- from the space_name. */
+ from the table->name. */
char* filepath = NULL;
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
/* This will set table->data_dir_path from either
@@ -2883,12 +2841,12 @@ dict_load_tablespace(
/* Try to open the tablespace. We set the 2nd param (fix_dict) to
false because we do not have an x-lock on dict_operation_lock */
- dberr_t err = fil_ibd_open(
- true, false, FIL_TYPE_TABLESPACE, table->space,
+ table->space = fil_ibd_open(
+ true, false, FIL_TYPE_TABLESPACE, table->space_id,
dict_tf_to_fsp_flags(table->flags),
- space_name, filepath);
+ table->name, filepath);
- if (err != DB_SUCCESS) {
+ if (!table->space) {
/* We failed to find a sensible tablespace file */
table->file_unreadable = true;
}
@@ -2923,7 +2881,6 @@ dict_load_table_one(
dict_names_t& fk_tables)
{
dberr_t err;
- dict_table_t* table;
dict_table_t* sys_tables;
btr_pcur_t pcur;
dict_index_t* sys_index;
@@ -2989,6 +2946,7 @@ err_exit:
goto err_exit;
}
+ dict_table_t* table;
if (const char* err_msg = dict_load_table_low(name, rec, &table)) {
if (err_msg != dict_load_table_flags) {
ib::error() << err_msg;
@@ -2999,16 +2957,17 @@ err_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
- dict_load_tablespace(table, heap, ignore_err);
+ dict_load_tablespace(table, ignore_err);
dict_load_columns(table, heap);
dict_load_virtual(table, heap);
+ dict_table_add_system_columns(table, heap);
+
if (cached) {
- dict_table_add_to_cache(table, TRUE, heap);
- } else {
- dict_table_add_system_columns(table, heap);
+ table->can_be_evicted = true;
+ table->add_to_cache();
}
mem_heap_empty(heap);
@@ -3047,6 +3006,15 @@ err_exit:
}
}
+ if (err == DB_SUCCESS && cached && table->is_readable()) {
+ if (table->space && !fil_space_get_size(table->space_id)) {
+ table->corrupted = true;
+ table->file_unreadable = true;
+ } else if (table->supports_instant()) {
+ err = btr_cur_instant_init(table);
+ }
+ }
+
/* Initialize table foreign_child value. Its value could be
changed when dict_load_foreigns() is called below */
table->fk_max_recusive_level = 0;
@@ -3073,12 +3041,6 @@ err_exit:
} else {
dict_mem_table_fill_foreign_vcol_set(table);
table->fk_max_recusive_level = 0;
-
- if (table->space
- && !fil_space_get_size(table->space)) {
- table->corrupted = true;
- table->file_unreadable = true;
- }
}
} else {
dict_index_t* index;
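
In the dict0load.cc changes above, dict_sys_tables_type_valid() now accepts SYS_TABLES.TYPE values of 1 or 1|DICT_TF_MASK_NO_ROLLBACK for ROW_FORMAT=REDUNDANT by rejecting any other set bit. A sketch of that mask test; the numeric value of the NO_ROLLBACK bit below is a placeholder for illustration, not the real definition from dict0mem.h:

#include <cassert>

/* Placeholder bit for illustration only. */
static const unsigned NO_ROLLBACK_MASK = 1U << 8;

/* Accept TYPE == 1 or TYPE == (1 | NO_ROLLBACK_MASK); reject anything else. */
static bool redundant_type_valid(unsigned type)
{
	return !(type & ~(1U | NO_ROLLBACK_MASK));
}

int main()
{
	assert(redundant_type_valid(1));			/* plain REDUNDANT */
	assert(redundant_type_valid(1 | NO_ROLLBACK_MASK));	/* no-rollback table */
	assert(!redundant_type_valid(1 | 2));			/* stray flag bit */
}
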
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index b01cd657369..3056f73f0cb 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -37,6 +37,7 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0crc32.h"
#include "lock0lock.h"
#include "sync0sync.h"
+#include "row0row.h"
#include <iostream>
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
@@ -50,6 +51,29 @@ static const char* innobase_system_databases[] = {
NullS
};
+/** Determine if a table belongs to innobase_system_databases[]
+@param[in] name database_name/table_name
+@return whether the database_name is in innobase_system_databases[] */
+static bool dict_mem_table_is_system(const char *name)
+{
+ /* table has the following format: database/table
+	and some system tables are of the form SYS_* */
+ if (!strchr(name, '/')) {
+ return true;
+ }
+ size_t table_len = strlen(name);
+ const char *system_db;
+ int i = 0;
+ while ((system_db = innobase_system_databases[i++])
+ && (system_db != NullS)) {
+ size_t len = strlen(system_db);
+ if (table_len > len && !strncmp(name, system_db, len)) {
+ return true;
+ }
+ }
+ return false;
+}
+
/** The start of the table basename suffix for partitioned tables */
const char table_name_t::part_suffix[4]
#ifdef _WIN32
@@ -103,8 +127,7 @@ dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index of
- the table is placed */
+ fil_space_t* space, /*!< in: tablespace */
ulint n_cols, /*!< in: total number of columns including
virtual and non-virtual columns */
ulint n_v_cols,/*!< in: number of virtual columns */
@@ -115,6 +138,10 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
+ ut_ad(!space
+ || space->purpose == FIL_TYPE_TABLESPACE
+ || space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT);
ut_a(dict_tf2_is_valid(flags, flags2));
ut_a(!(flags2 & DICT_TF2_UNUSED_BIT_MASK));
@@ -135,11 +162,11 @@ dict_mem_table_create(
table->flags2 = (unsigned int) flags2;
table->name.m_name = mem_strdup(name);
table->is_system_db = dict_mem_table_is_system(table->name.m_name);
- table->space = (unsigned int) space;
- table->n_t_cols = (unsigned int) (n_cols +
- dict_table_get_n_sys_cols(table));
+ table->space = space;
+ table->space_id = space ? space->id : ULINT_UNDEFINED;
+ table->n_t_cols = unsigned(n_cols + DATA_N_SYS_COLS);
table->n_v_cols = (unsigned int) (n_v_cols);
- table->n_cols = table->n_t_cols - table->n_v_cols;
+ table->n_cols = unsigned(table->n_t_cols - table->n_v_cols);
table->cols = static_cast<dict_col_t*>(
mem_heap_alloc(heap, table->n_cols * sizeof(dict_col_t)));
@@ -249,7 +276,7 @@ dict_add_col_name(
s += strlen(s) + 1;
}
- old_len = s - col_names;
+ old_len = unsigned(s - col_names);
} else {
old_len = 0;
}
@@ -312,6 +339,16 @@ dict_mem_table_add_col(
col = dict_table_get_nth_col(table, i);
dict_mem_fill_column_struct(col, i, mtype, prtype, len);
+
+ switch (prtype & DATA_VERSIONED) {
+ case DATA_VERS_START:
+ ut_ad(!table->vers_start);
+ table->vers_start = i;
+ break;
+ case DATA_VERS_END:
+ ut_ad(!table->vers_end);
+ table->vers_end = i;
+ }
}
/** Adds a virtual column definition to a table.
@@ -369,7 +406,7 @@ dict_mem_table_add_v_col(
i, name, heap);
}
- v_col = dict_table_get_nth_v_col(table, i);
+ v_col = &table->v_cols[i];
dict_mem_fill_column_struct(&v_col->m_col, pos, mtype, prtype, len);
v_col->v_pos = i;
@@ -398,7 +435,7 @@ dict_mem_table_add_s_col(
dict_table_t* table,
ulint num_base)
{
- ulint i = table->n_def - 1;
+ unsigned i = unsigned(table->n_def) - 1;
dict_col_t* col = dict_table_get_nth_col(table, i);
dict_s_col_t s_col;
@@ -458,13 +495,13 @@ dict_mem_table_col_rename_low(
/* We need to adjust all affected index->field
pointers, as in dict_index_add_col(). First, copy
table->col_names. */
- ulint prefix_len = s - t_col_names;
+ ulint prefix_len = ulint(s - t_col_names);
for (; i < n_col; i++) {
s += strlen(s) + 1;
}
- ulint full_len = s - t_col_names;
+ ulint full_len = ulint(s - t_col_names);
char* col_names;
if (to_len > from_len) {
@@ -497,12 +534,12 @@ dict_mem_table_col_rename_low(
/* if is_virtual and that in field->col does
not match, continue */
if ((!is_virtual) !=
- (!dict_col_is_virtual(field->col))) {
+ (!field->col->is_virtual())) {
continue;
}
ulint name_ofs
- = field->name - t_col_names;
+ = ulint(field->name - t_col_names);
if (name_ofs <= prefix_len) {
field->name = col_names + name_ofs;
} else {
@@ -673,9 +710,11 @@ dict_mem_fill_column_struct(
column->mtype = (unsigned int) mtype;
column->prtype = (unsigned int) prtype;
column->len = (unsigned int) col_len;
- dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
+ dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
column->mbminlen = mbminlen;
column->mbmaxlen = mbmaxlen;
+ column->def_val.data = NULL;
+ column->def_val.len = UNIV_SQL_DEFAULT;
}
/**********************************************************************//**
@@ -684,11 +723,8 @@ Creates an index memory object.
dict_index_t*
dict_mem_index_create(
/*==================*/
- const char* table_name, /*!< in: table name */
+ dict_table_t* table, /*!< in: table */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields) /*!< in: number of fields */
@@ -696,15 +732,16 @@ dict_mem_index_create(
dict_index_t* index;
mem_heap_t* heap;
- ut_ad(table_name && index_name);
+ ut_ad(!table || table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(index_name);
heap = mem_heap_create(DICT_HEAP_SIZE);
index = static_cast<dict_index_t*>(
mem_heap_zalloc(heap, sizeof(*index)));
+ index->table = table;
- dict_mem_fill_index_struct(index, heap, table_name, index_name,
- space, type, n_fields);
+ dict_mem_fill_index_struct(index, heap, index_name, type, n_fields);
dict_index_zip_pad_mutex_create_lazy(index);
@@ -1006,7 +1043,7 @@ dict_mem_index_add_field(
index->n_def++;
- field = dict_index_get_nth_field(index, index->n_def - 1);
+ field = dict_index_get_nth_field(index, unsigned(index->n_def) - 1);
field->name = name;
field->prefix_len = (unsigned int) prefix_len;
@@ -1066,7 +1103,7 @@ dict_mem_create_temporary_tablename(
char* name;
const char* dbend = strchr(dbtab, '/');
ut_ad(dbend);
- size_t dblen = dbend - dbtab + 1;
+ size_t dblen = size_t(dbend - dbtab) + 1;
/* Increment a randomly initialized number for each temp file. */
my_atomic_add32((int32*) &dict_temp_file_num, 1);
@@ -1154,31 +1191,354 @@ operator<< (std::ostream& out, const dict_foreign_set& fk_set)
return(out);
}
-/****************************************************************//**
-Determines if a table belongs to a system database
-@return */
-bool
-dict_mem_table_is_system(
-/*================*/
- char *name) /*!< in: table name */
+/** Adjust clustered index metadata for instant ADD COLUMN.
+@param[in] instant clustered index definition after instant ADD COLUMN */
+inline void dict_index_t::instant_add_field(const dict_index_t& instant)
{
- ut_ad(name);
+ DBUG_ASSERT(is_primary());
+ DBUG_ASSERT(instant.is_primary());
+ DBUG_ASSERT(!instant.is_instant());
+ DBUG_ASSERT(n_def == n_fields);
+ DBUG_ASSERT(instant.n_def == instant.n_fields);
+
+ DBUG_ASSERT(type == instant.type);
+ DBUG_ASSERT(trx_id_offset == instant.trx_id_offset);
+ DBUG_ASSERT(n_user_defined_cols == instant.n_user_defined_cols);
+ DBUG_ASSERT(n_uniq == instant.n_uniq);
+ DBUG_ASSERT(instant.n_fields > n_fields);
+ DBUG_ASSERT(instant.n_def > n_def);
+ DBUG_ASSERT(instant.n_nullable >= n_nullable);
+ DBUG_ASSERT(instant.n_core_fields >= n_core_fields);
+ DBUG_ASSERT(instant.n_core_null_bytes >= n_core_null_bytes);
+
+ n_fields = instant.n_fields;
+ n_def = instant.n_def;
+ n_nullable = instant.n_nullable;
+ fields = static_cast<dict_field_t*>(
+ mem_heap_dup(heap, instant.fields, n_fields * sizeof *fields));
+
+ ut_d(unsigned n_null = 0);
+
+ for (unsigned i = 0; i < n_fields; i++) {
+ DBUG_ASSERT(fields[i].same(instant.fields[i]));
+ const dict_col_t* icol = instant.fields[i].col;
+ DBUG_ASSERT(!icol->is_virtual());
+ dict_col_t* col = fields[i].col = &table->cols[
+ icol - instant.table->cols];
+ fields[i].name = col->name(*table);
+ ut_d(n_null += col->is_nullable());
+ }
- /* table has the following format: database/table
- and some system table are of the form SYS_* */
- if (strchr(name, '/')) {
- size_t table_len = strlen(name);
- const char *system_db;
- int i = 0;
- while ((system_db = innobase_system_databases[i++])
- && (system_db != NullS)) {
- size_t len = strlen(system_db);
- if (table_len > len && !strncmp(name, system_db, len)) {
- return true;
+ ut_ad(n_null == n_nullable);
+}
+
+/** Adjust metadata for instant ADD COLUMN.
+@param[in] table table definition after instant ADD COLUMN */
+void dict_table_t::instant_add_column(const dict_table_t& table)
+{
+ DBUG_ASSERT(!table.cached);
+ DBUG_ASSERT(table.n_def == table.n_cols);
+ DBUG_ASSERT(table.n_t_def == table.n_t_cols);
+ DBUG_ASSERT(n_def == n_cols);
+ DBUG_ASSERT(n_t_def == n_t_cols);
+ DBUG_ASSERT(table.n_cols > n_cols);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ const char* end = table.col_names;
+ for (unsigned i = table.n_cols; i--; ) end += strlen(end) + 1;
+
+ col_names = static_cast<char*>(
+ mem_heap_dup(heap, table.col_names,
+ ulint(end - table.col_names)));
+ const dict_col_t* const old_cols = cols;
+ const dict_col_t* const old_cols_end = cols + n_cols;
+ cols = static_cast<dict_col_t*>(mem_heap_dup(heap, table.cols,
+ table.n_cols
+ * sizeof *cols));
+
+ /* Preserve the default values of previously instantly
+ added columns. */
+ for (unsigned i = unsigned(n_cols) - DATA_N_SYS_COLS; i--; ) {
+ cols[i].def_val = old_cols[i].def_val;
+ }
+
+ /* Copy the new default values to this->heap. */
+ for (unsigned i = n_cols; i < table.n_cols; i++) {
+ dict_col_t& c = cols[i - DATA_N_SYS_COLS];
+ DBUG_ASSERT(c.is_instant());
+ if (c.def_val.len == 0) {
+ c.def_val.data = field_ref_zero;
+ } else if (const void*& d = c.def_val.data) {
+ d = mem_heap_dup(heap, d, c.def_val.len);
+ } else {
+ DBUG_ASSERT(c.def_val.len == UNIV_SQL_NULL);
+ }
+ }
+
+ const unsigned old_n_cols = n_cols;
+ const unsigned n_add = unsigned(table.n_cols - n_cols);
+
+ n_t_def += n_add;
+ n_t_cols += n_add;
+ n_cols = table.n_cols;
+ n_def = n_cols;
+
+ for (unsigned i = n_v_def; i--; ) {
+ const dict_v_col_t& v = v_cols[i];
+ for (ulint n = v.num_base; n--; ) {
+ dict_col_t*& base = v.base_col[n];
+ if (!base->is_virtual()) {
+ DBUG_ASSERT(base >= old_cols);
+ size_t n = size_t(base - old_cols);
+ DBUG_ASSERT(n + DATA_N_SYS_COLS < old_n_cols);
+ base = &cols[n];
}
}
- return false;
- } else {
- return true;
}
+
+ dict_index_t* index = dict_table_get_first_index(this);
+
+ index->instant_add_field(*dict_table_get_first_index(&table));
+
+ while ((index = dict_table_get_next_index(index)) != NULL) {
+ for (unsigned i = 0; i < index->n_fields; i++) {
+ dict_field_t& field = index->fields[i];
+ if (field.col < old_cols
+ || field.col >= old_cols_end) {
+ DBUG_ASSERT(field.col->is_virtual());
+ } else {
+ /* Secondary indexes may contain user
+ columns and DB_ROW_ID (if there is
+ GEN_CLUST_INDEX instead of PRIMARY KEY),
+ but not DB_TRX_ID,DB_ROLL_PTR. */
+ DBUG_ASSERT(field.col >= old_cols);
+ size_t n = size_t(field.col - old_cols);
+ DBUG_ASSERT(n + DATA_N_SYS_COLS <= old_n_cols);
+ if (n + DATA_N_SYS_COLS >= old_n_cols) {
+ /* Replace DB_ROW_ID */
+ n += n_add;
+ }
+ field.col = &cols[n];
+ DBUG_ASSERT(!field.col->is_virtual());
+ field.name = field.col->name(*this);
+ }
+ }
+ }
+}
+
+/** Roll back instant_add_column().
+@param[in] old_n_cols original n_cols
+@param[in] old_cols original cols
+@param[in] old_col_names original col_names */
+void
+dict_table_t::rollback_instant(
+ unsigned old_n_cols,
+ dict_col_t* old_cols,
+ const char* old_col_names)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+ dict_index_t* index = indexes.start;
+ /* index->is_instant() does not necessarily hold here, because
+ the table may have been emptied */
+ DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
+ DBUG_ASSERT(n_cols >= old_n_cols);
+ DBUG_ASSERT(n_cols == n_def);
+ DBUG_ASSERT(index->n_def == index->n_fields);
+
+ const unsigned n_remove = n_cols - old_n_cols;
+
+ for (unsigned i = index->n_fields - n_remove; i < index->n_fields;
+ i++) {
+ if (index->fields[i].col->is_nullable()) {
+ index->n_nullable--;
+ }
+ }
+
+ index->n_fields -= n_remove;
+ index->n_def = index->n_fields;
+ if (index->n_core_fields > index->n_fields) {
+ index->n_core_fields = index->n_fields;
+ index->n_core_null_bytes
+ = UT_BITS_IN_BYTES(unsigned(index->n_nullable));
+ }
+
+ const dict_col_t* const new_cols = cols;
+ const dict_col_t* const new_cols_end = cols + n_cols;
+
+ cols = old_cols;
+ col_names = old_col_names;
+ n_cols = old_n_cols;
+ n_def = old_n_cols;
+ n_t_def -= n_remove;
+ n_t_cols -= n_remove;
+
+ for (unsigned i = n_v_def; i--; ) {
+ const dict_v_col_t& v = v_cols[i];
+ for (ulint n = v.num_base; n--; ) {
+ dict_col_t*& base = v.base_col[n];
+ if (!base->is_virtual()) {
+ base = &cols[base - new_cols];
+ }
+ }
+ }
+
+ do {
+ for (unsigned i = 0; i < index->n_fields; i++) {
+ dict_field_t& field = index->fields[i];
+ if (field.col < new_cols
+ || field.col >= new_cols_end) {
+ DBUG_ASSERT(field.col->is_virtual());
+ } else {
+ DBUG_ASSERT(field.col >= new_cols);
+ size_t n = size_t(field.col - new_cols);
+ DBUG_ASSERT(n <= n_cols);
+ if (n + DATA_N_SYS_COLS >= n_cols) {
+ n -= n_remove;
+ }
+ field.col = &cols[n];
+ DBUG_ASSERT(!field.col->is_virtual());
+ field.name = field.col->name(*this);
+ }
+ }
+ } while ((index = dict_table_get_next_index(index)) != NULL);
+}
+
+/** Trim the instantly added columns when an insert into SYS_COLUMNS
+is rolled back during ALTER TABLE or recovery.
+@param[in] n number of surviving non-system columns */
+void dict_table_t::rollback_instant(unsigned n)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+ dict_index_t* index = indexes.start;
+ DBUG_ASSERT(index->is_instant());
+ DBUG_ASSERT(index->n_def == index->n_fields);
+ DBUG_ASSERT(n_cols == n_def);
+ DBUG_ASSERT(n >= index->n_uniq);
+ DBUG_ASSERT(n_cols > n + DATA_N_SYS_COLS);
+ const unsigned n_remove = n_cols - n - DATA_N_SYS_COLS;
+
+ char* names = const_cast<char*>(dict_table_get_col_name(this, n));
+ const char* sys = names;
+ for (unsigned i = n_remove; i--; ) {
+ sys += strlen(sys) + 1;
+ }
+ static const char system[] = "DB_ROW_ID\0DB_TRX_ID\0DB_ROLL_PTR";
+ DBUG_ASSERT(!memcmp(sys, system, sizeof system));
+ for (unsigned i = index->n_fields - n_remove; i < index->n_fields;
+ i++) {
+ if (index->fields[i].col->is_nullable()) {
+ index->n_nullable--;
+ }
+ }
+ index->n_fields -= n_remove;
+ index->n_def = index->n_fields;
+ memmove(names, sys, sizeof system);
+ memmove(cols + n, cols + n_cols - DATA_N_SYS_COLS,
+ DATA_N_SYS_COLS * sizeof *cols);
+ n_cols -= n_remove;
+ n_def = n_cols;
+ n_t_cols -= n_remove;
+ n_t_def -= n_remove;
+
+ for (unsigned i = DATA_N_SYS_COLS; i--; ) {
+ cols[n_cols - i].ind--;
+ }
+
+ if (dict_index_is_auto_gen_clust(index)) {
+ DBUG_ASSERT(index->n_uniq == 1);
+ dict_field_t* field = index->fields;
+ field->name = sys;
+ field->col = dict_table_get_sys_col(this, DATA_ROW_ID);
+ field++;
+ field->name = sys + sizeof "DB_ROW_ID";
+ field->col = dict_table_get_sys_col(this, DATA_TRX_ID);
+ field++;
+ field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID";
+ field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR);
+
+ /* Replace the DB_ROW_ID column in secondary indexes. */
+ while ((index = dict_table_get_next_index(index)) != NULL) {
+ field = &index->fields[index->n_fields - 1];
+ DBUG_ASSERT(field->col->mtype == DATA_SYS);
+ DBUG_ASSERT(field->col->prtype
+ == DATA_NOT_NULL + DATA_TRX_ID);
+ field->col--;
+ field->name = sys;
+ }
+
+ return;
+ }
+
+ dict_field_t* field = &index->fields[index->n_uniq];
+ field->name = sys + sizeof "DB_ROW_ID";
+ field->col = dict_table_get_sys_col(this, DATA_TRX_ID);
+ field++;
+ field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID";
+ field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR);
+}
+
+
+/** Check if a record in the clustered index is a historical row.
+@param[in] rec clustered index record
+@param[in] offsets offsets of rec
+@return true if the row is historical */
+bool
+dict_index_t::vers_history_row(
+ const rec_t* rec,
+ const ulint* offsets)
+{
+ ut_ad(is_primary());
+
+ ulint len;
+ dict_col_t& col= table->cols[table->vers_end];
+ ut_ad(col.vers_sys_end());
+ ulint nfield = dict_col_get_clust_pos(&col, this);
+ const byte *data = rec_get_nth_field(rec, offsets, nfield, &len);
+ if (col.vers_native()) {
+ ut_ad(len == sizeof trx_id_max_bytes);
+ return 0 != memcmp(data, trx_id_max_bytes, len);
+ }
+ ut_ad(len == sizeof timestamp_max_bytes);
+ return 0 != memcmp(data, timestamp_max_bytes, len);
+}
+
+/** Check if a record in a secondary index is a historical row.
+@param[in] rec record in a secondary index
+@param[out] history_row true if row is historical
+@return true on error */
+bool
+dict_index_t::vers_history_row(
+ const rec_t* rec,
+ bool &history_row)
+{
+ ut_ad(!is_primary());
+
+ bool error = false;
+ mem_heap_t* heap = NULL;
+ dict_index_t* clust_index = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ mtr_t mtr;
+ mtr.start();
+
+ rec_t* clust_rec =
+ row_get_clust_rec(BTR_SEARCH_LEAF, rec, this, &clust_index, &mtr);
+ if (clust_rec) {
+ offsets = rec_get_offsets(clust_rec, clust_index, offsets, true,
+ ULINT_UNDEFINED, &heap);
+
+ history_row = clust_index->vers_history_row(clust_rec, offsets);
+ } else {
+ ib::error() << "foreign constraints: secondary index is out of "
+ "sync";
+ ut_ad(!"secondary index is out of sync");
+ error = true;
+ }
+ mtr.commit();
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ return(error);
}
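The new dict_table_t::instant_add_column() above copies the column array and carefully preserves dict_col_t::def_val, because rows written before an instant ADD COLUMN physically lack the new fields and readers must substitute the dictionary default. A minimal, self-contained sketch of that read path, with hypothetical names (ColumnDef, Row, read_field) rather than the real record format:

#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct ColumnDef {
	std::string name;
	std::string default_value;	/* analogous to dict_col_t::def_val */
};

/* The physical record: only the columns that existed at write time. */
typedef std::vector<std::optional<std::string> > Row;

/* Fields beyond the stored record were added instantly; report the
dictionary default instead of failing. */
std::string read_field(const Row& row, const std::vector<ColumnDef>& cols,
		       size_t i)
{
	if (i < row.size() && row[i]) {
		return *row[i];
	}
	return cols[i].default_value;
}

int main()
{
	std::vector<ColumnDef> cols = {{"a", ""}, {"b", "42"}};
	Row old_row = {std::string("x")};	/* written before ADD COLUMN b */
	std::cout << read_field(old_row, cols, 1) << '\n';	/* prints 42 */
}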
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index efa5892fc66..f236d2edadf 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -292,7 +292,7 @@ dict_stats_exec_sql(
}
if (trx == NULL) {
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx_started = true;
if (srv_read_only_mode) {
@@ -326,7 +326,7 @@ dict_stats_exec_sql(
}
if (trx_started) {
- trx_free_for_background(trx);
+ trx_free(trx);
}
return(err);
@@ -444,8 +444,6 @@ dict_stats_table_clone_create(
idx->name = mem_heap_strdup(heap, index->name);
- idx->table_name = t->name.m_name;
-
idx->table = t;
idx->type = index->type;
@@ -914,7 +912,7 @@ dict_stats_update_transient(
index = dict_table_get_first_index(table);
- if (dict_table_is_discarded(table)) {
+ if (!table->space) {
/* Nothing to do. */
dict_stats_empty_table(table, true);
return;
@@ -1034,10 +1032,10 @@ dict_stats_analyze_index_level(
memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
/* Allocate space for the offsets header (the allocation size at
- offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
so that this will never be less than the size calculated in
rec_get_offsets_func(). */
- i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
+ i = (REC_OFFS_HEADER_SIZE + 1 + 1) + n_uniq;
heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
rec_offsets = static_cast<ulint*>(
@@ -1073,16 +1071,24 @@ dict_stats_analyze_index_level(
== page_rec_get_next_const(page_get_infimum_rec(page)));
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, mtr) == level);
+ ut_a(btr_page_get_level(page) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
-
- /* check whether the first record on the leftmost page is marked
- as such, if we are on a non-leaf level */
- ut_a((level == 0)
- == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- btr_pcur_get_rec(&pcur), page_is_comp(page))));
+ ut_a(!page_has_prev(page));
+
+ if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ if (level == 0) {
+ /* Skip the metadata pseudo-record */
+ ut_ad(index->is_instant());
+ btr_pcur_move_to_next_user_rec(&pcur, mtr);
+ }
+ } else {
+ /* The first record on the leftmost page must be
+ marked as such on each level except the leaf level. */
+ ut_a(level == 0);
+ }
prev_rec = NULL;
prev_rec_is_copied = false;
@@ -1145,8 +1151,7 @@ dict_stats_analyze_index_level(
n_uniq, &heap);
prev_rec = rec_copy_prefix_to_buf(
- prev_rec, index,
- rec_offs_n_fields(prev_rec_offsets),
+ prev_rec, index, n_uniq,
&prev_rec_buf, &prev_rec_buf_size);
prev_rec_is_copied = true;
@@ -1219,7 +1224,7 @@ dict_stats_analyze_index_level(
btr_pcur_move_to_next_user_rec() will release the
latch on the page that prev_rec is on */
prev_rec = rec_copy_prefix_to_buf(
- rec, index, rec_offs_n_fields(rec_offsets),
+ rec, index, n_uniq,
&prev_rec_buf, &prev_rec_buf_size);
prev_rec_is_copied = true;
@@ -1497,10 +1502,10 @@ dict_stats_analyze_index_below_cur(
offsets_rec = rec_get_offsets(rec, index, offsets1, false,
ULINT_UNDEFINED, &heap);
- page_id_t page_id(dict_index_get_space(index),
+ page_id_t page_id(index->table->space_id,
btr_node_ptr_get_child_page_no(
rec, offsets_rec));
- const page_size_t page_size(dict_table_page_size(index->table));
+ const page_size_t page_size(index->table->space->flags);
/* assume no external pages by default - in case we quit from this
function without analyzing any leaf pages */
@@ -1686,10 +1691,10 @@ dict_stats_analyze_index_for_n_prefix(
ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);
+ ut_a(btr_page_get_level(page) == n_diff_data->level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_a(!page_has_prev(page));
/* check whether the first record on the leftmost page is marked
as such; we are on a non-leaf level */
@@ -1743,11 +1748,8 @@ dict_stats_analyze_index_for_n_prefix(
ut_a(left <= right);
ut_a(right <= last_idx_on_level);
- /* we do not pass (left, right) because we do not want to ask
- ut_rnd_interval() to work with too big numbers since
- ib_uint64_t could be bigger than ulint */
- const ulint rnd = ut_rnd_interval(
- 0, static_cast<ulint>(right - left));
+ const ulint rnd = right == left ? 0 :
+ ut_rnd_gen_ulint() % (right - left);
const ib_uint64_t dive_below_idx
= boundaries->at(static_cast<unsigned>(left + rnd));
@@ -2310,7 +2312,7 @@ dict_stats_save_index_stat(
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
- ut_ad(!trx || trx->internal || trx->in_mysql_trx_list);
+ ut_ad(!trx || trx->internal || trx->mysql_thd);
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
@@ -2322,7 +2324,7 @@ dict_stats_save_index_stat(
pars_info_add_str_literal(pinfo, "table_name", table_utf8);
pars_info_add_str_literal(pinfo, "index_name", index->name);
UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
- pars_info_add_int4_literal(pinfo, "last_update", (lint)last_update);
+ pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
pars_info_add_str_literal(pinfo, "stat_name", stat_name);
UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
@@ -2391,10 +2393,9 @@ dict_stats_report_error(dict_table_t* table, bool defragment)
{
dberr_t err;
- FilSpace space(table->space);
const char* df = defragment ? " defragment" : "";
- if (!space()) {
+ if (!table->space) {
ib::warn() << "Cannot save" << df << " statistics for table "
<< table->name
<< " because the .ibd file is missing. "
@@ -2403,7 +2404,8 @@ dict_stats_report_error(dict_table_t* table, bool defragment)
} else {
ib::warn() << "Cannot save" << df << " statistics for table "
<< table->name
- << " because file " << space()->chain.start->name
+ << " because file "
+ << table->space->chain.start->name
<< (table->corrupted
? " is corrupted."
: " cannot be decrypted.");
@@ -2454,7 +2456,7 @@ dict_stats_save(
pars_info_add_str_literal(pinfo, "database_name", db_utf8);
pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- pars_info_add_int4_literal(pinfo, "last_update", (lint)now);
+ pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
pars_info_add_ull_literal(pinfo, "clustered_index_size",
table->stat_clustered_index_size);
@@ -2495,7 +2497,7 @@ dict_stats_save(
return(ret);
}
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
trx_start_internal(trx);
dict_index_t* index;
@@ -2592,7 +2594,7 @@ dict_stats_save(
trx_commit_for_mysql(trx);
end:
- trx_free_for_background(trx);
+ trx_free(trx);
mutex_exit(&dict_sys->mutex);
rw_lock_x_unlock(dict_operation_lock);
@@ -2900,7 +2902,7 @@ dict_stats_fetch_index_stats_step(
/* extract 12 from "n_diff_pfx12..." into n_pfx
note that stat_name does not have a terminating '\0' */
- n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
+ n_pfx = ulong(num_ptr[0] - '0') * 10 + ulong(num_ptr[1] - '0');
ulint n_uniq = index->n_uniq;
@@ -2974,7 +2976,7 @@ dict_stats_fetch_from_ps(
stats. */
dict_stats_empty_table(table, true);
- trx = trx_allocate_for_background();
+ trx = trx_create();
/* Use 'read-uncommitted' so that the SELECTs we execute
do not get blocked in case some user has locked the rows we
@@ -3068,7 +3070,7 @@ dict_stats_fetch_from_ps(
trx_commit_for_mysql(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
if (!index_fetch_arg.stats_were_modified) {
return(DB_STATS_DO_NOT_EXIST);
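In dict_stats_analyze_index_for_n_prefix() above, ut_rnd_interval() is replaced by a plain modulo over the record range, with an explicit guard so that right == left does not divide by zero. A small sketch of the same selection, where rnd64() stands in for ut_rnd_gen_ulint():

#include <cstdint>
#include <random>

static uint64_t rnd64()
{
	static std::mt19937_64 gen(42);
	return gen();
}

/* Pick a pseudo-random index in [left, right), or left itself when the
range is empty, matching the guarded modulo used for choosing the page
to dive below. */
uint64_t pick_dive_index(uint64_t left, uint64_t right)
{
	const uint64_t rnd = right == left ? 0 : rnd64() % (right - left);
	return left + rnd;
}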
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
index 55bcafddc70..ae31b3d0e37 100644
--- a/storage/innobase/dict/dict0stats_bg.cc
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -82,6 +82,8 @@ typedef recalc_pool_t::iterator
/** Pool where we store information on which tables are to be processed
by background statistics gathering. */
static recalc_pool_t recalc_pool;
+/** Whether the global data structures have been initialized */
+static bool stats_initialised;
/*****************************************************************//**
Free the resources occupied by the recalc pool, called once during
@@ -283,7 +285,6 @@ dict_stats_thread_init()
dict_stats_event = os_event_create(0);
dict_stats_shutdown_event = os_event_create(0);
-
ut_d(dict_stats_disabled_event = os_event_create(0));
/* The recalc_pool_mutex is acquired from:
@@ -303,6 +304,7 @@ dict_stats_thread_init()
mutex_create(LATCH_ID_RECALC_POOL, &recalc_pool_mutex);
dict_defrag_pool_init();
+ stats_initialised = true;
}
/*****************************************************************//**
@@ -315,6 +317,12 @@ dict_stats_thread_deinit()
ut_a(!srv_read_only_mode);
ut_ad(!srv_dict_stats_thread_active);
+ if (!stats_initialised) {
+ return;
+ }
+
+ stats_initialised = false;
+
dict_stats_recalc_pool_deinit();
dict_defrag_pool_deinit();
@@ -357,7 +365,7 @@ dict_stats_process_entry_from_recalc_pool()
return;
}
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
if (!fil_table_accessible(table)) {
dict_table_close(table, TRUE, FALSE);
@@ -402,16 +410,9 @@ dict_stats_process_entry_from_recalc_pool()
#ifdef UNIV_DEBUG
/** Disables dict stats thread. It's used by:
SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-dict_stats_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
/* This method is protected by mutex, as every SET GLOBAL .. */
ut_ad(dict_stats_disabled_event != NULL);
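The stats_initialised flag added above makes dict_stats_thread_deinit() a no-op when dict_stats_thread_init() never ran. The same guard pattern in isolation, with hypothetical stats_init()/stats_deinit() names:

static bool stats_initialised = false;

void stats_init()
{
	/* create the events, mutexes and recalc pool here */
	stats_initialised = true;
}

void stats_deinit()
{
	if (!stats_initialised) {
		return;	/* nothing was set up, so nothing to tear down */
	}
	stats_initialised = false;
	/* free the pools and destroy the mutexes and events here */
}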
diff --git a/storage/innobase/eval/eval0eval.cc b/storage/innobase/eval/eval0eval.cc
index fa0b265b3db..6cc63b3a004 100644
--- a/storage/innobase/eval/eval0eval.cc
+++ b/storage/innobase/eval/eval0eval.cc
@@ -585,7 +585,7 @@ eval_instr(
/* We have already matched j characters */
if (j == len2) {
- int_val = i + 1;
+ int_val = lint(i) + 1;
goto match_found;
}
@@ -781,7 +781,7 @@ eval_predefined(
}
/* allocate the string */
- data = eval_node_ensure_val_buf(func_node, int_len + 1);
+ data = eval_node_ensure_val_buf(func_node, ulint(int_len) + 1);
/* add terminating NUL character */
data[int_len] = 0;
@@ -804,7 +804,7 @@ eval_predefined(
}
}
- dfield_set_len(que_node_get_val(func_node), int_len);
+ dfield_set_len(que_node_get_val(func_node), ulint(int_len));
return;
@@ -833,12 +833,11 @@ eval_func(
{
que_node_t* arg;
ulint fclass;
- ulint func;
ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
fclass = func_node->fclass;
- func = func_node->func;
+ const int func = func_node->func;
arg = func_node->args;
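The eval0eval.cc changes above replace implicit conversions with explicit lint()/ulint() casts so that sign and width changes are visible at the call site. A compilable sketch of the style; the typedefs only approximate the ones in univ.i:

typedef long int	lint;
typedef unsigned long	ulint;

lint match_position(ulint i)
{
	return lint(i) + 1;	/* instead of the implicit i + 1 */
}

ulint value_buffer_length(lint int_len)
{
	return ulint(int_len) + 1;	/* room for the terminating NUL */
}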
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index 77760d7159a..6a190a35b17 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -468,7 +468,6 @@ byte*
fil_parse_write_crypt_data(
byte* ptr,
const byte* end_ptr,
- const buf_block_t* block,
dberr_t* err)
{
/* check that redo log entry is complete */
@@ -524,7 +523,7 @@ fil_parse_write_crypt_data(
/* update fil_space memory cache with crypt_data */
if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
crypt_data = fil_space_set_crypt_data(space, crypt_data);
- fil_space_release(space);
+ space->release();
/* Check is used key found from encryption plugin */
if (crypt_data->should_encrypt()
&& !crypt_data->is_key_found()) {
@@ -589,7 +588,7 @@ fil_encrypt_buf(
int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
crypt_data, key_version,
- space, offset, lsn);
+ (uint32)space, (uint32)offset, lsn);
ut_a(rc == MY_AES_OK);
ut_a(dstlen == srclen);
@@ -657,7 +656,7 @@ fil_space_encrypt(
fil_space_crypt_t* crypt_data = space->crypt_data;
const page_size_t page_size(space->flags);
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn,
src_frame, page_size, dst_frame);
@@ -810,7 +809,7 @@ fil_space_decrypt(
*decrypted = false;
ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
bool encrypted = fil_space_decrypt(space->crypt_data, tmp_frame,
page_size, src_frame, &err);
@@ -956,12 +955,12 @@ fil_crypt_read_crypt_data(fil_space_t* space)
mtr.start();
if (buf_block_t* block = buf_page_get(page_id_t(space->id, 0),
page_size, RW_S_LATCH, &mtr)) {
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (!space->crypt_data) {
space->crypt_data = fil_space_read_crypt_data(
page_size, block->frame);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
mtr.commit();
}
@@ -1153,7 +1152,7 @@ fil_crypt_space_needs_rotation(
return false;
}
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
fil_space_crypt_t *crypt_data = space->crypt_data;
@@ -1450,7 +1449,7 @@ fil_crypt_find_space_to_rotate(
if (state->should_shutdown()) {
if (state->space) {
- fil_space_release(state->space);
+ state->space->release();
state->space = NULL;
}
return false;
@@ -1459,7 +1458,7 @@ fil_crypt_find_space_to_rotate(
if (state->first) {
state->first = false;
if (state->space) {
- fil_space_release(state->space);
+ state->space->release();
}
state->space = NULL;
}
@@ -1566,7 +1565,7 @@ fil_crypt_find_page_to_rotate(
ulint batch = srv_alloc_time * state->allocated_iops;
fil_space_t* space = state->space;
- ut_ad(!space || space->n_pending_ops > 0);
+ ut_ad(!space || space->referenced());
/* If space is marked to be dropped stop rotation. */
if (!space || space->is_stopping()) {
@@ -1624,7 +1623,7 @@ fil_crypt_get_page_throttle_func(
fil_space_t* space = state->space;
const page_size_t page_size = page_size_t(space->flags);
const page_id_t page_id(space->id, offset);
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
/* Before reading from tablespace we need to make sure that
the tablespace is not about to be dropped. */
@@ -1707,11 +1706,11 @@ btr_scrub_get_block_and_allocation_status(
buf_block_t *block = NULL;
fil_space_t* space = state->space;
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
mtr_start(&local_mtr);
- *allocation_status = fseg_page_is_free(space, offset) ?
+ *allocation_status = fseg_page_is_free(space, (uint32_t)offset) ?
BTR_SCRUB_PAGE_FREE :
BTR_SCRUB_PAGE_ALLOCATED;
@@ -1759,7 +1758,7 @@ fil_crypt_rotate_page(
ulint sleeptime_ms = 0;
fil_space_crypt_t *crypt_data = space->crypt_data;
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
ut_ad(offset > 0);
/* In fil_crypt_thread where key rotation is done we have
@@ -1775,7 +1774,7 @@ fil_crypt_rotate_page(
return;
}
- ut_d(const bool was_free = fseg_page_is_free(space, offset));
+ ut_d(const bool was_free = fseg_page_is_free(space, (uint32_t)offset));
mtr_t mtr;
mtr.start();
@@ -1937,7 +1936,7 @@ fil_crypt_rotate_pages(
ulint end = std::min(state->offset + state->batch,
state->space->free_limit);
- ut_ad(state->space->n_pending_ops > 0);
+ ut_ad(state->space->referenced());
for (; state->offset < end; state->offset++) {
@@ -1976,7 +1975,7 @@ fil_crypt_flush_space(
fil_space_t* space = state->space;
fil_space_crypt_t *crypt_data = space->crypt_data;
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
/* flush tablespace pages so that there are no pages left with old key */
lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
@@ -2031,18 +2030,13 @@ fil_crypt_flush_space(
/***********************************************************************
Complete rotating a space
-@param[in,out] key_state Key state
@param[in,out] state Rotation state */
-static
-void
-fil_crypt_complete_rotate_space(
- const key_state_t* key_state,
- rotate_thread_t* state)
+static void fil_crypt_complete_rotate_space(rotate_thread_t* state)
{
fil_space_crypt_t *crypt_data = state->space->crypt_data;
ut_ad(crypt_data);
- ut_ad(state->space->n_pending_ops > 0);
+ ut_ad(state->space->referenced());
/* Space might already be dropped */
if (!state->space->is_stopping()) {
@@ -2200,9 +2194,8 @@ DECLARE_THREAD(fil_crypt_thread)(
/* If space is marked as stopping, release
space and stop rotation. */
if (thr.space->is_stopping()) {
- fil_crypt_complete_rotate_space(
- &new_state, &thr);
- fil_space_release(thr.space);
+ fil_crypt_complete_rotate_space(&thr);
+ thr.space->release();
thr.space = NULL;
break;
}
@@ -2213,7 +2206,7 @@ DECLARE_THREAD(fil_crypt_thread)(
/* complete rotation */
if (thr.space) {
- fil_crypt_complete_rotate_space(&new_state, &thr);
+ fil_crypt_complete_rotate_space(&thr);
}
/* force key state refresh */
@@ -2229,7 +2222,7 @@ DECLARE_THREAD(fil_crypt_thread)(
/* release current space if shutting down */
if (thr.space) {
- fil_space_release(thr.space);
+ thr.space->release();
thr.space = NULL;
}
@@ -2430,7 +2423,7 @@ fil_space_crypt_get_status(
{
memset(status, 0, sizeof(*status));
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
/* If there is no crypt data and we have not yet read
page 0 for this tablespace, we need to read it before
@@ -2495,7 +2488,7 @@ fil_space_get_scrub_status(
{
memset(status, 0, sizeof(*status));
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->referenced());
fil_space_crypt_t* crypt_data = space->crypt_data;
status->space = space->id;
@@ -2629,7 +2622,7 @@ fil_space_verify_crypt_checksum(
checksum2 = checksum1;
} else {
checksum2 = mach_read_from_4(
- page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM);
valid = buf_page_is_checksum_valid_crc32(
page, checksum1, checksum2, false
/* FIXME: also try the original crc32 that was
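Throughout fil0crypt.cc the calls to fil_space_release() and the checks on n_pending_ops/n_pending_ios become fil_space_t::release(), referenced() and pending_io(). A minimal sketch of that acquire/release discipline; Space is a hypothetical stand-in and the real fil_space_t bookkeeping differs in detail:

#include <atomic>
#include <cassert>

struct Space {
	std::atomic<unsigned> n_pending_ops{0};

	void acquire() { n_pending_ops.fetch_add(1); }
	void release()
	{
		unsigned old = n_pending_ops.fetch_sub(1);
		assert(old > 0);	/* mirrors ut_ad(space->referenced()) */
	}
	bool referenced() const { return n_pending_ops.load() > 0; }
};

/* Hold a reference for the whole key-rotation pass so that the
tablespace cannot be freed underneath the encryption thread. */
void rotate_space(Space& s)
{
	s.acquire();
	/* ... read, re-encrypt and flush pages ... */
	s.release();
}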
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 21a2dddfb9f..0c2ee5203cf 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -28,7 +28,6 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0crypt.h"
#include "btr0btr.h"
-#include "btr0sea.h"
#include "buf0buf.h"
#include "dict0boot.h"
#include "dict0dict.h"
@@ -66,6 +65,35 @@ static
bool
fil_try_to_close_file_in_LRU(bool print_info);
+/** Test if a tablespace file can be renamed to a new filepath by checking
+that the old filepath exists and the new filepath does not exist.
+@param[in] old_path old filepath
+@param[in] new_path new filepath
+@param[in] is_discarded whether the tablespace is discarded
+@param[in] replace_new whether to ignore the existence of new_path
+@return innodb error code */
+static dberr_t
+fil_rename_tablespace_check(
+ const char* old_path,
+ const char* new_path,
+ bool is_discarded,
+ bool replace_new = false);
+/** Rename a single-table tablespace.
+The tablespace must exist in the memory cache.
+@param[in] id tablespace identifier
+@param[in] old_path old file name
+@param[in] new_name new table name in the
+databasename/tablename format
+@param[in] new_path_in new file name,
+or NULL if it is located in the normal data directory
+@return true if success */
+static bool
+fil_rename_tablespace(
+ ulint id,
+ const char* old_path,
+ const char* new_name,
+ const char* new_path_in);
+
/*
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
=============================================
@@ -139,11 +167,11 @@ ulint fil_n_pending_log_flushes = 0;
ulint fil_n_pending_tablespace_flushes = 0;
/** The null file address */
-fil_addr_t fil_addr_null = {FIL_NULL, 0};
+const fil_addr_t fil_addr_null = {FIL_NULL, 0};
/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
-UNIV_INTERN fil_system_t* fil_system = NULL;
+fil_system_t fil_system;
/** At this age or older a space/page will be rotated */
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
@@ -179,26 +207,26 @@ bool
fil_validate_skip(void)
/*===================*/
{
- /** The fil_validate() call skip counter. Use a signed type
- because of the race condition below. */
+ /** The fil_validate() call skip counter. */
static int fil_validate_count = FIL_VALIDATE_SKIP;
- /* There is a race condition below, but it does not matter,
- because this call is only for heuristic purposes. We want to
- reduce the call frequency of the costly fil_validate() check
- in debug builds. */
- if (--fil_validate_count > 0) {
+ /* We want to reduce the call frequency of the costly fil_validate()
+ check in debug builds. */
+ int count = my_atomic_add32_explicit(&fil_validate_count, -1,
+ MY_MEMORY_ORDER_RELAXED);
+ if (count > 0) {
return(true);
}
- fil_validate_count = FIL_VALIDATE_SKIP;
+ my_atomic_store32_explicit(&fil_validate_count, FIL_VALIDATE_SKIP,
+ MY_MEMORY_ORDER_RELAXED);
return(fil_validate());
}
#endif /* UNIV_DEBUG */
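/* Illustrative sketch only, not part of the patch: the relaxed atomic
countdown used by fil_validate_skip() above, expressed with std::atomic
instead of my_atomic_add32_explicit(); expensive_validate() is a
hypothetical stand-in for fil_validate(). */
#include <atomic>

static const int VALIDATE_SKIP = 17;	/* arbitrary period for the sketch */
static std::atomic<int> validate_count(VALIDATE_SKIP);

static bool expensive_validate() { return true; }	/* stand-in */

/* Run the costly check only about once per VALIDATE_SKIP calls; the
counter may briefly go negative under concurrency, which is harmless
because it is reset before validating. */
bool validate_skip()
{
	if (validate_count.fetch_sub(1, std::memory_order_relaxed) > 0) {
		return true;
	}
	validate_count.store(VALIDATE_SKIP, std::memory_order_relaxed);
	return expensive_validate();
}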
/********************************************************************//**
Determines if a file node belongs to the least-recently-used list.
-@return true if the file belongs to fil_system->LRU mutex. */
+@return true if the file belongs to the fil_system.LRU list. */
UNIV_INLINE
bool
fil_space_belongs_in_lru(
@@ -232,7 +260,6 @@ bool
fil_node_prepare_for_io(
/*====================*/
fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
fil_space_t* space); /*!< in: space */
/** Update the data structures when an i/o operation finishes.
@@ -307,9 +334,9 @@ fil_space_get_by_id(
{
fil_space_t* space;
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
- HASH_SEARCH(hash, fil_system->spaces, id,
+ HASH_SEARCH(hash, fil_system.spaces, id,
fil_space_t*, space,
ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
space->id == id);
@@ -318,33 +345,11 @@ fil_space_get_by_id(
}
/** Look up a tablespace.
-@param[in] name tablespace name
-@return tablespace
-@retval NULL if not found */
-fil_space_t*
-fil_space_get_by_name(const char* name)
-{
- fil_space_t* space;
- ulint fold;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- fold = ut_fold_string(name);
-
- HASH_SEARCH(name_hash, fil_system->name_hash, fold,
- fil_space_t*, space,
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
- !strcmp(name, space->name));
-
- return(space);
-}
-
-/** Look up a tablespace.
The caller should hold an InnoDB table lock or a MDL that prevents
the tablespace from being dropped during the operation,
or the caller should be in single-threaded crash recovery mode
(no user connections that could drop tablespaces).
-If this is not the case, fil_space_acquire() and fil_space_release()
+If this is not the case, fil_space_acquire() and fil_space_t::release()
should be used instead.
@param[in] id tablespace ID
@return tablespace, or NULL if not found */
@@ -352,9 +357,9 @@ fil_space_t*
fil_space_get(
ulint id)
{
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* space = fil_space_get_by_id(id);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG);
return(space);
}
@@ -370,9 +375,9 @@ fil_space_get_latch(
{
fil_space_t* space;
- ut_ad(fil_system);
+ ut_ad(fil_system.is_initialised());
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
@@ -382,63 +387,27 @@ fil_space_get_latch(
*flags = space->flags;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(&(space->latch));
}
-/** Gets the type of a file space.
-@param[in] id tablespace identifier
-@return file type */
-fil_type_t
-fil_space_get_type(
- ulint id)
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- mutex_exit(&fil_system->mutex);
-
- return(space->purpose);
-}
-
-/** Note that a tablespace has been imported.
-It is initially marked as FIL_TYPE_IMPORT so that no logging is
-done during the import process when the space ID is stamped to each page.
-Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
-NOTE: temporary tablespaces are never imported.
-@param[in] id tablespace identifier */
-void
-fil_space_set_imported(
- ulint id)
+/** Note that the tablespace has been imported.
+Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
+written while the space ID is being updated in each page. */
+void fil_space_t::set_imported()
{
- ut_ad(fil_system != NULL);
-
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_id(id);
- const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- ut_ad(space->purpose == FIL_TYPE_IMPORT);
- space->purpose = FIL_TYPE_TABLESPACE;
- space->atomic_write_supported = node->atomic_write
+ ut_ad(purpose == FIL_TYPE_IMPORT);
+ const fil_node_t* node = UT_LIST_GET_FIRST(chain);
+ atomic_write_supported = node->atomic_write
&& srv_use_atomic_writes
&& my_test_if_atomic_write(node->handle,
- int(page_size_t(space->flags)
- .physical()));
- mutex_exit(&fil_system->mutex);
+ int(page_size_t(flags).physical()));
+ purpose = FIL_TYPE_TABLESPACE;
}
/**********************************************************************//**
-Checks if all the file nodes in a space are flushed. The caller must hold
-the fil_system mutex.
+Checks if all the file nodes in a space are flushed.
@return true if all are flushed */
static
bool
@@ -446,7 +415,7 @@ fil_space_is_flushed(
/*=================*/
fil_space_t* space) /*!< in: space */
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
node != NULL;
@@ -479,7 +448,7 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
fil_node_t* node;
ut_ad(name != NULL);
- ut_ad(fil_system != NULL);
+ ut_ad(fil_system.is_initialised());
node = reinterpret_cast<fil_node_t*>(ut_zalloc_nokey(sizeof(*node)));
@@ -504,13 +473,13 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
node->atomic_write = atomic_write;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
this->size += size;
UT_LIST_ADD_LAST(chain, node);
if (node->is_open()) {
- fil_system->n_open++;
+ fil_system.n_open++;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return node;
}
@@ -520,7 +489,7 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
@return whether the page was found valid */
bool fil_node_t::read_page0(bool first)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_a(space->purpose != FIL_TYPE_LOG);
const page_size_t page_size(space->flags);
const ulint psize = page_size.physical();
@@ -606,7 +575,6 @@ bool fil_node_t::read_page0(bool first)
}
/** Open a file node of a tablespace.
-The caller must own the fil_system mutex.
@param[in,out] node File node
@return false if the file can't be opened, otherwise true */
static bool fil_node_open_file(fil_node_t* node)
@@ -615,7 +583,7 @@ static bool fil_node_open_file(fil_node_t* node)
bool read_only_mode;
fil_space_t* space = node->space;
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_a(node->n_pending == 0);
ut_a(!node->is_open());
@@ -703,52 +671,45 @@ retry:
ut_a(success);
ut_a(node->is_open());
- fil_system->n_open++;
+ fil_system.n_open++;
if (fil_space_belongs_in_lru(space)) {
/* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(fil_system->LRU, node);
+ UT_LIST_ADD_FIRST(fil_system.LRU, node);
}
return(true);
}
-/** Close a file node.
-@param[in,out] node File node */
-static
-void
-fil_node_close_file(
- fil_node_t* node)
+/** Close the file handle. */
+void fil_node_t::close()
{
bool ret;
- ut_ad(mutex_own(&(fil_system->mutex)));
- ut_a(node->is_open());
- ut_a(node->n_pending == 0);
- ut_a(node->n_pending_flushes == 0);
- ut_a(!node->being_extended);
- ut_a(node->modification_counter == node->flush_counter
- || node->space->purpose == FIL_TYPE_TEMPORARY
+ ut_ad(mutex_own(&fil_system.mutex));
+ ut_a(is_open());
+ ut_a(n_pending == 0);
+ ut_a(n_pending_flushes == 0);
+ ut_a(!being_extended);
+ ut_a(modification_counter == flush_counter
+ || space->purpose == FIL_TYPE_TEMPORARY
|| srv_fast_shutdown == 2
|| !srv_was_started);
- ret = os_file_close(node->handle);
+ ret = os_file_close(handle);
ut_a(ret);
- /* printf("Closing file %s\n", node->name); */
-
- node->handle = OS_FILE_CLOSED;
- ut_ad(!node->is_open());
- ut_a(fil_system->n_open > 0);
- fil_system->n_open--;
-
- if (fil_space_belongs_in_lru(node->space)) {
+ /* printf("Closing file %s\n", name); */
- ut_a(UT_LIST_GET_LEN(fil_system->LRU) > 0);
+ handle = OS_FILE_CLOSED;
+ ut_ad(!is_open());
+ ut_a(fil_system.n_open > 0);
+ fil_system.n_open--;
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(fil_system->LRU, node);
+ if (fil_space_belongs_in_lru(space)) {
+ ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
+ UT_LIST_REMOVE(fil_system.LRU, this);
}
}
@@ -769,14 +730,14 @@ fil_try_to_close_file_in_LRU(
{
fil_node_t* node;
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
if (print_info) {
ib::info() << "fil_sys open file LRU len "
- << UT_LIST_GET_LEN(fil_system->LRU);
+ << UT_LIST_GET_LEN(fil_system.LRU);
}
- for (node = UT_LIST_GET_LAST(fil_system->LRU);
+ for (node = UT_LIST_GET_LAST(fil_system.LRU);
node != NULL;
node = UT_LIST_GET_PREV(LRU, node)) {
@@ -784,7 +745,7 @@ fil_try_to_close_file_in_LRU(
&& node->n_pending_flushes == 0
&& !node->being_extended) {
- fil_node_close_file(node);
+ node->close();
return(true);
}
@@ -822,7 +783,7 @@ static
void
fil_flush_low(fil_space_t* space)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space);
ut_ad(!space->stop_new_ops);
@@ -889,11 +850,11 @@ retry:
int64_t sig_count = os_event_reset(node->sync_event);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
os_event_wait_low(node->sync_event, sig_count);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (node->flush_counter >= old_mod_counter) {
@@ -906,11 +867,11 @@ retry:
ut_a(node->is_open());
node->n_pending_flushes++;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
os_file_flush(node->handle);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
os_event_set(node->sync_event);
@@ -925,7 +886,7 @@ skip_flush:
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(
- fil_system->unflushed_spaces,
+ fil_system.unflushed_spaces,
space);
}
}
@@ -962,7 +923,7 @@ fil_space_extend_must_retry(
ulint size,
bool* success)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_ad(UT_LIST_GET_LAST(space->chain) == node);
ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
@@ -978,23 +939,23 @@ fil_space_extend_must_retry(
for it to finish.
It'd have been better to use event driven mechanism but
the entire module is peppered with polling stuff. */
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
os_thread_sleep(100000);
return(true);
}
node->being_extended = true;
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ if (!fil_node_prepare_for_io(node, space)) {
/* The tablespace data file, such as .ibd file, is missing */
node->being_extended = false;
return(false);
}
- /* At this point it is safe to release fil_system mutex. No
+ /* At this point it is safe to release fil_system.mutex. No
other thread can rename, delete, close or extend the file because
we have set the node->being_extended flag. */
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_ad(size > space->size);
@@ -1010,11 +971,11 @@ fil_space_extend_must_retry(
const page_size_t pageSize(space->flags);
const ulint page_size = pageSize.physical();
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/
+ /* fil_read_first_page() expects srv_page_size bytes.
+ fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
os_offset_t new_size = std::max(
os_offset_t(size - file_start_page_no) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
+ os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));
*success = os_file_set_size(node->name, node->handle, new_size,
FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
@@ -1032,7 +993,7 @@ fil_space_extend_must_retry(
last_page_no = ulint(fsize / page_size)
+ file_start_page_no;
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
ut_a(node->being_extended);
node->being_extended = false;
@@ -1042,7 +1003,7 @@ fil_space_extend_must_retry(
space->size += file_size - node->size;
node->size = file_size;
const ulint pages_in_MiB = node->size
- & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1);
+ & ~ulint((1U << (20U - srv_page_size_shift)) - 1);
fil_node_complete_io(node,IORequestRead);
@@ -1071,7 +1032,7 @@ fil_space_extend_must_retry(
}
/*******************************************************************//**
-Reserves the fil_system mutex and tries to make sure we can open at least one
+Reserves the fil_system.mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file. */
static
@@ -1081,7 +1042,7 @@ fil_mutex_enter_and_prepare_for_io(
ulint space_id) /*!< in: space id */
{
for (ulint count = 0;;) {
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
/* We keep log files always open. */
@@ -1112,19 +1073,19 @@ fil_mutex_enter_and_prepare_for_io(
situation in the function which called this
function */
} else {
- while (fil_system->n_open >= fil_system->max_n_open) {
+ while (fil_system.n_open >= srv_max_n_open_files) {
/* Too many files are open */
if (fil_try_to_close_file_in_LRU(count > 1)) {
/* No problem */
} else if (count >= 2) {
ib::warn() << "innodb_open_files="
- << fil_system->max_n_open
+ << srv_max_n_open_files
<< " is exceeded ("
- << fil_system->n_open
+ << fil_system.n_open
<< ") files stay open)";
break;
} else {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
os_aio_simulated_wake_handler_threads();
os_thread_sleep(20000);
/* Flush tablespaces so that we can
@@ -1132,13 +1093,14 @@ fil_mutex_enter_and_prepare_for_io(
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
count++;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
continue;
}
}
}
- if (ulint size = ulint(UNIV_UNLIKELY(space->recv_size))) {
+ ulint size = space->recv_size;
+ if (UNIV_UNLIKELY(size != 0)) {
ut_ad(node);
bool success;
if (fil_space_extend_must_retry(space, node, size,
@@ -1146,7 +1108,7 @@ fil_mutex_enter_and_prepare_for_io(
continue;
}
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
/* Crash recovery requires the file extension
to succeed. */
ut_a(success);
@@ -1158,7 +1120,7 @@ fil_mutex_enter_and_prepare_for_io(
this tablespace).
Also, fil_space_set_recv_size() may have been invoked
- again during the file extension while fil_system->mutex
+ again during the file extension while fil_system.mutex
was not being held by us.
Only if space->recv_size matches what we read
@@ -1194,7 +1156,7 @@ fil_space_extend(
space, UT_LIST_GET_LAST(space->chain), size,
&success));
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(success);
}
@@ -1207,13 +1169,13 @@ fil_node_close_to_free(
fil_node_t* node,
fil_space_t* space)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
ut_a(node->n_pending == 0);
ut_a(!node->being_extended);
if (node->is_open()) {
- /* We fool the assertion in fil_node_close_file() to think
+ /* We fool the assertion in fil_node_t::close() to think
there are no unflushed modifications in the file */
node->modification_counter = node->flush_counter;
@@ -1229,10 +1191,10 @@ fil_node_close_to_free(
space->is_in_unflushed_spaces = false;
- UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
+ UT_LIST_REMOVE(fil_system.unflushed_spaces, space);
}
- fil_node_close_file(node);
+ node->close();
}
}
@@ -1245,32 +1207,25 @@ void
fil_space_detach(
fil_space_t* space)
{
- ut_ad(mutex_own(&fil_system->mutex));
-
- HASH_DELETE(fil_space_t, hash, fil_system->spaces, space->id, space);
-
- fil_space_t* fnamespace = fil_space_get_by_name(space->name);
-
- ut_a(space == fnamespace);
+ ut_ad(mutex_own(&fil_system.mutex));
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
+ HASH_DELETE(fil_space_t, hash, fil_system.spaces, space->id, space);
if (space->is_in_unflushed_spaces) {
ut_ad(!fil_buffering_disabled(space));
space->is_in_unflushed_spaces = false;
- UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
+ UT_LIST_REMOVE(fil_system.unflushed_spaces, space);
}
if (space->is_in_rotation_list) {
space->is_in_rotation_list = false;
- UT_LIST_REMOVE(fil_system->rotation_list, space);
+ UT_LIST_REMOVE(fil_system.rotation_list, space);
}
- UT_LIST_REMOVE(fil_system->space_list, space);
+ UT_LIST_REMOVE(fil_system.space_list, space);
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(space->n_pending_flushes == 0);
@@ -1281,6 +1236,12 @@ fil_space_detach(
fil_node_close_to_free(fil_node, space);
}
+
+ if (space == fil_system.sys_space) {
+ fil_system.sys_space = NULL;
+ } else if (space == fil_system.temp_space) {
+ fil_system.temp_space = NULL;
+ }
}
/** Free a tablespace object on which fil_space_detach() was invoked.
@@ -1291,14 +1252,14 @@ void
fil_space_free_low(
fil_space_t* space)
{
- /* The tablespace must not be in fil_system->named_spaces. */
+ /* The tablespace must not be in fil_system.named_spaces. */
ut_ad(srv_fast_shutdown == 2 || !srv_was_started
|| space->max_lsn == 0);
- /* Wait for fil_space_release_for_io(); after
+ /* Wait for fil_space_t::release_for_io(); after
fil_space_detach(), the tablespace cannot be found, so
fil_space_acquire_for_io() would return NULL */
- while (space->n_pending_ios) {
+ while (space->pending_io()) {
os_thread_sleep(100);
}
@@ -1334,14 +1295,14 @@ fil_space_free(
{
ut_ad(id != TRX_SYS_SPACE);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* space = fil_space_get_by_id(id);
if (space != NULL) {
fil_space_detach(space);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (space != NULL) {
if (x_latched) {
@@ -1358,7 +1319,7 @@ fil_space_free(
if (space->max_lsn != 0) {
ut_d(space->max_lsn = 0);
- UT_LIST_REMOVE(fil_system->named_spaces, space);
+ UT_LIST_REMOVE(fil_system.named_spaces, space);
}
if (need_mutex) {
@@ -1392,26 +1353,14 @@ fil_space_create(
{
fil_space_t* space;
- ut_ad(fil_system);
+ ut_ad(fil_system.is_initialised());
ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
ut_ad(purpose == FIL_TYPE_LOG
|| srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););
- mutex_enter(&fil_system->mutex);
-
- /* Look for a matching tablespace. */
- space = fil_space_get_by_name(name);
-
- if (space != NULL) {
- mutex_exit(&fil_system->mutex);
-
- ib::warn() << "Tablespace '" << name << "' exists in the"
- " cache with id " << space->id << " != " << id;
-
- return(NULL);
- }
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
@@ -1420,7 +1369,7 @@ fil_space_create(
<< "' with id " << id
<< " to the tablespace memory cache, but tablespace '"
<< space->name << "' already exists in the cache!";
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(NULL);
}
@@ -1433,17 +1382,17 @@ fil_space_create(
if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
&& !recv_recovery_on
- && id > fil_system->max_assigned_id) {
+ && id > fil_system.max_assigned_id) {
- if (!fil_system->space_id_reuse_warned) {
- fil_system->space_id_reuse_warned = true;
+ if (!fil_system.space_id_reuse_warned) {
+ fil_system.space_id_reuse_warned = true;
ib::warn() << "Allocated tablespace ID " << id
<< " for " << name << ", old maximum was "
- << fil_system->max_assigned_id;
+ << fil_system.max_assigned_id;
}
- fil_system->max_assigned_id = id;
+ fil_system.max_assigned_id = id;
}
space->purpose = purpose;
@@ -1466,7 +1415,6 @@ fil_space_create(
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
if (space->purpose == FIL_TYPE_TEMPORARY) {
- ut_d(space->latch.set_temp_fsp());
/* SysTablespace::open_or_create() would pass
size!=0 to fil_space_t::add(), so first_time_open
would not hold in fil_node_open_file(), and we
@@ -1476,16 +1424,13 @@ fil_space_create(
space->atomic_write_supported = true;
}
- HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
-
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(name), space);
+ HASH_INSERT(fil_space_t, hash, fil_system.spaces, id, space);
- UT_LIST_ADD_LAST(fil_system->space_list, space);
+ UT_LIST_ADD_LAST(fil_system.space_list, space);
- if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system->max_assigned_id) {
+ if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system.max_assigned_id) {
- fil_system->max_assigned_id = id;
+ fil_system.max_assigned_id = id;
}
/* Inform key rotation that there could be something
@@ -1496,14 +1441,14 @@ fil_space_create(
srv_encrypt_tables)) {
/* Key rotation is not enabled, need to inform background
encryption threads. */
- UT_LIST_ADD_LAST(fil_system->rotation_list, space);
+ UT_LIST_ADD_LAST(fil_system.rotation_list, space);
space->is_in_rotation_list = true;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
mutex_enter(&fil_crypt_threads_mutex);
os_event_set(fil_crypt_threads_event);
mutex_exit(&fil_crypt_threads_mutex);
} else {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
return(space);
@@ -1522,12 +1467,12 @@ fil_assign_new_space_id(
ulint id;
bool success;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
id = *space_id;
- if (id < fil_system->max_assigned_id) {
- id = fil_system->max_assigned_id;
+ if (id < fil_system.max_assigned_id) {
+ id = fil_system.max_assigned_id;
}
id++;
@@ -1544,7 +1489,7 @@ fil_assign_new_space_id(
success = (id < SRV_LOG_SPACE_FIRST_ID);
if (success) {
- *space_id = fil_system->max_assigned_id = id;
+ *space_id = fil_system.max_assigned_id = id;
} else {
ib::warn() << "You have run out of single-table tablespace"
" id's! Current counter is " << id
@@ -1554,14 +1499,14 @@ fil_assign_new_space_id(
*space_id = ULINT_UNDEFINED;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(success);
}
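
The two hunks above switch fil_space_create() and fil_assign_new_space_id() to the statically allocated fil_system and drop the old by-name duplicate check. A minimal caller sketch using only the signatures visible in this diff; the tablespace name and flags value are illustrative, and crypt_data/mode are left at their defaults:

        /* Sketch: allocate a fresh tablespace ID and register the space
        in the cache.  flags == 0 stands in for valid FSP flags. */
        ulint           space_id = 0;
        ulint           flags = 0;

        if (fil_assign_new_space_id(&space_id)) {
                fil_space_t*    space = fil_space_create(
                        "db1/t1", space_id, flags,
                        FIL_TYPE_TABLESPACE, NULL);
                if (space == NULL) {
                        /* e.g. a space with this ID already existed */
                }
        }
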
/*******************************************************************//**
Returns a pointer to the fil_space_t that is in the memory cache
-associated with a space id. The caller must lock fil_system->mutex.
+associated with a space id. The caller must lock fil_system.mutex.
@return file_space_t pointer, NULL if space not found */
UNIV_INLINE
fil_space_t*
@@ -1572,7 +1517,7 @@ fil_space_get_space(
fil_space_t* space;
fil_node_t* node;
- ut_ad(fil_system);
+ ut_ad(fil_system.is_initialised());
space = fil_space_get_by_id(id);
if (space == NULL || space->size != 0) {
@@ -1587,15 +1532,15 @@ fil_space_get_space(
case FIL_TYPE_IMPORT:
ut_a(id != 0);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
/* It is possible that the space gets evicted at this point
before the fil_mutex_enter_and_prepare_for_io() acquires
- the fil_system->mutex. Check for this after completing the
+ the fil_system.mutex. Check for this after completing the
call to fil_mutex_enter_and_prepare_for_io(). */
fil_mutex_enter_and_prepare_for_io(id);
- /* We are still holding the fil_system->mutex. Check if
+ /* We are still holding the fil_system.mutex. Check if
the space is still in memory cache. */
space = fil_space_get_by_id(id);
@@ -1613,7 +1558,7 @@ fil_space_get_space(
the file yet; the following calls will open it and update the
size fields */
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ if (!fil_node_prepare_for_io(node, space)) {
/* The single-table tablespace can't be opened,
because the ibd file is missing. */
return(NULL);
@@ -1625,44 +1570,6 @@ fil_space_get_space(
return(space);
}
-/** Returns the path from the first fil_node_t found with this space ID.
-The caller is responsible for freeing the memory allocated here for the
-value returned.
-@param[in] id Tablespace ID
-@return own: A copy of fil_node_t::path, NULL if space ID is zero
-or not found. */
-char*
-fil_space_get_first_path(
- ulint id)
-{
- fil_space_t* space;
- fil_node_t* node;
- char* path;
-
- ut_ad(fil_system);
- ut_a(id);
-
- fil_mutex_enter_and_prepare_for_io(id);
-
- space = fil_space_get_space(id);
-
- if (space == NULL) {
- mutex_exit(&fil_system->mutex);
-
- return(NULL);
- }
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- path = mem_strdup(node->name);
-
- mutex_exit(&fil_system->mutex);
-
- return(path);
-}
-
/** Set the recovered size of a tablespace in pages.
@param id tablespace ID
@param size recovered size in pages */
@@ -1670,7 +1577,7 @@ UNIV_INTERN
void
fil_space_set_recv_size(ulint id, ulint size)
{
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
ut_ad(size);
ut_ad(id < SRV_LOG_SPACE_FIRST_ID);
@@ -1678,7 +1585,7 @@ fil_space_set_recv_size(ulint id, ulint size)
space->recv_size = size;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
/*******************************************************************//**
@@ -1693,14 +1600,14 @@ fil_space_get_size(
fil_space_t* space;
ulint size;
- ut_ad(fil_system);
- mutex_enter(&fil_system->mutex);
+ ut_ad(fil_system.is_initialised());
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_space(id);
size = space ? space->size : 0;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(size);
}
@@ -1717,83 +1624,72 @@ fil_space_get_flags(
fil_space_t* space;
ulint flags;
- ut_ad(fil_system);
+ ut_ad(fil_system.is_initialised());
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_space(id);
if (space == NULL) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(ULINT_UNDEFINED);
}
flags = space->flags;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(flags);
}
-/** Open each fil_node_t of a named fil_space_t if not already open.
-@param[in] name Tablespace name
-@return true if all nodes are open */
-bool
-fil_space_open(
- const char* name)
+/** Open each file. Only invoked on fil_system.temp_space.
+@return whether all files were opened */
+bool fil_space_t::open()
{
- ut_ad(fil_system != NULL);
+ ut_ad(fil_system.is_initialised());
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_name(name);
- fil_node_t* node;
+ mutex_enter(&fil_system.mutex);
+ ut_ad(this == fil_system.temp_space
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA);
- for (node = UT_LIST_GET_FIRST(space->chain);
+ for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (!node->is_open()
- && !fil_node_open_file(node)) {
- mutex_exit(&fil_system->mutex);
- return(false);
+ if (!node->is_open() && !fil_node_open_file(node)) {
+ mutex_exit(&fil_system.mutex);
+ return false;
}
}
- mutex_exit(&fil_system->mutex);
-
- return(true);
+ mutex_exit(&fil_system.mutex);
+ return true;
}
-/** Close each fil_node_t of a named fil_space_t if open.
-@param[in] name Tablespace name */
-void
-fil_space_close(
- const char* name)
+/** Close each file. Only invoked on fil_system.temp_space. */
+void fil_space_t::close()
{
- if (fil_system == NULL) {
+ if (!fil_system.is_initialised()) {
return;
}
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_name(name);
- if (space == NULL) {
- mutex_exit(&fil_system->mutex);
- return;
- }
+ mutex_enter(&fil_system.mutex);
+ ut_ad(this == fil_system.temp_space
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA);
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
-
if (node->is_open()) {
- fil_node_close_file(node);
+ node->close();
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
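
fil_space_open(name) and fil_space_close(name) become member functions without a name lookup and, per the added assertions, are only meant for fil_system.temp_space (or the mariabackup srv_operation modes). A sketch of the intended call pattern, assuming the temporary tablespace object has already been created:

        /* Sketch: open every file of the shared temporary tablespace at
        startup, close them again at shutdown. */
        if (!fil_system.temp_space->open()) {
                ib::error() << "Unable to open the temporary tablespace";
        }

        /* ... later, during shutdown ... */
        fil_system.temp_space->close();
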
/** Returns the page size of the space and whether it is compressed or not.
@@ -1818,39 +1714,48 @@ fil_space_get_page_size(
return(page_size_t(flags));
}
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open) /*!< in: max number of open files */
+void fil_system_t::create(ulint hash_size)
{
- ut_a(fil_system == NULL);
-
- ut_a(hash_size > 0);
- ut_a(max_n_open > 0);
+ ut_ad(this == &fil_system);
+ ut_ad(!is_initialised());
+ ut_ad(!(srv_page_size % FSP_EXTENT_SIZE));
+ ut_ad(srv_page_size);
+ ut_ad(!spaces);
- fil_system = static_cast<fil_system_t*>(
- ut_zalloc_nokey(sizeof(*fil_system)));
+ m_initialised = true;
- mutex_create(LATCH_ID_FIL_SYSTEM, &fil_system->mutex);
+ compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX));
+ compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN));
- fil_system->spaces = hash_create(hash_size);
- fil_system->name_hash = hash_create(hash_size);
+ ut_ad(hash_size > 0);
- UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU);
- UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list);
- UT_LIST_INIT(fil_system->rotation_list, &fil_space_t::rotation_list);
- UT_LIST_INIT(fil_system->unflushed_spaces,
- &fil_space_t::unflushed_spaces);
- UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces);
+ mutex_create(LATCH_ID_FIL_SYSTEM, &mutex);
- fil_system->max_n_open = max_n_open;
+ spaces = hash_create(hash_size);
fil_space_crypt_init();
}
+void fil_system_t::close()
+{
+ ut_ad(this == &fil_system);
+ ut_a(!UT_LIST_GET_LEN(LRU));
+ ut_a(!UT_LIST_GET_LEN(unflushed_spaces));
+ ut_a(!UT_LIST_GET_LEN(space_list));
+ ut_ad(!sys_space);
+ ut_ad(!temp_space);
+
+ if (is_initialised()) {
+ m_initialised = false;
+ hash_table_free(spaces);
+ spaces = NULL;
+ mutex_free(&mutex);
+ fil_space_crypt_cleanup();
+ }
+
+ ut_ad(!spaces);
+}
+
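fil_init() is replaced by fil_system_t::create() and fil_system_t::close() on the static instance; the max_n_open member disappears and the limit is read from srv_max_n_open_files instead (see the fil_open_log_and_system_tablespace_files() hunk below). A rough lifecycle sketch; the hash size value is illustrative only:

        /* Sketch of the new lifecycle, replacing fil_init(hash_size, max_n_open). */
        fil_system.create(5000);        /* hash table size only; no max_n_open */

        /* ... normal server operation ... */

        fil_close_all_files();          /* closes nodes, frees all fil_space_t */
        fil_system.close();             /* frees the hash table and the mutex */
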
/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
@@ -1863,9 +1768,9 @@ fil_open_log_and_system_tablespace_files(void)
{
fil_space_t* space;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- for (space = UT_LIST_GET_FIRST(fil_system->space_list);
+ for (space = UT_LIST_GET_FIRST(fil_system.space_list);
space != NULL;
space = UT_LIST_GET_NEXT(space_list, space)) {
@@ -1891,7 +1796,7 @@ fil_open_log_and_system_tablespace_files(void)
}
}
- if (fil_system->max_n_open < 10 + fil_system->n_open) {
+ if (srv_max_n_open_files < 10 + fil_system.n_open) {
ib::warn() << "You must raise the value of"
" innodb_open_files in my.cnf!"
@@ -1903,15 +1808,15 @@ fil_open_log_and_system_tablespace_files(void)
" some .ibd files if the"
" file-per-table storage model is used."
" Current open files "
- << fil_system->n_open
+ << fil_system.n_open
<< ", max allowed open files "
- << fil_system->max_n_open
+ << srv_max_n_open_files
<< ".";
}
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
/*******************************************************************//**
@@ -1924,13 +1829,14 @@ fil_close_all_files(void)
fil_space_t* space;
/* At shutdown, we should not have any files in this list. */
+ ut_ad(fil_system.is_initialised());
ut_ad(srv_fast_shutdown == 2
|| !srv_was_started
- || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
+ || UT_LIST_GET_LEN(fil_system.named_spaces) == 0);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- for (space = UT_LIST_GET_FIRST(fil_system->space_list);
+ for (space = UT_LIST_GET_FIRST(fil_system.space_list);
space != NULL; ) {
fil_node_t* node;
fil_space_t* prev_space = space;
@@ -1940,7 +1846,7 @@ fil_close_all_files(void)
node = UT_LIST_GET_NEXT(chain, node)) {
if (node->is_open()) {
- fil_node_close_file(node);
+ node->close();
}
}
@@ -1949,11 +1855,11 @@ fil_close_all_files(void)
fil_space_free_low(prev_space);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_ad(srv_fast_shutdown == 2
|| !srv_was_started
- || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
+ || UT_LIST_GET_LEN(fil_system.named_spaces) == 0);
}
/*******************************************************************//**
@@ -1966,9 +1872,9 @@ fil_close_log_files(
{
fil_space_t* space;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space = UT_LIST_GET_FIRST(fil_system.space_list);
while (space != NULL) {
fil_node_t* node;
@@ -1979,7 +1885,7 @@ fil_close_log_files(
continue;
}
- /* Log files are not in the fil_system->named_spaces list. */
+ /* Log files are not in the fil_system.named_spaces list. */
ut_ad(space->max_lsn == 0);
for (node = UT_LIST_GET_FIRST(space->chain);
@@ -1987,7 +1893,7 @@ fil_close_log_files(
node = UT_LIST_GET_NEXT(chain, node)) {
if (node->is_open()) {
- fil_node_close_file(node);
+ node->close();
}
}
@@ -1999,7 +1905,11 @@ fil_close_log_files(
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
+
+ if (free) {
+ log_sys.log.close();
+ }
}
/*******************************************************************//**
@@ -2014,14 +1924,14 @@ fil_set_max_space_id_if_bigger(
ib::fatal() << "Max tablespace id is too high, " << max_id;
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- if (fil_system->max_assigned_id < max_id) {
+ if (fil_system.max_assigned_id < max_id) {
- fil_system->max_assigned_id = max_id;
+ fil_system.max_assigned_id = max_id;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
/** Write the flushed LSN to the page header of the first page in the
@@ -2036,18 +1946,18 @@ fil_write_flushed_lsn(
byte* buf;
dberr_t err = DB_TABLESPACE_NOT_FOUND;
- buf1 = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
- buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
+ buf1 = static_cast<byte*>(ut_malloc_nokey(2U << srv_page_size_shift));
+ buf = static_cast<byte*>(ut_align(buf1, srv_page_size));
const page_id_t page_id(TRX_SYS_SPACE, 0);
- err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(),
+ err = fil_read(page_id, univ_page_size, 0, srv_page_size,
buf);
if (err == DB_SUCCESS) {
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn);
err = fil_write(page_id, univ_page_size, 0,
- univ_page_size.physical(), buf);
+ srv_page_size, buf);
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
@@ -2068,7 +1978,7 @@ fil_space_acquire_low(ulint id, bool silent)
{
fil_space_t* space;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
@@ -2080,26 +1990,14 @@ fil_space_acquire_low(ulint id, bool silent)
} else if (space->is_stopping()) {
space = NULL;
} else {
- space->n_pending_ops++;
+ space->acquire();
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(space);
}
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-void
-fil_space_release(fil_space_t* space)
-{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
- mutex_exit(&fil_system->mutex);
-}
-
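fil_space_release() and (below) fil_space_release_for_io() are removed in favour of inline fil_space_t::acquire()/release() and acquire_for_io()/release_for_io(); the n_pending_ops check further down now uses my_atomic_loadlint, so the counters are presumably atomic. A caller sketch under that assumption (release_for_io() itself is not shown in this diff and is assumed to be the declared counterpart):

        /* Sketch: pin a tablespace across work that may race with DROP. */
        if (fil_space_t* space = fil_space_acquire(space_id)) {
                /* ... use space; it cannot be freed while pinned ... */
                space->release();               /* was fil_space_release(space) */
        }

        /* The same pattern for I/O pinning: */
        if (fil_space_t* space = fil_space_acquire_for_io(space_id)) {
                /* ... submit the read or write ... */
                space->release_for_io();        /* was fil_space_release_for_io() */
        }
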
/** Acquire a tablespace for reading or writing a block,
when it could be dropped concurrently.
@param[in] id tablespace ID
@@ -2108,31 +2006,19 @@ when it could be dropped concurrently.
fil_space_t*
fil_space_acquire_for_io(ulint id)
{
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* space = fil_space_get_by_id(id);
if (space) {
- space->n_pending_ios++;
+ space->acquire_for_io();
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(space);
}
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-void
-fil_space_release_for_io(fil_space_t* space)
-{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ios > 0);
- space->n_pending_ios--;
- mutex_exit(&fil_system->mutex);
-}
-
/********************************************************//**
Creates the database directory for a table if it does not exist yet. */
void
@@ -2148,12 +2034,13 @@ fil_create_directory_for_tablename(
len = strlen(fil_path_to_mysql_datadir);
namend = strchr(name, '/');
ut_a(namend);
- path = static_cast<char*>(ut_malloc_nokey(len + (namend - name) + 2));
+ path = static_cast<char*>(
+ ut_malloc_nokey(len + ulint(namend - name) + 2));
memcpy(path, fil_path_to_mysql_datadir, len);
path[len] = '/';
- memcpy(path + len + 1, name, namend - name);
- path[len + (namend - name) + 1] = 0;
+ memcpy(path + len + 1, name, ulint(namend - name));
+ path[len + ulint(namend - name) + 1] = 0;
os_normalize_path(path);
@@ -2271,7 +2158,7 @@ fil_name_write_rename_low(
@param[in] space_id tablespace id
@param[in] old_name tablespace file name
@param[in] new_name tablespace file name after renaming */
-void
+static void
fil_name_write_rename(
ulint space_id,
const char* old_name,
@@ -2316,242 +2203,6 @@ fil_name_write(
fil_name_write(space->id, first_page_no, file->name, mtr);
}
-/********************************************************//**
-Recreates table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_table(
-/*===============*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate) /*!< in: The information of
- TRUNCATE log record */
-{
- dberr_t err = DB_SUCCESS;
- bool found;
- const page_size_t page_size(fil_space_get_page_size(space_id,
- &found));
-
- if (!found) {
- ib::info() << "Missing .ibd file for table '" << name
- << "' with tablespace " << space_id;
- return(DB_ERROR);
- }
-
- ut_ad(!truncate_t::s_fix_up_active);
- truncate_t::s_fix_up_active = true;
-
- /* Step-1: Scan for active indexes from REDO logs and drop
- all the indexes using low level function that take root_page_no
- and space-id. */
- truncate.drop_indexes(space_id);
-
- /* Step-2: Scan for active indexes and re-create them. */
- err = truncate.create_indexes(
- name, space_id, page_size, flags, format_flags);
- if (err != DB_SUCCESS) {
- ib::info() << "Failed to create indexes for the table '"
- << name << "' with tablespace " << space_id
- << " while fixing up truncate action";
- return(err);
- }
-
- truncate_t::s_fix_up_active = false;
-
- return(err);
-}
-
-/********************************************************//**
-Recreates the tablespace and table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_tablespace(
-/*====================*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate, /*!< in: The information of
- TRUNCATE log record */
- lsn_t recv_lsn) /*!< in: the end LSN of
- the log record */
-{
- dberr_t err = DB_SUCCESS;
- mtr_t mtr;
-
- ut_ad(!truncate_t::s_fix_up_active);
- truncate_t::s_fix_up_active = true;
-
- /* Step-1: Invalidate buffer pool pages belonging to the tablespace
- to re-create. */
- buf_LRU_flush_or_remove_pages(space_id, NULL);
-
- /* Remove all insert buffer entries for the tablespace */
- ibuf_delete_for_discarded_space(space_id);
-
- /* Step-2: truncate tablespace (reset the size back to original or
- default size) of tablespace. */
- err = truncate.truncate(
- space_id, truncate.get_dir_path(), name, flags, true);
-
- if (err != DB_SUCCESS) {
-
- ib::info() << "Cannot access .ibd file for table '"
- << name << "' with tablespace " << space_id
- << " while truncating";
- return(DB_ERROR);
- }
-
- bool found;
- const page_size_t& page_size =
- fil_space_get_page_size(space_id, &found);
-
- if (!found) {
- ib::info() << "Missing .ibd file for table '" << name
- << "' with tablespace " << space_id;
- return(DB_ERROR);
- }
-
- /* Step-3: Initialize Header. */
- if (page_size.is_compressed()) {
- byte* buf;
- page_t* page;
-
- buf = static_cast<byte*>(ut_zalloc_nokey(3 * UNIV_PAGE_SIZE));
-
- /* Align the memory for file i/o */
- page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
-
- flags |= FSP_FLAGS_PAGE_SSIZE();
-
- fsp_header_init_fields(page, space_id, flags);
-
- mach_write_to_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
-
- page_zip_des_t page_zip;
- page_zip_set_size(&page_zip, page_size.physical());
- page_zip.data = page + UNIV_PAGE_SIZE;
-
-#ifdef UNIV_DEBUG
- page_zip.m_start =
-#endif /* UNIV_DEBUG */
- page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
- buf_flush_init_for_writing(NULL, page, &page_zip, 0);
-
- err = fil_write(page_id_t(space_id, 0), page_size, 0,
- page_size.physical(), page_zip.data);
-
- ut_free(buf);
-
- if (err != DB_SUCCESS) {
- ib::info() << "Failed to clean header of the"
- " table '" << name << "' with tablespace "
- << space_id;
- return(err);
- }
- }
-
- mtr_start(&mtr);
- /* Don't log the operation while fixing up table truncate operation
- as crash at this level can still be sustained with recovery restarting
- from last checkpoint. */
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- /* Initialize the first extent descriptor page and
- the second bitmap page for the new tablespace. */
- fsp_header_init(space_id, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
- mtr_commit(&mtr);
-
- /* Step-4: Re-Create Indexes to newly re-created tablespace.
- This operation will restore tablespace back to what it was
- when it was created during CREATE TABLE. */
- err = truncate.create_indexes(
- name, space_id, page_size, flags, format_flags);
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Step-5: Write new created pages into ibd file handle and
- flush it to disk for the tablespace, in case i/o-handler thread
- deletes the bitmap page from buffer. */
- mtr_start(&mtr);
-
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_id(space_id);
-
- mutex_exit(&fil_system->mutex);
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- for (ulint page_no = 0; page_no < node->size; ++page_no) {
-
- const page_id_t cur_page_id(space_id, page_no);
-
- buf_block_t* block = buf_page_get(cur_page_id, page_size,
- RW_X_LATCH, &mtr);
-
- byte* page = buf_block_get_frame(block);
-
- if (!FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
- ut_ad(!page_size.is_compressed());
-
- buf_flush_init_for_writing(
- block, page, NULL, recv_lsn);
-
- err = fil_write(cur_page_id, page_size, 0,
- page_size.physical(), page);
- } else {
- ut_ad(page_size.is_compressed());
-
- /* We don't want to rewrite empty pages. */
-
- if (fil_page_get_type(page) != 0) {
- page_zip_des_t* page_zip =
- buf_block_get_page_zip(block);
-
- buf_flush_init_for_writing(
- block, page, page_zip, recv_lsn);
-
- err = fil_write(cur_page_id, page_size, 0,
- page_size.physical(),
- page_zip->data);
- } else {
-#ifdef UNIV_DEBUG
- const byte* data = block->page.zip.data;
-
- /* Make sure that the page is really empty */
- for (ulint i = 0;
- i < page_size.physical();
- ++i) {
-
- ut_a(data[i] == 0);
- }
-#endif /* UNIV_DEBUG */
- }
- }
-
- if (err != DB_SUCCESS) {
- ib::info() << "Cannot write page " << page_no
- << " into a .ibd file for table '"
- << name << "' with tablespace " << space_id;
- }
- }
-
- mtr_commit(&mtr);
-
- truncate_t::s_fix_up_active = false;
-
- return(err);
-}
-
/** Replay a file rename operation if possible.
@param[in] space_id tablespace identifier
@param[in] first_page_no first page number in the file
@@ -2594,9 +2245,9 @@ fil_op_replay_rename(
ut_a(namend != NULL);
char* dir = static_cast<char*>(
- ut_malloc_nokey(namend - new_name + 1));
+ ut_malloc_nokey(ulint(namend - new_name) + 1));
- memcpy(dir, new_name, namend - new_name);
+ memcpy(dir, new_name, ulint(namend - new_name));
dir[namend - new_name] = '\0';
bool success = os_file_create_directory(dir, false);
@@ -2605,14 +2256,14 @@ fil_op_replay_rename(
ulint dirlen = 0;
if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) {
- dirlen = dirend - dir + 1;
+ dirlen = ulint(dirend - dir) + 1;
}
ut_free(dir);
/* New path must not exist. */
dberr_t err = fil_rename_tablespace_check(
- space_id, name, new_name, false);
+ name, new_name, false);
if (err != DB_SUCCESS) {
ib::error() << " Cannot replay file rename."
" Remove either file and try again.";
@@ -2624,7 +2275,7 @@ fil_op_replay_rename(
strlen(new_name + dirlen)
- 4 /* remove ".ibd" */);
- ut_ad(new_table[namend - new_name - dirlen]
+ ut_ad(new_table[ulint(namend - new_name) - dirlen]
== OS_PATH_SEPARATOR);
#if OS_PATH_SEPARATOR != '/'
new_table[namend - new_name - dirlen] = '/';
@@ -2654,13 +2305,13 @@ static
ulint
fil_check_pending_ops(const fil_space_t* space, ulint count)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
if (space == NULL) {
return 0;
}
- if (ulint n_pending_ops = space->n_pending_ops) {
+ if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) {
if (count > 5000) {
ib::warn() << "Trying to close/delete/truncate"
@@ -2687,8 +2338,8 @@ fil_check_pending_io(
fil_node_t** node, /*!< out: Node in space list */
ulint count) /*!< in: number of attempts so far */
{
- ut_ad(mutex_own(&fil_system->mutex));
- ut_a(space->n_pending_ops == 0);
+ ut_ad(mutex_own(&fil_system.mutex));
+ ut_ad(!space->referenced());
switch (operation) {
case FIL_OPERATION_DELETE:
@@ -2744,18 +2395,17 @@ fil_check_pending_operations(
*space = 0;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* sp = fil_space_get_by_id(id);
if (sp) {
sp->stop_new_ops = true;
if (sp->crypt_data) {
- sp->n_pending_ops++;
- mutex_exit(&fil_system->mutex);
+ sp->acquire();
+ mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp);
- mutex_enter(&fil_system->mutex);
- ut_ad(sp->n_pending_ops > 0);
- sp->n_pending_ops--;
+ mutex_enter(&fil_system.mutex);
+ sp->release();
}
}
@@ -2766,13 +2416,13 @@ fil_check_pending_operations(
count = fil_check_pending_ops(sp, count);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (count > 0) {
os_thread_sleep(20000);
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
} while (count > 0);
/* Check for pending IO. */
@@ -2781,7 +2431,7 @@ fil_check_pending_operations(
sp = fil_space_get_by_id(id);
if (sp == NULL) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(DB_TABLESPACE_NOT_FOUND);
}
@@ -2793,14 +2443,14 @@ fil_check_pending_operations(
*path = mem_strdup(node->name);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (count == 0) {
break;
}
os_thread_sleep(20000);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
}
ut_ad(sp);
@@ -2845,7 +2495,7 @@ fil_close_tablespace(
fil_flush() from being applied to this tablespace. */
{
- FlushObserver observer(id, trx, NULL);
+ FlushObserver observer(space, trx, NULL);
buf_LRU_flush_or_remove_pages(id, &observer);
}
@@ -2888,14 +2538,11 @@ fil_table_accessible(const dict_table_t* table)
return(false);
}
- if (fil_space_t* space = fil_space_acquire(table->space)) {
- bool accessible = !space->is_stopping();
- fil_space_release(space);
- ut_ad(accessible || dict_table_is_file_per_table(table));
- return(accessible);
- } else {
- return(false);
- }
+ mutex_enter(&fil_system.mutex);
+ bool accessible = table->space && !table->space->is_stopping();
+ mutex_exit(&fil_system.mutex);
+ ut_ad(accessible || dict_table_is_file_per_table(table));
+ return accessible;
}
/** Delete a tablespace and associated .ibd file.
@@ -2980,25 +2627,25 @@ fil_delete_tablespace(
RemoteDatafile::delete_link_file(space->name);
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
/* Double check the sanity of pending ops after reacquiring
the fil_system::mutex. */
if (const fil_space_t* s = fil_space_get_by_id(id)) {
ut_a(s == space);
- ut_a(space->n_pending_ops == 0);
+ ut_a(!space->referenced());
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_a(node->n_pending == 0);
fil_space_detach(space);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
log_mutex_enter();
if (space->max_lsn != 0) {
ut_d(space->max_lsn = 0);
- UT_LIST_REMOVE(fil_system->named_spaces, space);
+ UT_LIST_REMOVE(fil_system.named_spaces, space);
}
log_mutex_exit();
@@ -3014,7 +2661,7 @@ fil_delete_tablespace(
err = DB_IO_ERROR;
}
} else {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
err = DB_TABLESPACE_NOT_FOUND;
}
@@ -3052,239 +2699,6 @@ void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr)
NULL, space->flags & ~FSP_FLAGS_MEM_MASK, mtr);
}
-/** Truncate the tablespace to needed size.
-@param[in] space_id id of tablespace to truncate
-@param[in] size_in_pages truncate size.
-@return true if truncate was successful. */
-bool
-fil_truncate_tablespace(
- ulint space_id,
- ulint size_in_pages)
-{
- /* Step-1: Prepare tablespace for truncate. This involves
- stopping all the new operations + IO on that tablespace
- and ensuring that related pages are flushed to disk. */
- if (fil_prepare_for_truncate(space_id) != DB_SUCCESS) {
- return(false);
- }
-
- /* Step-2: Invalidate buffer pool pages belonging to the tablespace
- to re-create. Remove all insert buffer entries for the tablespace */
- buf_LRU_flush_or_remove_pages(space_id, NULL);
-
- /* Step-3: Truncate the tablespace and accordingly update
- the fil_space_t handler that is used to access this tablespace. */
- mutex_enter(&fil_system->mutex);
- fil_space_t* space = fil_space_get_by_id(space_id);
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- ut_ad(node->is_open());
-
- space->size = node->size = size_in_pages;
-
- bool success = os_file_truncate(node->name, node->handle, 0);
- if (success) {
-
- os_offset_t size = os_offset_t(size_in_pages) * UNIV_PAGE_SIZE;
-
- success = os_file_set_size(
- node->name, node->handle, size,
- FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
-
- if (success) {
- space->stop_new_ops = false;
- space->is_being_truncated = false;
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(success);
-}
-
-/*******************************************************************//**
-Prepare for truncating a single-table tablespace.
-1) Check pending operations on a tablespace;
-2) Remove all insert buffer entries for the tablespace;
-@return DB_SUCCESS or error */
-dberr_t
-fil_prepare_for_truncate(
-/*=====================*/
- ulint id) /*!< in: space id */
-{
- char* path = 0;
- fil_space_t* space = 0;
-
- ut_a(!is_system_tablespace(id));
-
- dberr_t err = fil_check_pending_operations(
- id, FIL_OPERATION_TRUNCATE, &space, &path);
-
- ut_free(path);
-
- if (err == DB_TABLESPACE_NOT_FOUND) {
- ib::error() << "Cannot truncate tablespace " << id
- << " because it is not found in the tablespace"
- " memory cache.";
- }
-
- return(err);
-}
-
-/** Reinitialize the original tablespace header with the same space id
-for single tablespace
-@param[in] table table belongs to tablespace
-@param[in] size size in blocks
-@param[in] trx Transaction covering truncate */
-void
-fil_reinit_space_header_for_table(
- dict_table_t* table,
- ulint size,
- trx_t* trx)
-{
- ulint id = table->space;
-
- ut_a(!is_system_tablespace(id));
-
- /* Invalidate in the buffer pool all pages belonging
- to the tablespace. The buffer pool scan may take long
- time to complete, therefore we release dict_sys->mutex
- and the dict operation lock during the scan and aquire
- it again after the buffer pool scan.*/
-
- /* Release the lock on the indexes too. So that
- they won't violate the latch ordering. */
- dict_table_x_unlock_indexes(table);
- row_mysql_unlock_data_dictionary(trx);
-
- /* Lock the search latch in shared mode to prevent user
- from disabling AHI during the scan */
- btr_search_s_lock_all();
- DEBUG_SYNC_C("buffer_pool_scan");
- buf_LRU_flush_or_remove_pages(id, NULL);
- btr_search_s_unlock_all();
-
- row_mysql_lock_data_dictionary(trx);
-
- dict_table_x_lock_indexes(table);
-
- /* Remove all insert buffer entries for the tablespace */
- ibuf_delete_for_discarded_space(id);
-
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_id(id);
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- space->size = node->size = size;
-
- mutex_exit(&fil_system->mutex);
-
- mtr_t mtr;
-
- mtr_start(&mtr);
- mtr.set_named_space(id);
-
- fsp_header_init(id, size, &mtr);
-
- mtr_commit(&mtr);
-}
-
-#ifdef UNIV_DEBUG
-/** Increase redo skipped count for a tablespace.
-@param[in] id space id */
-void
-fil_space_inc_redo_skipped_count(
- ulint id)
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space != NULL);
-
- space->redo_skipped_count++;
-
- mutex_exit(&fil_system->mutex);
-}
-
-/** Decrease redo skipped count for a tablespace.
-@param[in] id space id */
-void
-fil_space_dec_redo_skipped_count(
- ulint id)
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space != NULL);
- ut_a(space->redo_skipped_count > 0);
-
- space->redo_skipped_count--;
-
- mutex_exit(&fil_system->mutex);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
- in DROP TABLE they are only removed gradually in the background;
-
- 3. Free all the pages in use by the tablespace.
-@return DB_SUCCESS or error */
-dberr_t
-fil_discard_tablespace(
-/*===================*/
- ulint id) /*!< in: space id */
-{
- dberr_t err;
-
- switch (err = fil_delete_tablespace(id)) {
- case DB_SUCCESS:
- break;
-
- case DB_IO_ERROR:
- ib::warn() << "While deleting tablespace " << id
- << " in DISCARD TABLESPACE. File rename/delete"
- " failed: " << ut_strerr(err);
- break;
-
- case DB_TABLESPACE_NOT_FOUND:
- ib::warn() << "Cannot delete tablespace " << id
- << " in DISCARD TABLESPACE: " << ut_strerr(err);
- break;
-
- default:
- ut_error;
- }
-
- /* Remove all insert buffer entries for the tablespace */
-
- ibuf_delete_for_discarded_space(id);
-
- return(err);
-}
-
/*******************************************************************//**
Allocates and builds a file name from a path, a table or tablespace name
and a suffix. The string must be freed by caller with ut_free().
@@ -3391,15 +2805,13 @@ fil_make_filepath(
/** Test if a tablespace file can be renamed to a new filepath by checking
if that the old filepath exists and the new filepath does not exist.
-@param[in] space_id tablespace id
@param[in] old_path old filepath
@param[in] new_path new filepath
@param[in] is_discarded whether the tablespace is discarded
@param[in] replace_new whether to ignore the existence of new_path
@return innodb error code */
-dberr_t
+static dberr_t
fil_rename_tablespace_check(
- ulint space_id,
const char* old_path,
const char* new_path,
bool is_discarded,
@@ -3413,8 +2825,7 @@ fil_rename_tablespace_check(
&& !exists) {
ib::error() << "Cannot rename '" << old_path
<< "' to '" << new_path
- << "' for space ID " << space_id
- << " because the source file"
+ << "' because the source file"
<< " does not exist.";
return(DB_TABLESPACE_NOT_FOUND);
}
@@ -3427,8 +2838,7 @@ fil_rename_tablespace_check(
if (!replace_new) {
ib::error() << "Cannot rename '" << old_path
<< "' to '" << new_path
- << "' for space ID " << space_id
- << " because the target file exists."
+ << "' because the target file exists."
" Remove the target file and try again.";
return(DB_TABLESPACE_EXISTS);
}
@@ -3440,8 +2850,8 @@ fil_rename_tablespace_check(
a possibly existing tablespace that is associated with the
new tablespace file. */
retry:
- mutex_enter(&fil_system->mutex);
- for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ mutex_enter(&fil_system.mutex);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
ulint id = space->id;
if (id && id < SRV_LOG_SPACE_FIRST_ID
@@ -3450,7 +2860,7 @@ retry:
UT_LIST_GET_FIRST(space->chain)->name)) {
ib::info() << "TRUNCATE rollback: " << id
<< "," << new_path;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
dberr_t err = fil_delete_tablespace(id);
if (err != DB_SUCCESS) {
return err;
@@ -3458,12 +2868,31 @@ retry:
goto retry;
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
fil_delete_file(new_path);
return(DB_SUCCESS);
}
+dberr_t fil_space_t::rename(const char* name, const char* path, bool log,
+ bool replace)
+{
+ ut_ad(UT_LIST_GET_LEN(chain) == 1);
+ ut_ad(!is_system_tablespace(id));
+
+ if (log) {
+ dberr_t err = fil_rename_tablespace_check(
+ chain.start->name, path, false, replace);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ fil_name_write_rename(id, chain.start->name, path);
+ }
+
+ return fil_rename_tablespace(id, chain.start->name, name, path)
+ ? DB_SUCCESS : DB_ERROR;
+}
+
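Renaming is now driven through the new fil_space_t::rename(), which optionally performs the pre-check and writes the rename record via fil_name_write_rename() before calling the now-static fil_rename_tablespace(). A hedged usage sketch; `space` stands for an already looked-up fil_space_t*, the names are illustrative, and new_path would normally come from fil_make_filepath():

        /* Sketch: rename db1/t1 to db1/t2 with redo logging. */
        char*   new_path = fil_make_filepath(NULL, "db1/t2", IBD, false);

        dberr_t err = space->rename("db1/t2", new_path,
                                    true,       /* log the rename */
                                    false);     /* do not replace new_path */
        ut_free(new_path);
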
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
@param[in] id tablespace identifier
@@ -3473,7 +2902,7 @@ databasename/tablename format
@param[in] new_path_in new file name,
or NULL if it is located in the normal data directory
@return true if success */
-bool
+static bool
fil_rename_tablespace(
ulint id,
const char* old_path,
@@ -3486,7 +2915,7 @@ fil_rename_tablespace(
ut_ad(strchr(new_name, '/') != NULL);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
@@ -3495,30 +2924,17 @@ fil_rename_tablespace(
<< " in the tablespace memory cache, though the file '"
<< old_path
<< "' in a rename operation should have that id.";
-func_exit:
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(false);
}
- if (space != fil_space_get_by_name(space->name)) {
- ib::error() << "Cannot find " << space->name
- << " in tablespace memory cache";
- goto func_exit;
- }
-
- if (fil_space_get_by_name(new_name)) {
- ib::error() << new_name
- << " is already in tablespace memory cache";
- goto func_exit;
- }
-
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
space->n_pending_ops++;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
char* new_file_name = new_path_in == NULL
? fil_make_filepath(NULL, new_name, IBD, false)
@@ -3526,8 +2942,6 @@ func_exit:
char* old_file_name = node->name;
char* new_space_name = mem_strdup(new_name);
char* old_space_name = space->name;
- ulint old_fold = ut_fold_string(old_space_name);
- ulint new_fold = ut_fold_string(new_space_name);
ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL);
ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);
@@ -3537,15 +2951,12 @@ func_exit:
log_mutex_enter();
}
- /* log_sys->mutex is above fil_system->mutex in the latching order */
+ /* log_sys.mutex is above fil_system.mutex in the latching order */
ut_ad(log_mutex_own());
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
ut_ad(space->n_pending_ops);
space->n_pending_ops--;
ut_ad(space->name == old_space_name);
- /* We already checked these. */
- ut_ad(space == fil_space_get_by_name(old_space_name));
- ut_ad(!fil_space_get_by_name(new_space_name));
ut_ad(node->name == old_file_name);
bool success = os_file_rename(
@@ -3563,11 +2974,7 @@ func_exit:
ut_ad(space->name == old_space_name);
if (success) {
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- old_fold, space);
space->name = new_space_name;
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
- new_fold, space);
} else {
/* Because nothing was renamed, we must free the new
names, not the old ones. */
@@ -3575,7 +2982,7 @@ func_exit:
old_space_name = new_space_name;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_free(old_file_name);
ut_free(old_space_name);
@@ -3588,12 +2995,14 @@ func_exit:
@param[in] name Tablespace name in dbname/tablename format.
@param[in] path Path and filename of the datafile to create.
@param[in] flags Tablespace flags
-@param[in] size Initial size of the tablespace file in
- pages, must be >= FIL_IBD_FILE_INITIAL_SIZE
+@param[in] size Initial size of the tablespace file in pages,
+must be >= FIL_IBD_FILE_INITIAL_SIZE
@param[in] mode MariaDB encryption mode
@param[in] key_id MariaDB encryption key_id
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return the created tablespace
+@retval NULL on error */
+fil_space_t*
fil_ibd_create(
ulint space_id,
const char* name,
@@ -3601,10 +3010,10 @@ fil_ibd_create(
ulint flags,
ulint size,
fil_encryption_t mode,
- uint32_t key_id)
+ uint32_t key_id,
+ dberr_t* err)
{
pfs_os_file_t file;
- dberr_t err;
byte* buf2;
byte* page;
bool success;
@@ -3620,9 +3029,9 @@ fil_ibd_create(
/* Create the subdirectories in the path, if they are
not there already. */
- err = os_file_create_subdirs_if_needed(path);
- if (err != DB_SUCCESS) {
- return(err);
+ *err = os_file_create_subdirs_if_needed(path);
+ if (*err != DB_SUCCESS) {
+ return NULL;
}
file = os_file_create(
@@ -3635,26 +3044,24 @@ fil_ibd_create(
if (!success) {
/* The following call will print an error message */
- ulint error = os_file_get_last_error(true);
-
- ib::error() << "Cannot create file '" << path << "'";
-
- if (error == OS_FILE_ALREADY_EXISTS) {
+ switch (os_file_get_last_error(true)) {
+ case OS_FILE_ALREADY_EXISTS:
ib::info() << "The file '" << path << "'"
" already exists though the"
" corresponding table did not exist"
" in the InnoDB data dictionary."
" You can resolve the problem by removing"
" the file.";
-
- return(DB_TABLESPACE_EXISTS);
- }
-
- if (error == OS_FILE_DISK_FULL) {
- return(DB_OUT_OF_FILE_SPACE);
+ *err = DB_TABLESPACE_EXISTS;
+ break;
+ case OS_FILE_DISK_FULL:
+ *err = DB_OUT_OF_FILE_SPACE;
+ break;
+ default:
+ *err = DB_ERROR;
}
-
- return(DB_ERROR);
+ ib::error() << "Cannot create file '" << path << "'";
+ return NULL;
}
const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
@@ -3665,14 +3072,14 @@ fil_ibd_create(
}
#endif
- success = os_file_set_size(
+ if (!os_file_set_size(
path, file,
- os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT, is_compressed);
-
- if (!success) {
+ os_offset_t(size) << srv_page_size_shift, is_compressed)) {
+ *err = DB_OUT_OF_FILE_SPACE;
+err_exit:
os_file_close(file);
os_file_delete(innodb_data_file_key, path);
- return(DB_OUT_OF_FILE_SPACE);
+ return NULL;
}
bool punch_hole = os_is_sparse_file_supported(file);
@@ -3688,11 +3095,11 @@ fil_ibd_create(
with zeros from the call of os_file_set_size(), until a buffer pool
flush would write to it. */
- buf2 = static_cast<byte*>(ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
+ buf2 = static_cast<byte*>(ut_malloc_nokey(3U << srv_page_size_shift));
/* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ page = static_cast<byte*>(ut_align(buf2, srv_page_size));
- memset(page, '\0', UNIV_PAGE_SIZE);
+ memset(page, '\0', srv_page_size);
flags |= FSP_FLAGS_PAGE_SSIZE();
fsp_header_init_fields(page, space_id, flags);
@@ -3705,12 +3112,12 @@ fil_ibd_create(
buf_flush_init_for_writing(NULL, page, NULL, 0);
- err = os_file_write(
+ *err = os_file_write(
request, path, file, page, 0, page_size.physical());
} else {
page_zip_des_t page_zip;
page_zip_set_size(&page_zip, page_size.physical());
- page_zip.data = page + UNIV_PAGE_SIZE;
+ page_zip.data = page + srv_page_size;
#ifdef UNIV_DEBUG
page_zip.m_start =
#endif /* UNIV_DEBUG */
@@ -3719,43 +3126,33 @@ fil_ibd_create(
buf_flush_init_for_writing(NULL, page, &page_zip, 0);
- err = os_file_write(
+ *err = os_file_write(
request, path, file, page_zip.data, 0,
page_size.physical());
}
ut_free(buf2);
- if (err != DB_SUCCESS) {
-
+ if (*err != DB_SUCCESS) {
ib::error()
<< "Could not write the first page to"
<< " tablespace '" << path << "'";
-
- os_file_close(file);
- os_file_delete(innodb_data_file_key, path);
-
- return(DB_ERROR);
+ goto err_exit;
}
- success = os_file_flush(file);
-
- if (!success) {
+ if (!os_file_flush(file)) {
ib::error() << "File flush of tablespace '"
<< path << "' failed";
- os_file_close(file);
- os_file_delete(innodb_data_file_key, path);
- return(DB_ERROR);
+ *err = DB_ERROR;
+ goto err_exit;
}
if (has_data_dir) {
/* Make the ISL file if the IBD file is not
in the default location. */
- err = RemoteDatafile::create_link_file(name, path);
- if (err != DB_SUCCESS) {
- os_file_close(file);
- os_file_delete(innodb_data_file_key, path);
- return(err);
+ *err = RemoteDatafile::create_link_file(name, path);
+ if (*err != DB_SUCCESS) {
+ goto err_exit;
}
}
@@ -3769,16 +3166,12 @@ fil_ibd_create(
space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE,
crypt_data, mode);
if (!space) {
- if (crypt_data) {
- free(crypt_data);
- }
-
- err = DB_ERROR;
+ free(crypt_data);
+ *err = DB_ERROR;
} else {
- mtr_t mtr;
fil_node_t* file = space->add(path, OS_FILE_CLOSED, size,
false, true);
-
+ mtr_t mtr;
mtr.start();
fil_op_write_log(
MLOG_FILE_CREATE2, space_id, 0, file->name,
@@ -3789,12 +3182,12 @@ fil_ibd_create(
file->block_size = block_size;
space->punch_hole = punch_hole;
- err = DB_SUCCESS;
+ *err = DB_SUCCESS;
}
os_file_close(file);
- if (err != DB_SUCCESS) {
+ if (*err != DB_SUCCESS) {
if (has_data_dir) {
RemoteDatafile::delete_link_file(name);
}
@@ -3802,7 +3195,7 @@ fil_ibd_create(
os_file_delete(innodb_data_file_key, path);
}
- return(err);
+ return space;
}
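
fil_ibd_create() now returns the created fil_space_t* and reports its status through an out parameter rather than a dberr_t return value. A caller sketch, assuming the usual MariaDB encryption defaults (FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); all other identifiers are illustrative:

        /* Sketch: create a minimal .ibd file and check the out-parameter. */
        dberr_t         err;
        fil_space_t*    space = fil_ibd_create(
                space_id, "db1/t1", path, flags,
                FIL_IBD_FILE_INITIAL_SIZE,
                FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY,
                &err);

        if (space == NULL) {
                /* err is DB_TABLESPACE_EXISTS, DB_OUT_OF_FILE_SPACE, ... */
                return(err);
        }
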
/** Try to open a single-table tablespace and optionally check that the
@@ -3833,18 +3226,44 @@ statement to update the dictionary tables if they are incorrect.
@param[in] space_name tablespace name of the datafile
If file-per-table, it is the table name in the databasename/tablename format
@param[in] path_in expected filepath, usually read from dictionary
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return tablespace
+@retval NULL if the tablespace could not be opened */
+fil_space_t*
fil_ibd_open(
- bool validate,
- bool fix_dict,
- fil_type_t purpose,
- ulint id,
- ulint flags,
- const char* space_name,
- const char* path_in)
+ bool validate,
+ bool fix_dict,
+ fil_type_t purpose,
+ ulint id,
+ ulint flags,
+ const table_name_t& tablename,
+ const char* path_in,
+ dberr_t* err)
{
- dberr_t err = DB_SUCCESS;
+ mutex_enter(&fil_system.mutex);
+ if (fil_space_t* space = fil_space_get_by_id(id)) {
+ if (strcmp(space->name, tablename.m_name)) {
+ table_name_t space_name;
+ space_name.m_name = space->name;
+ ib::error()
+ << "Trying to open table " << tablename
+ << " with id " << id
+ << ", conflicting with " << space_name;
+ space = NULL;
+ if (err) *err = DB_TABLESPACE_EXISTS;
+ } else if (err) *err = DB_SUCCESS;
+
+ mutex_exit(&fil_system.mutex);
+
+ if (space && validate && !srv_read_only_mode) {
+ fsp_flags_try_adjust(space,
+ flags & ~FSP_FLAGS_MEM_MASK);
+ }
+
+ return space;
+ }
+ mutex_exit(&fil_system.mutex);
+
bool dict_filepath_same_as_default = false;
bool link_file_found = false;
bool link_file_is_bad = false;
@@ -3864,19 +3283,21 @@ fil_ibd_open(
/* Table flags can be ULINT_UNDEFINED if
dict_tf_to_fsp_flags_failure is set. */
if (flags == ULINT_UNDEFINED) {
- return(DB_CORRUPTION);
+corrupted:
+ if (err) *err = DB_CORRUPTION;
+ return NULL;
}
ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
- df_default.init(space_name, flags);
- df_dict.init(space_name, flags);
- df_remote.init(space_name, flags);
+ df_default.init(tablename.m_name, flags);
+ df_dict.init(tablename.m_name, flags);
+ df_remote.init(tablename.m_name, flags);
/* Discover the correct file by looking in three possible locations
	while avoiding unnecessary effort. */
/* We will always look for an ibd in the default location. */
- df_default.make_filepath(NULL, space_name, IBD);
+ df_default.make_filepath(NULL, tablename.m_name, IBD);
/* Look for a filepath embedded in an ISL where the default file
would be. */
@@ -3960,8 +3381,8 @@ fil_ibd_open(
if (valid_tablespaces_found == 0) {
os_file_get_last_error(true);
ib::error() << "Could not find a valid tablespace file for `"
- << space_name << "`. " << TROUBLESHOOT_DATADICT_MSG;
- return(DB_CORRUPTION);
+ << tablename << "`. " << TROUBLESHOOT_DATADICT_MSG;
+ goto corrupted;
}
if (!validate) {
goto skip_validate;
@@ -3970,7 +3391,7 @@ fil_ibd_open(
/* Do not open any tablespaces if more than one tablespace with
the correct space ID and flags were found. */
if (tablespaces_found > 1) {
- ib::error() << "A tablespace for `" << space_name
+ ib::error() << "A tablespace for `" << tablename
<< "` has been found in multiple places;";
if (df_default.is_open()) {
@@ -4001,7 +3422,7 @@ fil_ibd_open(
any bad tablespaces. */
if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
ib::error() << "Will not open tablespace `"
- << space_name << "`";
+ << tablename << "`";
/* If the file is not open it cannot be valid. */
ut_ad(df_default.is_open() || !df_default.is_valid());
@@ -4013,10 +3434,11 @@ fil_ibd_open(
if (df_default.is_open() != df_default.is_valid()
|| df_dict.is_open() != df_dict.is_valid()
|| df_remote.is_open() != df_remote.is_valid()) {
- return(DB_CORRUPTION);
+ goto corrupted;
}
error:
- return(DB_ERROR);
+ if (err) *err = DB_ERROR;
+ return NULL;
}
/* There is only one valid tablespace found and we did
@@ -4070,7 +3492,8 @@ error:
ut_ad(!dict_filepath_same_as_default);
dict_update_filepath(id, df_default.filepath());
if (link_file_is_bad) {
- RemoteDatafile::delete_link_file(space_name);
+ RemoteDatafile::delete_link_file(
+ tablename.m_name);
}
} else if (!link_file_found || link_file_is_bad) {
@@ -4078,9 +3501,9 @@ error:
/* Fix the link file if we got our filepath
from the dictionary but a link file did not
exist or it did not point to a valid file. */
- RemoteDatafile::delete_link_file(space_name);
+ RemoteDatafile::delete_link_file(tablename.m_name);
RemoteDatafile::create_link_file(
- space_name, df_dict.filepath());
+ tablename.m_name, df_dict.filepath());
}
} else if (df_remote.is_open()) {
@@ -4091,7 +3514,8 @@ error:
/* SYS_DATAFILES record for this space ID
was not found. */
dict_replace_tablespace_and_filepath(
- id, space_name, df_remote.filepath(), flags);
+ id, tablename.m_name,
+ df_remote.filepath(), flags);
}
} else if (df_default.is_open()) {
@@ -4106,46 +3530,44 @@ error:
|| (path_in == NULL && DICT_TF_HAS_DATA_DIR(flags))
|| df_remote.filepath() != NULL) {
dict_replace_tablespace_and_filepath(
- id, space_name, df_default.filepath(), flags);
+ id, tablename.m_name, df_default.filepath(),
+ flags);
}
}
skip_validate:
- if (err == DB_SUCCESS) {
- const byte* first_page =
- df_default.is_open() ? df_default.get_first_page() :
- df_dict.is_open() ? df_dict.get_first_page() :
- df_remote.get_first_page();
-
- fil_space_crypt_t* crypt_data = first_page
- ? fil_space_read_crypt_data(page_size_t(flags),
- first_page)
- : NULL;
-
- fil_space_t* space = fil_space_create(
- space_name, id, flags, purpose, crypt_data);
- if (!space) {
- goto error;
- }
+ const byte* first_page =
+ df_default.is_open() ? df_default.get_first_page() :
+ df_dict.is_open() ? df_dict.get_first_page() :
+ df_remote.get_first_page();
+
+ fil_space_crypt_t* crypt_data = first_page
+ ? fil_space_read_crypt_data(page_size_t(flags), first_page)
+ : NULL;
+
+ fil_space_t* space = fil_space_create(
+ tablename.m_name, id, flags, purpose, crypt_data);
+ if (!space) {
+ goto error;
+ }
- /* We do not measure the size of the file, that is why
- we pass the 0 below */
+ /* We do not measure the size of the file, that is why
+ we pass the 0 below */
- space->add(
- df_remote.is_open() ? df_remote.filepath() :
- df_dict.is_open() ? df_dict.filepath() :
- df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
+ space->add(
+ df_remote.is_open() ? df_remote.filepath() :
+ df_dict.is_open() ? df_dict.filepath() :
+ df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
- if (err == DB_SUCCESS && validate
- && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
- df_remote.close();
- df_dict.close();
- df_default.close();
- fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK);
- }
+ if (validate && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
+ df_remote.close();
+ df_dict.close();
+ df_default.close();
+ fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK);
}
- return(err);
+ if (err) *err = DB_SUCCESS;
+ return space;
}
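
fil_ibd_open() likewise returns the tablespace, takes a table_name_t instead of a char* name, and makes the dberr_t an optional out parameter; an already-cached space is now resolved by ID up front. A hedged caller sketch in which every identifier other than the function and type names is illustrative:

        /* Sketch: open and validate the tablespace of a table. */
        dberr_t         err;
        fil_space_t*    space = fil_ibd_open(
                true,                   /* validate */
                false,                  /* fix_dict */
                FIL_TYPE_TABLESPACE,
                space_id, expected_flags,
                name,                   /* table_name_t */
                NULL,                   /* path_in: discover via default/ISL */
                &err);

        if (space == NULL) {
                /* err is DB_CORRUPTION, DB_TABLESPACE_EXISTS or DB_ERROR */
        }
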
/** Looks for a pre-existing fil_space_t with the given tablespace ID
@@ -4166,7 +3588,7 @@ fil_space_read_name_and_filepath(
*name = NULL;
*filepath = NULL;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* space = fil_space_get_by_id(space_id);
@@ -4179,7 +3601,7 @@ fil_space_read_name_and_filepath(
success = true;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(success);
}
@@ -4208,7 +3630,7 @@ fil_path_to_space_name(
while (const char* t = static_cast<const char*>(
memchr(tablename, OS_PATH_SEPARATOR,
- end - tablename))) {
+ ulint(end - tablename)))) {
dbname = tablename;
tablename = t + 1;
}
@@ -4220,7 +3642,7 @@ fil_path_to_space_name(
ut_ad(end - tablename > 4);
ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0);
- char* name = mem_strdupl(dbname, end - dbname - 4);
+ char* name = mem_strdupl(dbname, ulint(end - dbname) - 4);
ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR);
#if OS_PATH_SEPARATOR != '/'
@@ -4359,9 +3781,9 @@ fil_ibd_load(
{
	/* If the space is already in the file system cache with this
space ID, then there is nothing to do. */
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(space_id);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (space != NULL) {
/* Compare the filename we are trying to open with the
@@ -4427,7 +3849,8 @@ fil_ibd_load(
/* Every .ibd file is created >= 4 pages in size.
Smaller files cannot be OK. */
- minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
+ minimum_size = os_offset_t(FIL_IBD_FILE_INITIAL_SIZE)
+ << srv_page_size_shift;
if (size == static_cast<os_offset_t>(-1)) {
/* The following call prints an error message */
@@ -4523,51 +3946,37 @@ fil_file_readdir_next_file(
return(-1);
}
-/*******************************************************************//**
-Report that a tablespace for a table was not found. */
-static
-void
-fil_report_missing_tablespace(
-/*===========================*/
- const char* name, /*!< in: table name */
- ulint space_id) /*!< in: table's space id */
-{
- ib::error() << "Table " << name
- << " in the InnoDB data dictionary has tablespace id "
- << space_id << ","
- " but tablespace with that id or name does not exist. Have"
- " you deleted or moved .ibd files?";
-}
-
/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in] space_id tablespace ID
+@param[in,out] space tablespace
@param[in] flags desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags)
+void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
{
ut_ad(!srv_read_only_mode);
- ut_ad(fsp_flags_is_valid(flags, space_id));
- if (!fil_space_get_size(space_id)) {
+ ut_ad(fsp_flags_is_valid(flags, space->id));
+ if (!space->size && (space->purpose != FIL_TYPE_TABLESPACE
+ || !fil_space_get_size(space->id))) {
return;
}
+ /* This code is executed during server startup while no
+ connections are allowed. We do not need to protect against
+ DROP TABLE by fil_space_acquire(). */
mtr_t mtr;
mtr.start();
if (buf_block_t* b = buf_page_get(
- page_id_t(space_id, 0), page_size_t(flags),
+ page_id_t(space->id, 0), page_size_t(flags),
RW_X_LATCH, &mtr)) {
ulint f = fsp_header_get_flags(b->frame);
		/* Suppress the message if only the DATA_DIR flag differs. */
if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) {
ib::warn()
- << "adjusting FSP_SPACE_FLAGS of tablespace "
- << space_id
- << " from " << ib::hex(f)
+ << "adjusting FSP_SPACE_FLAGS of file '"
+ << UT_LIST_GET_FIRST(space->chain)->name
+ << "' from " << ib::hex(f)
<< " to " << ib::hex(flags);
}
if (f != flags) {
- mtr.set_named_space(space_id);
+ mtr.set_named_space(space);
mlog_write_ulint(FSP_HEADER_OFFSET
+ FSP_SPACE_FLAGS + b->frame,
flags, MLOG_4BYTES, &mtr);
@@ -4584,200 +3993,59 @@ startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] print_error_if_does_not_exist
Print detailed error information to the
error log if a matching tablespace is not found from memory.
-@param[in] heap Heap memory
@param[in] table_flags table flags
-@return true if a matching tablespace exists in the memory cache */
-bool
+@return the tablespace
+@retval NULL if no matching tablespace exists in the memory cache */
+fil_space_t*
fil_space_for_table_exists_in_mem(
ulint id,
const char* name,
bool print_error_if_does_not_exist,
- mem_heap_t* heap,
ulint table_flags)
{
- fil_space_t* fnamespace;
- fil_space_t* space;
-
const ulint expected_flags = dict_tf_to_fsp_flags(table_flags);
- mutex_enter(&fil_system->mutex);
-
- /* Look if there is a space with the same id */
-
- space = fil_space_get_by_id(id);
-
- /* Look if there is a space with the same name; the name is the
- directory path from the datadir to the file */
-
- fnamespace = fil_space_get_by_name(name);
- bool valid = space && !((space->flags ^ expected_flags)
- & ~FSP_FLAGS_MEM_MASK);
-
- if (!space) {
- } else if (!valid || space == fnamespace) {
- /* Found with the same file name, or got a flag mismatch. */
- goto func_exit;
- }
-
- if (!print_error_if_does_not_exist) {
- valid = false;
- goto func_exit;
- }
-
- if (space == NULL) {
- if (fnamespace == NULL) {
- if (print_error_if_does_not_exist) {
- fil_report_missing_tablespace(name, id);
- }
- } else {
- ib::error() << "Table " << name << " in InnoDB data"
- " dictionary has tablespace id " << id
- << ", but a tablespace with that id does not"
- " exist. There is a tablespace of name "
- << fnamespace->name << " and id "
- << fnamespace->id << ", though. Have you"
- " deleted or moved .ibd files?";
+ mutex_enter(&fil_system.mutex);
+ if (fil_space_t* space = fil_space_get_by_id(id)) {
+ if ((space->flags ^ expected_flags) & ~FSP_FLAGS_MEM_MASK) {
+ goto func_exit;
}
-error_exit:
- ib::info() << TROUBLESHOOT_DATADICT_MSG;
- valid = false;
- goto func_exit;
- }
-
- if (0 != strcmp(space->name, name)) {
- ib::error() << "Table " << name << " in InnoDB data dictionary"
- " has tablespace id " << id << ", but the tablespace"
- " with that id has name " << space->name << "."
- " Have you deleted or moved .ibd files?";
-
- if (fnamespace != NULL) {
- ib::error() << "There is a tablespace with the right"
- " name: " << fnamespace->name << ", but its id"
- " is " << fnamespace->id << ".";
+ if (strcmp(space->name, name)) {
+ ib::error() << "Table " << name
+ << " in InnoDB data dictionary"
+ " has tablespace id " << id
+ << ", but the tablespace"
+ " with that id has name " << space->name << "."
+ " Have you deleted or moved .ibd files?";
+ goto error_exit;
}
- goto error_exit;
- }
-
-func_exit:
- if (valid) {
/* Adjust the flags that are in FSP_FLAGS_MEM_MASK.
FSP_SPACE_FLAGS will not be written back here. */
space->flags = expected_flags;
+ mutex_exit(&fil_system.mutex);
+ if (!srv_read_only_mode) {
+ fsp_flags_try_adjust(space, expected_flags
+ & ~FSP_FLAGS_MEM_MASK);
+ }
+ return space;
}
- mutex_exit(&fil_system->mutex);
-
- if (valid && !srv_read_only_mode) {
- fsp_flags_try_adjust(id, expected_flags & ~FSP_FLAGS_MEM_MASK);
- }
-
- return(valid);
-}
-
-/** Return the space ID based on the tablespace name.
-The tablespace must be found in the tablespace memory cache.
-This call is made from external to this module, so the mutex is not owned.
-@param[in] tablespace Tablespace name
-@return space ID if tablespace found, ULINT_UNDEFINED if space not. */
-ulint
-fil_space_get_id_by_name(
- const char* tablespace)
-{
- mutex_enter(&fil_system->mutex);
- /* Search for a space with the same name. */
- fil_space_t* space = fil_space_get_by_name(tablespace);
- ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id;
-
- mutex_exit(&fil_system->mutex);
-
- return(id);
-}
-
-/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
-
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return true if succeed */
-bool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve) /*!< in: how many one wants to reserve */
-{
- fil_space_t* space;
- bool success;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- if (space->n_reserved_extents + n_to_reserve > n_free_now) {
- success = false;
- } else {
- space->n_reserved_extents += n_to_reserve;
- success = true;
+ if (print_error_if_does_not_exist) {
+ ib::error() << "Table " << name
+ << " in the InnoDB data dictionary"
+ " has tablespace id " << id
+ << ", but tablespace with that id"
+ " or name does not exist. Have"
+ " you deleted or moved .ibd files?";
+error_exit:
+ ib::info() << TROUBLESHOOT_DATADICT_MSG;
}
- mutex_exit(&fil_system->mutex);
-
- return(success);
-}
-
-/*******************************************************************//**
-Releases free extents in a file space. */
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved) /*!< in: how many one reserved */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
- ut_a(space->n_reserved_extents >= n_reserved);
-
- space->n_reserved_extents -= n_reserved;
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
- ulint n;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- n = space->n_reserved_extents;
-
- mutex_exit(&fil_system->mutex);
-
- return(n);
+func_exit:
+ mutex_exit(&fil_system.mutex);
+ return NULL;
}
/*============================ FILE I/O ================================*/
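
Aside: fil_space_for_table_exists_in_mem() now does the id lookup, the name comparison and the flag check in one pass under fil_system.mutex and returns the space itself rather than a bool. A rough standalone sketch of that guarded-lookup shape, using standard-library containers instead of the InnoDB hash and mutex types:

	#include <cstdio>
	#include <mutex>
	#include <string>
	#include <unordered_map>

	struct Space { std::string name; unsigned flags; };

	static std::mutex cache_mutex;
	static std::unordered_map<unsigned, Space> cache;   /* space id -> space */

	/* Return the cached space only if both the id and the name match. */
	static Space* lookup(unsigned id, const std::string& name)
	{
		std::lock_guard<std::mutex> guard(cache_mutex);
		auto it = cache.find(id);
		if (it == cache.end() || it->second.name != name) {
			return nullptr;              /* missing, or the name does not match */
		}
		return &it->second;                  /* single-threaded demo: no pinning here */
	}

	int main()
	{
		cache.emplace(5u, Space{"test/t1", 0x21});
		std::printf("%s\n", lookup(5, "test/t1") ? "found" : "not found");
		return 0;
	}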
@@ -4795,15 +4063,14 @@ bool
fil_node_prepare_for_io(
/*====================*/
fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
fil_space_t* space) /*!< in: space */
{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
+ ut_ad(node && space);
+ ut_ad(mutex_own(&fil_system.mutex));
- if (system->n_open > system->max_n_open + 5) {
- ib::warn() << "Open files " << system->n_open
- << " exceeds the limit " << system->max_n_open;
+ if (fil_system.n_open > srv_max_n_open_files + 5) {
+ ib::warn() << "Open files " << fil_system.n_open
+ << " exceeds the limit " << srv_max_n_open_files;
}
if (!node->is_open()) {
@@ -4817,10 +4084,8 @@ fil_node_prepare_for_io(
if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
/* The node is in the LRU list, remove it */
-
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- UT_LIST_REMOVE(system->LRU, node);
+ ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
+ UT_LIST_REMOVE(fil_system.LRU, node);
}
node->n_pending++;
@@ -4835,7 +4100,7 @@ static
void
fil_node_complete_io(fil_node_t* node, const IORequest& type)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_a(node->n_pending > 0);
--node->n_pending;
@@ -4845,11 +4110,11 @@ fil_node_complete_io(fil_node_t* node, const IORequest& type)
if (type.is_write()) {
ut_ad(!srv_read_only_mode
- || fsp_is_system_temporary(node->space->id));
+ || node->space->purpose == FIL_TYPE_TEMPORARY);
- ++fil_system->modification_counter;
+ ++fil_system.modification_counter;
- node->modification_counter = fil_system->modification_counter;
+ node->modification_counter = fil_system.modification_counter;
if (fil_buffering_disabled(node->space)) {
@@ -4864,14 +4129,14 @@ fil_node_complete_io(fil_node_t* node, const IORequest& type)
node->space->is_in_unflushed_spaces = true;
UT_LIST_ADD_FIRST(
- fil_system->unflushed_spaces, node->space);
+ fil_system.unflushed_spaces, node->space);
}
}
if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
/* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(fil_system->LRU, node);
+ UT_LIST_ADD_FIRST(fil_system.LRU, node);
}
}
@@ -4935,15 +4200,13 @@ fil_io(
ut_ad(req_type.validate());
ut_ad(len > 0);
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
+ ut_ad(byte_offset < srv_page_size);
ut_ad(!page_size.is_compressed() || byte_offset == 0);
- ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
-#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
-# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
-#endif
-#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
-# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
-#endif
+ ut_ad(srv_page_size == 1UL << srv_page_size_shift);
+ compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MAX)
+ == UNIV_PAGE_SIZE_MAX);
+ compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MIN)
+ == UNIV_PAGE_SIZE_MIN);
ut_ad(fil_validate_skip());
/* ibuf bitmap pages must be read in the sync AIO mode: */
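
Aside: the hunk above turns the old preprocessor #error checks into compile_time_assert calls. The same idea with plain C++11 static_assert and placeholder constants:

	constexpr unsigned kPageSizeShiftMax = 16;      /* placeholder for UNIV_PAGE_SIZE_SHIFT_MAX */
	constexpr unsigned kPageSizeMax      = 65536;   /* placeholder for UNIV_PAGE_SIZE_MAX */

	/* rejected at compile time if the shift and the size ever disagree */
	static_assert((1U << kPageSizeShiftMax) == kPageSizeMax,
		      "page size must equal 1 << page size shift");

	int main() { return 0; }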
@@ -5004,7 +4267,7 @@ fil_io(
&& space->stop_new_ops
&& !space->is_being_truncated)) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (!req_type.ignore_missing() && !ignore_missing_space) {
ib::error()
@@ -5028,7 +4291,7 @@ fil_io(
if (node == NULL) {
if (req_type.ignore_missing()) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(DB_ERROR);
}
@@ -5052,14 +4315,13 @@ fil_io(
if (space->id != TRX_SYS_SPACE
&& UT_LIST_GET_LEN(space->chain) == 1
&& (srv_is_tablespace_truncated(space->id)
- || space->is_being_truncated
|| srv_was_tablespace_truncated(space))
&& req_type.is_read()) {
/* Handle page which is outside the truncated
tablespace bounds when recovering from a crash
happened during a truncation */
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(DB_TABLESPACE_TRUNCATED);
}
@@ -5070,10 +4332,10 @@ fil_io(
}
/* Open file if closed */
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
+ if (!fil_node_prepare_for_io(node, space)) {
if (fil_type_is_data(space->purpose)
&& fil_is_user_tablespace_id(space->id)) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
if (!req_type.ignore_missing()) {
ib::error()
@@ -5109,7 +4371,7 @@ fil_io(
should return with DB_ERROR and let caller decide
what to do. */
fil_node_complete_io(node, req_type);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(DB_ERROR);
}
@@ -5119,18 +4381,18 @@ fil_io(
}
/* Now we have made the changes in the data structures of fil_system */
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
/* Calculate the low 32 bits and the high 32 bits of the file offset */
if (!page_size.is_compressed()) {
offset = ((os_offset_t) cur_page_no
- << UNIV_PAGE_SIZE_SHIFT) + byte_offset;
+ << srv_page_size_shift) + byte_offset;
ut_a(node->size - cur_page_no
- >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE));
+ >= ((byte_offset + len + (srv_page_size - 1))
+ >> srv_page_size_shift));
} else {
ulint size_shift;
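
Aside: the offset arithmetic above computes the byte offset of a page as page_no shifted by srv_page_size_shift, and rounds a byte length up to whole pages by adding (page size - 1) before shifting right. A standalone check of both identities with assumed values:

	#include <cassert>
	#include <cstdint>

	int main()
	{
		const unsigned shift       = 14;                   /* 16 KiB pages (assumed) */
		const uint64_t page_size   = uint64_t(1) << shift;
		const uint64_t page_no     = 37;
		const uint64_t byte_offset = 0;
		const uint64_t len         = 3 * page_size + 1;    /* spills into a fourth page */

		const uint64_t offset = (page_no << shift) + byte_offset;
		const uint64_t pages  = (byte_offset + len + (page_size - 1)) >> shift;

		assert(offset == page_no * page_size);             /* shift == multiply by page size */
		assert(pages == 4);                                /* length rounded up to whole pages */
		return 0;
	}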
@@ -5179,11 +4441,11 @@ fil_io(
/* The i/o operation is already completed when we return from
os_aio: */
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_node_complete_io(node, req_type);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_ad(fil_validate_skip());
}
@@ -5219,14 +4481,14 @@ fil_aio_wait(
srv_set_io_thread_op_info(segment, "complete io for fil node");
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_node_complete_io(node, type);
const fil_type_t purpose = node->space->purpose;
const ulint space_id= node->space->id;
const bool dblwr = node->space->use_doublewrite();
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_ad(fil_validate_skip());
@@ -5239,7 +4501,26 @@ fil_aio_wait(
switch (purpose) {
case FIL_TYPE_LOG:
srv_set_io_thread_op_info(segment, "complete io for log");
- log_io_complete(static_cast<log_group_t*>(message));
+ /* We use synchronous writing of the logs
+ and can only end up here when writing a log checkpoint! */
+ ut_a(ptrdiff_t(message) == 1);
+ /* It was a checkpoint write */
+ switch (srv_flush_t(srv_file_flush_method)) {
+ case SRV_O_DSYNC:
+ case SRV_NOSYNC:
+ break;
+ case SRV_FSYNC:
+ case SRV_LITTLESYNC:
+ case SRV_O_DIRECT:
+ case SRV_O_DIRECT_NO_FSYNC:
+#ifdef _WIN32
+ case SRV_ALL_O_DIRECT_FSYNC:
+#endif
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ }
+
+ DBUG_PRINT("ib_log", ("checkpoint info written"));
+ log_sys.complete_checkpoint();
return;
case FIL_TYPE_TABLESPACE:
case FIL_TYPE_TEMPORARY:
@@ -5272,7 +4553,7 @@ fil_aio_wait(
<< ": " << ut_strerr(err);
}
- fil_space_release_for_io(space);
+ space->release_for_io();
}
return;
}
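
Aside: the new checkpoint-completion branch above only issues fil_flush() for flush methods whose writes are not already durable. A condensed standalone sketch of that decision with a placeholder enum (the real srv_flush_t has more, partly platform-specific, members):

	#include <cstdio>

	enum class FlushMethod { FSYNC, O_DSYNC, NOSYNC, O_DIRECT, O_DIRECT_NO_FSYNC };

	static bool needs_explicit_flush(FlushMethod m)
	{
		switch (m) {
		case FlushMethod::O_DSYNC:           /* writes are already synchronous */
		case FlushMethod::NOSYNC:            /* flushing was explicitly disabled */
			return false;
		case FlushMethod::FSYNC:
		case FlushMethod::O_DIRECT:
		case FlushMethod::O_DIRECT_NO_FSYNC:
			break;
		}
		return true;                         /* still need an explicit flush after the write */
	}

	int main()
	{
		std::printf("%d\n", needs_explicit_flush(FlushMethod::FSYNC));
		return 0;
	}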
@@ -5289,7 +4570,7 @@ fil_flush(
ulint space_id) /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
{
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (fil_space_t* space = fil_space_get_by_id(space_id)) {
if (space->purpose != FIL_TYPE_TEMPORARY
@@ -5298,7 +4579,7 @@ fil_flush(
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
/** Flush a tablespace.
@@ -5306,16 +4587,16 @@ fil_flush(
void
fil_flush(fil_space_t* space)
{
- ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->pending_io());
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->purpose == FIL_TYPE_IMPORT);
if (!space->is_stopping()) {
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (!space->is_stopping()) {
fil_flush_low(space);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
}
}
@@ -5332,17 +4613,17 @@ fil_flush_file_spaces(
ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
+ n_space_ids = UT_LIST_GET_LEN(fil_system.unflushed_spaces);
if (n_space_ids == 0) {
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return;
}
/* Assemble a list of space ids to flush. Previously, we
- traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
+ traversed fil_system.unflushed_spaces and called UT_LIST_GET_NEXT()
on a space that was just removed from the list by fil_flush().
Thus, the space could be dropped and the memory overwritten. */
space_ids = static_cast<ulint*>(
@@ -5350,7 +4631,7 @@ fil_flush_file_spaces(
n_space_ids = 0;
- for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
+ for (space = UT_LIST_GET_FIRST(fil_system.unflushed_spaces);
space;
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
@@ -5361,7 +4642,7 @@ fil_flush_file_spaces(
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
/* Flush the spaces. It will not hurt to call fil_flush() on
a non-existing space id. */
@@ -5397,10 +4678,14 @@ struct Check {
@return number of open file nodes */
static ulint validate(const fil_space_t* space)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
Check check;
ut_list_validate(space->chain, check);
ut_a(space->size == check.size);
+ ut_ad(space->id != TRX_SYS_SPACE
+ || space == fil_system.sys_space);
+ ut_ad(space->id != SRV_TMP_SPACE_ID
+ || space == fil_system.temp_space);
return(check.n_open);
}
};
@@ -5416,14 +4701,14 @@ fil_validate(void)
fil_node_t* fil_node;
ulint n_open = 0;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
/* Look for spaces in the hash table */
- for (ulint i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
+ for (ulint i = 0; i < hash_get_n_cells(fil_system.spaces); i++) {
for (space = static_cast<fil_space_t*>(
- HASH_GET_FIRST(fil_system->spaces, i));
+ HASH_GET_FIRST(fil_system.spaces, i));
space != 0;
space = static_cast<fil_space_t*>(
HASH_GET_NEXT(hash, space))) {
@@ -5432,11 +4717,11 @@ fil_validate(void)
}
}
- ut_a(fil_system->n_open == n_open);
+ ut_a(fil_system.n_open == n_open);
- UT_LIST_CHECK(fil_system->LRU);
+ UT_LIST_CHECK(fil_system.LRU);
- for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
+ for (fil_node = UT_LIST_GET_FIRST(fil_system.LRU);
fil_node != 0;
fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
@@ -5446,7 +4731,7 @@ fil_validate(void)
ut_a(fil_space_belongs_in_lru(fil_node->space));
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(true);
}
@@ -5497,30 +4782,6 @@ fil_page_set_type(
mach_write_to_2(page + FIL_PAGE_TYPE, type);
}
-/****************************************************************//**
-Closes the tablespace memory cache. */
-void
-fil_close(void)
-/*===========*/
-{
- if (fil_system) {
- hash_table_free(fil_system->spaces);
-
- hash_table_free(fil_system->name_hash);
-
- ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
-
- mutex_free(&fil_system->mutex);
-
- ut_free(fil_system);
- fil_system = NULL;
-
- fil_space_crypt_cleanup();
- }
-}
-
/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables.
@@ -5544,51 +4805,6 @@ fil_delete_file(
}
}
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-dberr_t
-fil_get_space_names(
-/*================*/
- space_name_list_t& space_name_list)
- /*!< in/out: List to append to */
-{
- fil_space_t* space;
- dberr_t err = DB_SUCCESS;
-
- mutex_enter(&fil_system->mutex);
-
- for (space = UT_LIST_GET_FIRST(fil_system->space_list);
- space != NULL;
- space = UT_LIST_GET_NEXT(space_list, space)) {
-
- if (space->purpose == FIL_TYPE_TABLESPACE) {
- ulint len;
- char* name;
-
- len = ::strlen(space->name);
- name = UT_NEW_ARRAY_NOKEY(char, len + 1);
-
- if (name == 0) {
- /* Caller to free elements allocated so far. */
- err = DB_OUT_OF_MEMORY;
- break;
- }
-
- memcpy(name, space->name, len);
- name[len] = 0;
-
- space_name_list.push_back(name);
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(err);
-}
-
/** Generate redo log for swapping two .ibd files
@param[in] old_table old table
@param[in] new_table new table
@@ -5602,88 +4818,65 @@ fil_mtr_rename_log(
const char* tmp_name,
mtr_t* mtr)
{
- dberr_t err;
-
- bool old_is_file_per_table =
- !is_system_tablespace(old_table->space);
-
- bool new_is_file_per_table =
- !is_system_tablespace(new_table->space);
+ ut_ad(old_table->space != fil_system.temp_space);
+ ut_ad(new_table->space != fil_system.temp_space);
+ ut_ad(old_table->space->id == old_table->space_id);
+ ut_ad(new_table->space->id == new_table->space_id);
/* If neither table is file-per-table,
there will be no renaming of files. */
- if (!old_is_file_per_table && !new_is_file_per_table) {
+ if (!old_table->space_id && !new_table->space_id) {
return(DB_SUCCESS);
}
- const char* old_dir = DICT_TF_HAS_DATA_DIR(old_table->flags)
- ? old_table->data_dir_path
- : NULL;
-
- char* old_path = fil_make_filepath(
- old_dir, old_table->name.m_name, IBD, (old_dir != NULL));
- if (old_path == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
+ const bool has_data_dir = DICT_TF_HAS_DATA_DIR(old_table->flags);
- if (old_is_file_per_table) {
+ if (old_table->space_id) {
char* tmp_path = fil_make_filepath(
- old_dir, tmp_name, IBD, (old_dir != NULL));
+ has_data_dir ? old_table->data_dir_path : NULL,
+ tmp_name, IBD, has_data_dir);
if (tmp_path == NULL) {
- ut_free(old_path);
return(DB_OUT_OF_MEMORY);
}
+ const char* old_path = old_table->space->chain.start->name;
/* Temp filepath must not exist. */
- err = fil_rename_tablespace_check(
- old_table->space, old_path, tmp_path,
- dict_table_is_discarded(old_table));
+ dberr_t err = fil_rename_tablespace_check(
+ old_path, tmp_path, !old_table->space);
if (err != DB_SUCCESS) {
- ut_free(old_path);
ut_free(tmp_path);
return(err);
}
fil_name_write_rename_low(
- old_table->space, 0, old_path, tmp_path, mtr);
+ old_table->space_id, 0, old_path, tmp_path, mtr);
ut_free(tmp_path);
}
- if (new_is_file_per_table) {
- const char* new_dir = DICT_TF_HAS_DATA_DIR(new_table->flags)
- ? new_table->data_dir_path
- : NULL;
- char* new_path = fil_make_filepath(
- new_dir, new_table->name.m_name,
- IBD, (new_dir != NULL));
- if (new_path == NULL) {
- ut_free(old_path);
- return(DB_OUT_OF_MEMORY);
- }
+ if (new_table->space_id) {
+ const char* new_path = new_table->space->chain.start->name;
+ char* old_path = fil_make_filepath(
+ has_data_dir ? old_table->data_dir_path : NULL,
+ old_table->name.m_name, IBD, has_data_dir);
/* Destination filepath must not exist unless this ALTER
	TABLE starts and ends with a file-per-table tablespace. */
- if (!old_is_file_per_table) {
- err = fil_rename_tablespace_check(
- new_table->space, new_path, old_path,
- dict_table_is_discarded(new_table));
+ if (!old_table->space_id) {
+ dberr_t err = fil_rename_tablespace_check(
+ new_path, old_path, !new_table->space);
if (err != DB_SUCCESS) {
ut_free(old_path);
- ut_free(new_path);
return(err);
}
}
fil_name_write_rename_low(
- new_table->space, 0, new_path, old_path, mtr);
-
- ut_free(new_path);
+ new_table->space_id, 0, new_path, old_path, mtr);
+ ut_free(old_path);
}
- ut_free(old_path);
-
- return(DB_SUCCESS);
+ return DB_SUCCESS;
}
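
Aside: fil_rename_tablespace_check(), called above, enforces that the rename target does not already exist before the rename is logged. A rough standalone sketch of that precondition using std::filesystem (C++17); the file names are made up:

	#include <cstdio>
	#include <filesystem>
	#include <system_error>

	namespace fs = std::filesystem;

	/* Refuse the rename unless the source exists and the target does not. */
	static bool rename_checked(const fs::path& from, const fs::path& to)
	{
		std::error_code ec;
		if (!fs::exists(from, ec) || fs::exists(to, ec)) {
			return false;
		}
		fs::rename(from, to, ec);
		return !ec;
	}

	int main()
	{
		std::printf("%d\n", rename_checked("t1.ibd", "#sql-ib1.ibd") ? 1 : 0);
		return 0;
	}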
#ifdef UNIV_DEBUG
@@ -5694,7 +4887,7 @@ void
fil_space_validate_for_mtr_commit(
const fil_space_t* space)
{
- ut_ad(!mutex_own(&fil_system->mutex));
+ ut_ad(!mutex_own(&fil_system.mutex));
ut_ad(space != NULL);
ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
ut_ad(!is_predefined_tablespace(space->id));
@@ -5709,11 +4902,11 @@ fil_space_validate_for_mtr_commit(
to quiesce. This is not a problem, because
ibuf_merge_or_delete_for_page() would call
fil_space_acquire() before mtr_start() and
- fil_space_release() after mtr_commit(). This is why
+ fil_space_t::release() after mtr_commit(). This is why
n_pending_ops should not be zero if stop_new_ops is set. */
ut_ad(!space->stop_new_ops
|| space->is_being_truncated /* fil_truncate_prepare() */
- || space->n_pending_ops > 0);
+ || space->referenced());
}
#endif /* UNIV_DEBUG */
@@ -5739,12 +4932,12 @@ fil_names_dirty(
{
ut_ad(log_mutex_own());
ut_ad(recv_recovery_is_on());
- ut_ad(log_sys->lsn != 0);
+ ut_ad(log_sys.lsn != 0);
ut_ad(space->max_lsn == 0);
ut_d(fil_space_validate_for_mtr_commit(space));
- UT_LIST_ADD_LAST(fil_system->named_spaces, space);
- space->max_lsn = log_sys->lsn;
+ UT_LIST_ADD_LAST(fil_system.named_spaces, space);
+ space->max_lsn = log_sys.lsn;
}
/** Write MLOG_FILE_NAME records when a non-predefined persistent
@@ -5759,9 +4952,9 @@ fil_names_dirty_and_write(
{
ut_ad(log_mutex_own());
ut_d(fil_space_validate_for_mtr_commit(space));
- ut_ad(space->max_lsn == log_sys->lsn);
+ ut_ad(space->max_lsn == log_sys.lsn);
- UT_LIST_ADD_LAST(fil_system->named_spaces, space);
+ UT_LIST_ADD_LAST(fil_system.named_spaces, space);
fil_names_write(space, mtr);
DBUG_EXECUTE_IF("fil_names_write_bogus",
@@ -5796,14 +4989,14 @@ fil_names_clear(
ut_ad(log_mutex_own());
- if (log_sys->append_on_checkpoint) {
- mtr_write_log(log_sys->append_on_checkpoint);
+ if (log_sys.append_on_checkpoint) {
+ mtr_write_log(log_sys.append_on_checkpoint);
do_write = true;
}
mtr.start();
- for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->named_spaces);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.named_spaces);
space != NULL; ) {
fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space);
@@ -5815,7 +5008,7 @@ fil_names_clear(
modified any more, subsequent checkpoints will
avoid calling fil_names_write() on it. */
space->max_lsn = 0;
- UT_LIST_REMOVE(fil_system->named_spaces, space);
+ UT_LIST_REMOVE(fil_system.named_spaces, space);
}
/* max_lsn is the last LSN where fil_names_dirty_and_write()
@@ -5886,7 +5079,7 @@ truncate_t::truncate(
return(DB_OUT_OF_MEMORY);
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
fil_space_t* space = fil_space_get_by_id(space_id);
@@ -5909,8 +5102,8 @@ truncate_t::truncate(
node->handle = os_file_create_simple_no_error_handling(
innodb_data_file_key, path, OS_FILE_OPEN,
OS_FILE_READ_WRITE,
- fsp_is_system_temporary(space_id)
- ? false : srv_read_only_mode, &ret);
+ space->purpose != FIL_TYPE_TEMPORARY
+ && srv_read_only_mode, &ret);
if (!ret) {
ib::error() << "Failed to open tablespace file "
@@ -5929,7 +5122,7 @@ truncate_t::truncate(
: space->size;
const bool success = os_file_truncate(
- path, node->handle, trunc_size * UNIV_PAGE_SIZE);
+ path, node->handle, trunc_size << srv_page_size_shift);
if (!success) {
ib::error() << "Cannot truncate file " << path
@@ -5938,7 +5131,6 @@ truncate_t::truncate(
}
space->stop_new_ops = false;
- space->is_being_truncated = false;
/* If we opened the file in this function, close it. */
if (!already_open) {
@@ -5955,7 +5147,7 @@ truncate_t::truncate(
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
ut_free(path);
@@ -6009,23 +5201,12 @@ test_make_filepath()
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
/* @} */
-/** Release the reserved free extents.
-@param[in] n_reserved number of reserved extents */
-void
-fil_space_t::release_free_extents(ulint n_reserved)
-{
- ut_ad(rw_lock_own(&latch, RW_LOCK_X));
-
- ut_a(n_reserved_extents >= n_reserved);
- n_reserved_extents -= n_reserved;
-}
-
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_t::acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
fil_space_t*
@@ -6033,36 +5214,32 @@ fil_space_next(fil_space_t* prev_space)
{
fil_space_t* space=prev_space;
- mutex_enter(&fil_system->mutex);
-
- if (prev_space == NULL) {
- space = UT_LIST_GET_FIRST(fil_system->space_list);
+ mutex_enter(&fil_system.mutex);
- /* We can trust that space is not NULL because at least the
- system tablespace is always present and loaded first. */
- space->n_pending_ops++;
+ if (!space) {
+ space = UT_LIST_GET_FIRST(fil_system.space_list);
} else {
- ut_ad(space->n_pending_ops > 0);
+ ut_a(space->referenced());
/* Move on to the next fil_space_t */
- space->n_pending_ops--;
+ space->release();
space = UT_LIST_GET_NEXT(space_list, space);
+ }
- /* Skip spaces that are being created by
- fil_ibd_create(), or dropped, or !tablespace. */
- while (space != NULL
- && (UT_LIST_GET_LEN(space->chain) == 0
- || space->is_stopping()
- || space->purpose != FIL_TYPE_TABLESPACE)) {
- space = UT_LIST_GET_NEXT(space_list, space);
- }
+ /* Skip spaces that are being created by
+ fil_ibd_create(), or dropped, or !tablespace. */
+ while (space != NULL
+ && (UT_LIST_GET_LEN(space->chain) == 0
+ || space->is_stopping()
+ || space->purpose != FIL_TYPE_TABLESPACE)) {
+ space = UT_LIST_GET_NEXT(space_list, space);
+ }
- if (space != NULL) {
- space->n_pending_ops++;
- }
+ if (space != NULL) {
+ space->acquire();
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(space);
}
@@ -6075,23 +5252,23 @@ static
void
fil_space_remove_from_keyrotation(fil_space_t* space)
{
- ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space);
- if (space->n_pending_ops == 0 && space->is_in_rotation_list) {
+ if (space->is_in_rotation_list && !space->referenced()) {
space->is_in_rotation_list = false;
- ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
- UT_LIST_REMOVE(fil_system->rotation_list, space);
+ ut_a(UT_LIST_GET_LEN(fil_system.rotation_list) > 0);
+ UT_LIST_REMOVE(fil_system.rotation_list, space);
}
}
/** Return the next fil_space_t from key rotation list.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_t::acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
fil_space_t*
@@ -6101,28 +5278,25 @@ fil_space_keyrotate_next(
fil_space_t* space = prev_space;
fil_space_t* old = NULL;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) {
+ if (UT_LIST_GET_LEN(fil_system.rotation_list) == 0) {
if (space) {
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
+ space->release();
fil_space_remove_from_keyrotation(space);
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(NULL);
}
if (prev_space == NULL) {
- space = UT_LIST_GET_FIRST(fil_system->rotation_list);
+ space = UT_LIST_GET_FIRST(fil_system.rotation_list);
/* We can trust that space is not NULL because we
checked list length above */
} else {
- ut_ad(space->n_pending_ops > 0);
-
/* Move on to the next fil_space_t */
- space->n_pending_ops--;
+ space->release();
old = space;
space = UT_LIST_GET_NEXT(rotation_list, space);
@@ -6143,10 +5317,10 @@ fil_space_keyrotate_next(
}
if (space != NULL) {
- space->n_pending_ops++;
+ space->acquire();
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return(space);
}
@@ -6189,7 +5363,7 @@ fil_space_found_by_id(
ulint id) /*!< in: space id */
{
fil_space_t* space = NULL;
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
/* Not found if space is being deleted */
@@ -6197,7 +5371,7 @@ fil_space_found_by_id(
space = NULL;
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return space;
}
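
Aside: fil_space_next() and fil_space_keyrotate_next() above pin the returned element with acquire() and release the previous one, so a concurrent DROP cannot free it between calls. A simplified standalone sketch of that hand-over-hand pattern, with a plain counter standing in for the real reference machinery:

	#include <cstdio>
	#include <list>
	#include <mutex>

	struct Space { int id; unsigned refs; };

	static std::mutex list_mutex;
	static std::list<Space> spaces = { {1, 0}, {2, 0}, {3, 0} };   /* std::list keeps pointers stable */

	/* Return the next element, pinned; release the pin on the previous one. */
	static Space* next_space(Space* prev)
	{
		std::lock_guard<std::mutex> guard(list_mutex);
		std::list<Space>::iterator it = spaces.begin();
		if (prev) {
			--prev->refs;                /* release the previously returned element */
			while (&*it != prev) ++it;   /* locate it, then step to its successor */
			++it;
		}
		if (it == spaces.end()) return nullptr;
		++it->refs;                          /* pin before the mutex is dropped */
		return &*it;
	}

	int main()
	{
		for (Space* s = next_space(nullptr); s; s = next_space(s)) {
			std::printf("space %d\n", s->id);
		}
		return 0;
	}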
diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc
index 1c734f39f15..9d90c287ffc 100644
--- a/storage/innobase/fil/fil0pagecompress.cc
+++ b/storage/innobase/fil/fil0pagecompress.cc
@@ -106,7 +106,7 @@ ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level,
/* If no compression level was provided to this table, use system
default level */
if (comp_level == 0) {
- comp_level = page_zip_level;
+ comp_level = int(page_zip_level);
}
ulint write_size = srv_page_size - header_len;
@@ -278,12 +278,6 @@ success:
srv_stats.page_compression_saved.add(srv_page_size - write_size);
srv_stats.pages_page_compressed.inc();
- /* If we do not persistently trim rest of page, we need to write it
- all */
- if (!srv_use_trim) {
- memset(out_buf + write_size, 0, srv_page_size - write_size);
- }
-
return write_size;
}
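
Aside: the removed memset only mattered when the unused tail of a compressed page could not be trimmed and therefore had to be written as zeros. A standalone sketch of the two strategies with a made-up compressed length:

	#include <cstddef>
	#include <cstdio>
	#include <cstring>

	int main()
	{
		unsigned char page[16384];
		std::memset(page, 0xAA, sizeof page);    /* pretend this is the compressed output */

		const std::size_t write_size = 5000;     /* bytes actually produced (example) */
		const bool        can_trim   = false;    /* whether the tail can be punched out */

		if (!can_trim) {
			/* pad the tail with zeros so the whole page can be written */
			std::memset(page + write_size, 0, sizeof page - write_size);
		}
		std::printf("writing %zu of %zu bytes\n",
			    can_trim ? write_size : sizeof page, sizeof page);
		return 0;
	}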
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
index 4a8874d0fa5..3070f989c04 100644
--- a/storage/innobase/fsp/fsp0file.cc
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -297,7 +297,7 @@ Datafile::read_first_page(bool read_only_mode)
/* Align the memory for a possible read from a raw device */
m_first_page = static_cast<byte*>(
- ut_align(m_first_page_buf, UNIV_PAGE_SIZE));
+ ut_align(m_first_page_buf, srv_page_size));
IORequest request;
dberr_t err = DB_ERROR;
@@ -524,7 +524,7 @@ err_exit:
/* Check if the whole page is blank. */
if (!m_space_id && !m_flags) {
const byte* b = m_first_page;
- ulint nonzero_bytes = UNIV_PAGE_SIZE;
+ ulint nonzero_bytes = srv_page_size;
while (*b == '\0' && --nonzero_bytes != 0) {
@@ -545,13 +545,13 @@ err_exit:
const page_size_t page_size(m_flags);
- if (univ_page_size.logical() != page_size.logical()) {
- /* Page size must be univ_page_size. */
+ if (srv_page_size != page_size.logical()) {
+ /* Logical size must be innodb_page_size. */
ib::error()
<< "Data file '" << m_filepath << "' uses page size "
<< page_size.logical() << ", but the innodb_page_size"
" start-up parameter is "
- << univ_page_size.logical();
+ << srv_page_size;
free_first_page();
return(DB_ERROR);
}
@@ -678,8 +678,8 @@ Datafile::find_space_id()
bool noncompressed_ok = false;
/* For noncompressed pages, the page size must be
- equal to univ_page_size.physical(). */
- if (page_size == univ_page_size.physical()) {
+ equal to srv_page_size. */
+ if (page_size == srv_page_size) {
noncompressed_ok = !buf_page_is_corrupted(
false, page, univ_page_size, NULL);
}
@@ -693,11 +693,11 @@ Datafile::find_space_id()
assume the page is compressed if univ_page_size.
logical() is equal to or less than 16k and the
page_size we are checking is equal to or less than
- univ_page_size.logical(). */
- if (univ_page_size.logical() <= UNIV_PAGE_SIZE_DEF
- && page_size <= univ_page_size.logical()) {
+ srv_page_size. */
+ if (srv_page_size <= UNIV_PAGE_SIZE_DEF
+ && page_size <= srv_page_size) {
const page_size_t compr_page_size(
- page_size, univ_page_size.logical(),
+ page_size, srv_page_size,
true);
compressed_ok = !buf_page_is_corrupted(
@@ -826,7 +826,10 @@ open that file, and read the contents into m_filepath.
dberr_t
RemoteDatafile::open_link_file()
{
- set_link_filepath(NULL);
+ if (m_link_filepath == NULL) {
+ m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);
+ }
+
m_filepath = read_link_file(m_link_filepath);
return(m_filepath == NULL ? DB_CANNOT_OPEN_FILE : DB_SUCCESS);
@@ -892,18 +895,6 @@ RemoteDatafile::shutdown()
}
}
-/** Set the link filepath. Use default datadir, the base name of
-the path provided without its suffix, plus DOT_ISL.
-@param[in] path filepath which contains a basename to use.
- If NULL, use m_name as the basename. */
-void
-RemoteDatafile::set_link_filepath(const char* path)
-{
- if (m_link_filepath == NULL) {
- m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);
- }
-}
-
/** Creates a new InnoDB Symbolic Link (ISL) file. It is always created
under the 'datadir' of MySQL. The datadir is the directory of a
running mysqld program. We can refer to it by simply using the path ".".
@@ -1038,7 +1029,7 @@ char*
RemoteDatafile::read_link_file(
const char* link_filepath)
{
- FILE* file = fopen(link_filepath, "r+b");
+ FILE* file = fopen(link_filepath, "r+b" STR_O_CLOEXEC);
if (file == NULL) {
return(NULL);
}
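
Aside: one hunk above checks whether the first page of a data file is entirely blank by scanning it for a nonzero byte. A standalone sketch of that scan, assuming a 16 KiB page:

	#include <cstddef>
	#include <cstdio>

	int main()
	{
		unsigned char page[16384] = {};          /* value-initialized: every byte is zero */

		std::size_t          remaining = sizeof page;
		const unsigned char* b         = page;

		while (*b == '\0' && --remaining != 0) {
			b++;                             /* keep scanning while the bytes stay zero */
		}
		std::printf(remaining == 0 ? "first page is blank\n" : "first page has data\n");
		return 0;
	}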
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 11c5c65d861..6ef6764ebb4 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -173,20 +173,6 @@ fsp_get_space_header(
return(header);
}
-#ifdef UNIV_DEBUG
-/** Skip some of the sanity checks that are time consuming even in debug mode
-and can affect frequent verification runs that are done to ensure stability of
-the product.
-@return true if check should be skipped for given space. */
-bool
-fsp_skip_sanity_check(
- ulint space_id)
-{
- return(srv_skip_temp_table_checks_debug
- && fsp_is_system_temporary(space_id));
-}
-#endif /* UNIV_DEBUG */
-
/**********************************************************************//**
Gets a descriptor bit of a page.
@return TRUE if free */
@@ -598,7 +584,7 @@ fsp_init_file_page_low(
{
page_t* page = buf_block_get_frame(block);
- memset(page, 0, UNIV_PAGE_SIZE);
+ memset(page, 0, srv_page_size);
mach_write_to_4(page + FIL_PAGE_OFFSET, block->page.id.page_no());
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
@@ -635,8 +621,7 @@ fsp_space_modify_check(
case MTR_LOG_NO_REDO:
ut_ad(space->purpose == FIL_TYPE_TEMPORARY
|| space->purpose == FIL_TYPE_IMPORT
- || space->redo_skipped_count
- || space->is_being_truncated
+ || my_atomic_loadlint(&space->redo_skipped_count)
|| srv_is_tablespace_truncated(space->id));
return;
case MTR_LOG_ALL:
@@ -697,26 +682,6 @@ fsp_parse_init_file_page(
}
/**********************************************************************//**
-Initializes the fsp system. */
-void
-fsp_init(void)
-/*==========*/
-{
- /* FSP_EXTENT_SIZE must be a multiple of page & zip size */
- ut_a(0 == (UNIV_PAGE_SIZE % FSP_EXTENT_SIZE));
- ut_a(UNIV_PAGE_SIZE);
-
-#if UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX
-# error "UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX != 0"
-#endif
-#if UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN
-# error "UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN != 0"
-#endif
-
- /* Does nothing at the moment */
-}
-
-/**********************************************************************//**
Writes the space id and flags to a tablespace header. The flags contain
row type, physical/compressed page size, and logical/uncompressed page
size of the tablespace. */
@@ -737,24 +702,16 @@ fsp_header_init_fields(
}
/** Initialize a tablespace header.
-@param[in] space_id space id
-@param[in] size current size in blocks
-@param[in,out] mtr mini-transaction */
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
+@param[in,out] space tablespace
+@param[in] size current size in blocks
+@param[in,out] mtr mini-transaction */
+void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
{
- fsp_header_t* header;
- buf_block_t* block;
- page_t* page;
-
- ut_ad(mtr);
-
- fil_space_t* space = mtr_x_lock_space(space_id, mtr);
-
- const page_id_t page_id(space_id, 0);
+ const page_id_t page_id(space->id, 0);
const page_size_t page_size(space->flags);
- block = buf_page_create(page_id, page_size, mtr);
+ mtr_x_lock(&space->latch, mtr);
+ buf_block_t* block = buf_page_create(page_id, page_size, mtr);
buf_page_get(page_id, page_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
@@ -765,40 +722,41 @@ fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
/* The prior contents of the file page should be ignored */
fsp_init_file_page(space, block, mtr);
- page = buf_block_get_frame(block);
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
+ mlog_write_ulint(block->frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
MLOG_2BYTES, mtr);
- header = FSP_HEADER_OFFSET + page;
-
- mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_SPACE_FLAGS,
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SPACE_ID + block->frame,
+ space->id, MLOG_4BYTES, mtr);
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_NOT_USED + block->frame, 0,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SIZE + block->frame, size,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + block->frame, 0,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + block->frame,
space->flags & ~FSP_FLAGS_MEM_MASK,
MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_FRAG_N_USED + block->frame, 0,
+ MLOG_4BYTES, mtr);
- flst_init(header + FSP_FREE, mtr);
- flst_init(header + FSP_FREE_FRAG, mtr);
- flst_init(header + FSP_FULL_FRAG, mtr);
- flst_init(header + FSP_SEG_INODES_FULL, mtr);
- flst_init(header + FSP_SEG_INODES_FREE, mtr);
+ flst_init(FSP_HEADER_OFFSET + FSP_FREE + block->frame, mtr);
+ flst_init(FSP_HEADER_OFFSET + FSP_FREE_FRAG + block->frame, mtr);
+ flst_init(FSP_HEADER_OFFSET + FSP_FULL_FRAG + block->frame, mtr);
+ flst_init(FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL + block->frame, mtr);
+ flst_init(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE + block->frame, mtr);
- mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
+ mlog_write_ull(FSP_HEADER_OFFSET + FSP_SEG_ID + block->frame, 1, mtr);
- fsp_fill_free_list(!is_system_tablespace(space_id),
- space, header, mtr);
+ fsp_fill_free_list(!is_system_tablespace(space->id),
+ space, FSP_HEADER_OFFSET + block->frame, mtr);
/* Write encryption metadata to page 0 if tablespace is
encrypted or encryption is disabled by table option. */
if (space->crypt_data &&
(space->crypt_data->should_encrypt() ||
space->crypt_data->not_encrypted())) {
- space->crypt_data->write_page0(space, page, mtr);
+ space->crypt_data->write_page0(space, block->frame, mtr);
}
}
@@ -829,63 +787,6 @@ fsp_header_get_space_id(
return(id);
}
-/**********************************************************************//**
-Increases the space size field of a space. */
-void
-fsp_header_inc_size(
-/*================*/
- ulint space_id, /*!< in: space id */
- ulint size_inc, /*!< in: size increment in pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_header_t* header;
- ulint size;
-
- ut_ad(mtr);
-
- fil_space_t* space = mtr_x_lock_space(space_id, mtr);
- ut_d(fsp_space_modify_check(space, mtr));
-
- header = fsp_get_space_header(
- space, page_size_t(space->flags), mtr);
-
- size = mach_read_from_4(header + FSP_SIZE);
- ut_ad(size == space->size_in_header);
-
- size += size_inc;
-
- mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
- space->size_in_header = size;
-}
-
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-ulint
-fsp_header_get_tablespace_size(void)
-/*================================*/
-{
- fsp_header_t* header;
- ulint size;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_space_t* space = mtr_x_lock_space(TRX_SYS_SPACE, &mtr);
-
- header = fsp_get_space_header(space, univ_page_size, &mtr);
-
- size = mach_read_from_4(header + FSP_SIZE);
- ut_ad(space->size_in_header == size);
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
/** Try to extend a single-table tablespace so that a page would fit in the
data file.
@param[in,out] space tablespace
@@ -2072,22 +1973,21 @@ Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space_id,/*!< in: space id */
+fseg_create(
+ fil_space_t* space, /*!< in,out: tablespace */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
+ mtr_t* mtr,
+ bool has_done_reservation) /*!< in: whether the caller
+ has already done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr) /*!< in/out: mini-transaction */
{
fsp_header_t* space_header;
fseg_inode_t* inode;
@@ -2097,23 +1997,23 @@ fseg_create_general(
ulint n_reserved;
ulint i;
- DBUG_ENTER("fseg_create_general");
+ DBUG_ENTER("fseg_create");
ut_ad(mtr);
ut_ad(byte_offset + FSEG_HEADER_SIZE
- <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+ <= srv_page_size - FIL_PAGE_DATA_END);
- fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ mtr_x_lock(&space->latch, mtr);
const page_size_t page_size(space->flags);
ut_d(fsp_space_modify_check(space, mtr));
if (page != 0) {
- block = buf_page_get(page_id_t(space_id, page), page_size,
+ block = buf_page_get(page_id_t(space->id, page), page_size,
RW_SX_LATCH, mtr);
header = byte_offset + buf_block_get_frame(block);
- const ulint type = space_id == TRX_SYS_SPACE
+ const ulint type = space->id == TRX_SYS_SPACE
&& page == TRX_SYS_PAGE_NO
? FIL_PAGE_TYPE_TRX_SYS
: FIL_PAGE_TYPE_SYS;
@@ -2122,7 +2022,7 @@ fseg_create_general(
}
if (!has_done_reservation
- && !fsp_reserve_free_extents(&n_reserved, space_id, 2,
+ && !fsp_reserve_free_extents(&n_reserved, space, 2,
FSP_NORMAL, mtr)) {
DBUG_RETURN(NULL);
}
@@ -2189,37 +2089,17 @@ fseg_create_general(
page_get_page_no(page_align(inode)),
MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSEG_HDR_SPACE, space_id, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSEG_HDR_SPACE, space->id, MLOG_4BYTES, mtr);
funct_exit:
if (!has_done_reservation) {
-
- fil_space_release_free_extents(space_id, n_reserved);
+ space->release_free_extents(n_reserved);
}
DBUG_RETURN(block);
}
/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-buf_block_t*
-fseg_create(
-/*========*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
-}
-
-/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
@return number of reserved pages */
@@ -2706,7 +2586,7 @@ fseg_alloc_free_page_general(
fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
if (!has_done_reservation
- && !fsp_reserve_free_extents(&n_reserved, space_id, 2,
+ && !fsp_reserve_free_extents(&n_reserved, space, 2,
FSP_NORMAL, mtr)) {
return(NULL);
}
@@ -2724,7 +2604,7 @@ fseg_alloc_free_page_general(
ut_ad(!has_done_reservation || block != NULL);
if (!has_done_reservation) {
- fil_space_release_free_extents(space_id, n_reserved);
+ space->release_free_extents(n_reserved);
}
return(block);
@@ -2773,7 +2653,7 @@ fsp_reserve_free_pages(
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
+must be released with function fil_space_t::release_free_extents()!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
@@ -2799,7 +2679,7 @@ free pages available.
return true and the tablespace size is <
FSP_EXTENT_SIZE pages, then this can be 0,
otherwise it is n_ext
-@param[in] space_id tablespace identifier
+@param[in,out] space tablespace
@param[in] n_ext number of extents to reserve
@param[in] alloc_type page reservation type (FSP_BLOB, etc)
@param[in,out] mtr the mini transaction
@@ -2810,7 +2690,7 @@ free pages available.
bool
fsp_reserve_free_extents(
ulint* n_reserved,
- ulint space_id,
+ fil_space_t* space,
ulint n_ext,
fsp_reserve_t alloc_type,
mtr_t* mtr,
@@ -2828,7 +2708,7 @@ fsp_reserve_free_extents(
ut_ad(mtr);
*n_reserved = n_ext;
- fil_space_t* space = mtr_x_lock_space(space_id, mtr);
+ mtr_x_lock(&space->latch, mtr);
const page_size_t page_size(space->flags);
space_header = fsp_get_space_header(space, page_size, mtr);
@@ -2900,7 +2780,7 @@ try_again:
ut_error;
}
- if (fil_space_reserve_free_extents(space_id, n_free, n_ext)) {
+ if (space->reserve_free_extents(n_free, n_ext)) {
return(true);
}
try_to_extend:
@@ -2912,69 +2792,6 @@ try_to_extend:
return(false);
}
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space.
-@param[in] space_id tablespace ID
-@return available space in KiB
-@retval UINTMAX_MAX if unknown */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- ulint space_id)
-{
- FilSpace space(space_id);
- if (space() == NULL) {
- return(UINTMAX_MAX);
- }
-
- return(fsp_get_available_space_in_free_extents(space));
-}
-
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space. Start with a space object that has
-been acquired by the caller who holds it for the calculation,
-@param[in] space tablespace object from fil_space_acquire()
-@return available space in KiB */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- const fil_space_t* space)
-{
- ut_ad(space->n_pending_ops > 0);
-
- ulint size_in_header = space->size_in_header;
- if (size_in_header < FSP_EXTENT_SIZE) {
- return(0); /* TODO: count free frag pages and
- return a value based on that */
- }
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents */
- ut_ad(size_in_header >= space->free_limit);
- ulint n_free_up =
- (size_in_header - space->free_limit) / FSP_EXTENT_SIZE;
-
- page_size_t page_size(space->flags);
- if (n_free_up > 0) {
- n_free_up--;
- n_free_up -= n_free_up / (page_size.physical()
- / FSP_EXTENT_SIZE);
- }
-
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function above! */
-
- ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
- ulint n_free = space->free_len + n_free_up;
-
- if (reserve > n_free) {
- return(0);
- }
-
- return(static_cast<uintmax_t>(n_free - reserve)
- * FSP_EXTENT_SIZE * (page_size.physical() / 1024));
-}
-
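
Aside: the removed fsp_get_available_space_in_free_extents() estimated free space as (free extents minus a small reserve) times the extent size. A standalone recomputation of that formula with example numbers, following the removed code:

	#include <cstdint>
	#include <cstdio>

	int main()
	{
		const uint64_t extent_size    = 64;        /* pages per extent with 16 KiB pages */
		const uint64_t page_size_kib  = 16;        /* physical page size in KiB */
		const uint64_t size_in_header = 64000;     /* tablespace size in pages (example) */
		const uint64_t free_limit     = 51200;     /* pages initialized so far (example) */
		const uint64_t free_len       = 100;       /* extents already on the free list (example) */

		/* extents above the free limit, minus those that will hold descriptor pages */
		uint64_t n_free_up = (size_in_header - free_limit) / extent_size;
		if (n_free_up > 0) {
			n_free_up--;
			n_free_up -= n_free_up / ((page_size_kib * 1024) / extent_size);
		}

		/* 2 extents plus roughly 1% of the space are kept back for undo log and cleanup */
		const uint64_t reserve = 2 + ((size_in_header / extent_size) * 2) / 200;
		const uint64_t n_free  = free_len + n_free_up;

		const uint64_t avail_kib = n_free <= reserve
			? 0
			: (n_free - reserve) * extent_size * page_size_kib;

		std::printf("available: %llu KiB\n", (unsigned long long) avail_kib);
		return 0;
	}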
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index b856bf3da74..aec2a3914b8 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -47,14 +47,6 @@ SysTablespace srv_tmp_space;
at a time. We have to make this public because it is a config variable. */
ulong sys_tablespace_auto_extend_increment;
-#ifdef UNIV_DEBUG
-/** Control if extra debug checks need to be done for temporary tablespace.
-Default = true that is disable such checks.
-This variable is not exposed to end-user but still kept as variable for
-developer to enable it during debug. */
-bool srv_skip_temp_table_checks_debug = true;
-#endif /* UNIV_DEBUG */
-
/** Convert a numeric string that optionally ends in G or M or K,
to a number containing megabytes.
@param[in] str String with a quantity in bytes
@@ -358,7 +350,7 @@ SysTablespace::check_size(
So we need to round the size downward to a megabyte.*/
const ulint rounded_size_pages = static_cast<ulint>(
- size >> UNIV_PAGE_SIZE_SHIFT);
+ size >> srv_page_size_shift);
/* If last file */
if (&file == &m_files.back() && m_auto_extend_last_file) {
@@ -402,16 +394,16 @@ SysTablespace::set_size(
/* We created the data file and now write it full of zeros */
ib::info() << "Setting file '" << file.filepath() << "' size to "
- << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB."
+ << (file.m_size >> (20U - srv_page_size_shift)) << " MB."
" Physically writing the file full; Please wait ...";
bool success = os_file_set_size(
file.m_filepath, file.m_handle,
- static_cast<os_offset_t>(file.m_size) << UNIV_PAGE_SIZE_SHIFT);
+ static_cast<os_offset_t>(file.m_size) << srv_page_size_shift);
if (success) {
ib::info() << "File '" << file.filepath() << "' size is now "
- << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
+ << (file.m_size >> (20U - srv_page_size_shift))
<< " MB.";
} else {
ib::error() << "Could not set the file size of '"
@@ -771,11 +763,10 @@ SysTablespace::check_file_spec(
}
if (!m_auto_extend_last_file
- && get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
-
+ && get_sum_of_sizes()
+ < (min_expected_size >> srv_page_size_shift)) {
ib::error() << "Tablespace size must be at least "
- << min_expected_size / (1024 * 1024) << " MB";
-
+ << (min_expected_size >> 20) << " MB";
return(DB_ERROR);
}
@@ -909,15 +900,22 @@ SysTablespace::open_or_create(
it->close();
it->m_exists = true;
- if (it == begin) {
- /* First data file. */
-
- /* Create the tablespace entry for the multi-file
- tablespace in the tablespace manager. */
- space = fil_space_create(
- name(), space_id(), flags(), is_temp
- ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
- NULL);
+ if (it != begin) {
+ } else if (is_temp) {
+ ut_ad(!fil_system.temp_space);
+ ut_ad(space_id() == SRV_TMP_SPACE_ID);
+ space = fil_system.temp_space = fil_space_create(
+ name(), SRV_TMP_SPACE_ID, flags(),
+ FIL_TYPE_TEMPORARY, NULL);
+ if (!space) {
+ return DB_ERROR;
+ }
+ } else {
+ ut_ad(!fil_system.sys_space);
+ ut_ad(space_id() == TRX_SYS_SPACE);
+ space = fil_system.sys_space = fil_space_create(
+ name(), TRX_SYS_SPACE, flags(),
+ FIL_TYPE_TABLESPACE, NULL);
if (!space) {
return DB_ERROR;
}
@@ -940,16 +938,16 @@ SysTablespace::open_or_create(
/** Normalize the file size, convert from megabytes to number of pages. */
void
-SysTablespace::normalize()
+SysTablespace::normalize_size()
{
files_t::iterator end = m_files.end();
for (files_t::iterator it = m_files.begin(); it != end; ++it) {
- it->m_size *= (1024 * 1024) / UNIV_PAGE_SIZE;
+ it->m_size <<= (20U - srv_page_size_shift);
}
- m_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE;
+ m_last_file_size_max <<= (20U - srv_page_size_shift);
}
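
Aside: SysTablespace::normalize_size() above converts megabytes to pages with a left shift by (20 - srv_page_size_shift), since one MiB is 1 << 20 bytes. A standalone check of the identity, assuming 16 KiB pages:

	#include <cassert>
	#include <cstdint>

	int main()
	{
		const unsigned page_size_shift = 14;     /* 16 KiB pages (assumed) */
		const uint64_t megabytes       = 12;

		/* 20 is the shift for one MiB, so (20 - shift) converts MiB to pages */
		const uint64_t pages = megabytes << (20U - page_size_shift);

		assert(pages == megabytes * 1024 * 1024 / (uint64_t(1) << page_size_shift));
		assert(pages == 768);
		return 0;
	}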
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
index 7ad7459ea6a..6b6042dee66 100644
--- a/storage/innobase/fts/fts0config.cc
+++ b/storage/innobase/fts/fts0config.cc
@@ -422,7 +422,7 @@ fts_config_set_ulint(
ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
- value.f_len = snprintf(
+ value.f_len = (ulint) snprintf(
(char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value);
error = fts_config_set_value(trx, fts_table, name, &value);
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index e217758b651..f63ae9d82d7 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -469,7 +469,7 @@ fts_load_user_stopword(
trx_t* trx;
ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "Load user stopword table into FTS cache";
if (!has_lock) {
@@ -499,7 +499,6 @@ fts_load_user_stopword(
stopword_info);
graph = fts_parse_sql_no_dict_lock(
- NULL,
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
@@ -549,7 +548,7 @@ cleanup:
mutex_exit(&dict_sys->mutex);
}
- trx_free_for_background(trx);
+ trx_free(trx);
return(ret);
}
@@ -1741,9 +1740,9 @@ fts_create_in_mem_aux_table(
ulint n_cols)
{
dict_table_t* new_table = dict_mem_table_create(
- aux_table_name, table->space, n_cols, 0, table->flags,
- table->space == TRX_SYS_SPACE
- ? 0 : table->space == SRV_TMP_SPACE_ID
+ aux_table_name, NULL, n_cols, 0, table->flags,
+ table->space_id == TRX_SYS_SPACE
+ ? 0 : table->space->purpose == FIL_TYPE_TEMPORARY
? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
@@ -1760,7 +1759,7 @@ fts_create_in_mem_aux_table(
@param[in] table Table that has FTS Index
@param[in] fts_table_name FTS AUX table name
@param[in] fts_suffix FTS AUX table suffix
-@param[in] heap heap
+@param[in,out] heap temporary memory heap
@return table object if created, else NULL */
static
dict_table_t*
@@ -1797,14 +1796,15 @@ fts_create_one_common_table(
FTS_CONFIG_TABLE_VALUE_COL_LEN);
}
+ dict_table_add_system_columns(new_table, heap);
error = row_create_table_for_mysql(new_table, trx,
FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
if (error == DB_SUCCESS) {
dict_index_t* index = dict_mem_index_create(
- fts_table_name, "FTS_COMMON_TABLE_IND",
- new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
+ new_table, "FTS_COMMON_TABLE_IND",
+ DICT_UNIQUE|DICT_CLUSTERED, 1);
if (!is_config) {
dict_mem_index_add_field(index, "doc_id", 0);
@@ -1849,16 +1849,14 @@ CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
CREATE TABLE $FTS_PREFIX_CONFIG
(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
@param[in,out] trx transaction
-@param[in] table table with FTS index
-@param[in] name table name normalized
+@param[in,out] table table with FTS index
@param[in] skip_doc_id_index Skip index on doc id
@return DB_SUCCESS if succeed */
dberr_t
fts_create_common_tables(
- trx_t* trx,
- const dict_table_t* table,
- const char* name,
- bool skip_doc_id_index)
+ trx_t* trx,
+ dict_table_t* table,
+ bool skip_doc_id_index)
{
dberr_t error;
que_t* graph;
@@ -1893,13 +1891,15 @@ fts_create_common_tables(
dict_table_t* common_table = fts_create_one_common_table(
trx, table, full_name[i], fts_table.suffix, heap);
- if (common_table == NULL) {
+ if (common_table == NULL) {
error = DB_ERROR;
goto func_exit;
} else {
common_tables.push_back(common_table);
}
+ mem_heap_empty(heap);
+
DBUG_EXECUTE_IF("ib_fts_aux_table_error",
/* Return error after creating FTS_AUX_CONFIG table. */
if (i == 4) {
@@ -1918,7 +1918,7 @@ fts_create_common_tables(
pars_info_bind_id(info, true, "config_table", fts_name);
graph = fts_parse_sql_no_dict_lock(
- &fts_table, info, fts_config_table_insert_values_sql);
+ info, fts_config_table_insert_values_sql);
error = fts_eval_sql(trx, graph);
@@ -1929,9 +1929,8 @@ fts_create_common_tables(
goto func_exit;
}
- index = dict_mem_index_create(
- name, FTS_DOC_ID_INDEX_NAME, table->space,
- DICT_UNIQUE, 1);
+ index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
+ DICT_UNIQUE, 1);
dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
op = trx_get_dict_operation(trx);
@@ -1959,7 +1958,7 @@ func_exit:
@param[in,out] trx transaction
@param[in] index the index instance
@param[in] fts_table fts_table structure
-@param[in,out] heap memory heap
+@param[in,out] heap temporary memory heap
@see row_merge_create_fts_sort_index()
@return DB_SUCCESS or error code */
static
@@ -1992,7 +1991,7 @@ fts_create_one_index_table(
? DATA_VARCHAR : DATA_VARMYSQL,
field->col->prtype,
FTS_MAX_WORD_LEN_IN_CHAR
- * field->col->mbmaxlen);
+ * unsigned(field->col->mbmaxlen));
dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
DATA_NOT_NULL | DATA_UNSIGNED,
@@ -2016,12 +2015,13 @@ fts_create_one_index_table(
(DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
FTS_INDEX_ILIST_LEN);
+ dict_table_add_system_columns(new_table, heap);
error = row_create_table_for_mysql(new_table, trx,
FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
if (error == DB_SUCCESS) {
dict_index_t* index = dict_mem_index_create(
- table_name, "FTS_INDEX_TABLE_IND", new_table->space,
+ new_table, "FTS_INDEX_TABLE_IND",
DICT_UNIQUE|DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "word", 0);
dict_mem_index_add_field(index, "first_doc_id", 0);
@@ -2046,18 +2046,24 @@ fts_create_one_index_table(
return(new_table);
}
-/** Create auxiliary index tables for an FTS index.
-@param[in,out] trx transaction
-@param[in] index the index instance
-@param[in] table_name table name
-@param[in] table_id the table id
+/** Creates the column specific ancillary tables needed for supporting an
+FTS index on the given table. row_mysql_lock_data_dictionary must have
+been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+ word VARCHAR(FTS_MAX_WORD_LEN),
+ first_doc_id INT NOT NULL,
+ last_doc_id UNSIGNED NOT NULL,
+ doc_count UNSIGNED INT NOT NULL,
+ ilist VARBINARY NOT NULL,
+ UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out] trx dictionary transaction
+@param[in] index fulltext index
+@param[in] id table id
@return DB_SUCCESS or error code */
dberr_t
-fts_create_index_tables_low(
- trx_t* trx,
- const dict_index_t* index,
- const char* table_name,
- table_id_t table_id)
+fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
{
ulint i;
fts_table_t fts_table;
@@ -2066,8 +2072,8 @@ fts_create_index_tables_low(
fts_table.type = FTS_INDEX_TABLE;
fts_table.index_id = index->id;
- fts_table.table_id = table_id;
- fts_table.parent = table_name;
+ fts_table.table_id = id;
+ fts_table.parent = index->table->name.m_name;
fts_table.table = index->table;
/* aux_idx_tables vector is used for dropping FTS AUX INDEX
@@ -2093,6 +2099,8 @@ fts_create_index_tables_low(
aux_idx_tables.push_back(new_table);
}
+ mem_heap_empty(heap);
+
DBUG_EXECUTE_IF("ib_fts_index_table_error",
/* Return error after creating FTS_INDEX_5
aux table. */
@@ -2118,41 +2126,6 @@ fts_create_index_tables_low(
return(error);
}
-/** Creates the column specific ancillary tables needed for supporting an
-FTS index on the given table. row_mysql_lock_data_dictionary must have
-been called before this.
-
-All FTS AUX Index tables have the following schema.
-CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
- word VARCHAR(FTS_MAX_WORD_LEN),
- first_doc_id INT NOT NULL,
- last_doc_id UNSIGNED NOT NULL,
- doc_count UNSIGNED INT NOT NULL,
- ilist VARBINARY NOT NULL,
- UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
-@param[in,out] trx transaction
-@param[in] index index instance
-@return DB_SUCCESS or error code */
-dberr_t
-fts_create_index_tables(
- trx_t* trx,
- const dict_index_t* index)
-{
- dberr_t err;
- dict_table_t* table;
-
- table = dict_table_get_low(index->table_name);
- ut_a(table != NULL);
-
- err = fts_create_index_tables_low(
- trx, index, table->name.m_name, table->id);
-
- if (err == DB_SUCCESS) {
- trx_commit(trx);
- }
-
- return(err);
-}
#if 0
/******************************************************************//**
Return string representation of state. */
@@ -2326,7 +2299,7 @@ fts_trx_create(
savep != NULL;
savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
- fts_savepoint_take(trx, ftt, savep->name);
+ fts_savepoint_take(ftt, savep->name);
}
return(ftt);
@@ -2687,7 +2660,7 @@ retry:
fts_table.parent = table->name.m_name;
- trx = trx_allocate_for_background();
+ trx = trx_create();
if (srv_read_only_mode) {
trx_start_internal_read_only(trx);
} else {
@@ -2773,7 +2746,7 @@ func_exit:
}
}
- trx_free_for_background(trx);
+ trx_free(trx);
return(error);
}
@@ -2816,7 +2789,7 @@ fts_update_sync_doc_id(
}
if (!trx) {
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx_start_internal(trx);
trx->op_info = "setting last FTS document id";
@@ -2825,7 +2798,7 @@ fts_update_sync_doc_id(
info = pars_info_create();
- id_len = snprintf(
+ id_len = (ulint) snprintf(
(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
@@ -2854,7 +2827,7 @@ fts_update_sync_doc_id(
fts_sql_rollback(trx);
}
- trx_free_for_background(trx);
+ trx_free(trx);
}
return(error);
@@ -3057,7 +3030,7 @@ fts_commit_table(
ib_rbt_t* rows;
dberr_t error = DB_SUCCESS;
fts_cache_t* cache = ftt->table->fts->cache;
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
trx_start_internal(trx);
@@ -3099,7 +3072,7 @@ fts_commit_table(
fts_sql_commit(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
return(error);
}
@@ -3290,6 +3263,8 @@ fts_fetch_doc_from_rec(
parser = get_doc->index_cache->index->parser;
clust_rec = btr_pcur_get_rec(pcur);
+ ut_ad(!page_rec_is_comp(clust_rec)
+ || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
num_field = dict_index_get_n_fields(index);
@@ -3574,7 +3549,7 @@ fts_add_doc_by_id(
dict_index_copy_types(clust_ref, clust_index, n_fields);
row_build_row_ref_in_tuple(
- clust_ref, rec, fts_id_index, NULL, NULL);
+ clust_ref, rec, fts_id_index, NULL);
btr_pcur_open_with_no_init(
clust_index, clust_ref, PAGE_CUR_LE,
@@ -3723,6 +3698,8 @@ fts_get_max_doc_id(
return(0);
}
+ ut_ad(!index->is_instant());
+
dfield = dict_index_get_nth_field(index, 0);
#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
@@ -3757,6 +3734,7 @@ fts_get_max_doc_id(
goto func_exit;
}
+ ut_ad(!rec_is_metadata(rec, index));
offsets = rec_get_offsets(
rec, index, offsets, true, ULINT_UNDEFINED, &heap);
@@ -3794,7 +3772,7 @@ fts_doc_fetch_by_doc_id(
const char* select_str;
doc_id_t write_doc_id;
dict_index_t* index;
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
que_t* graph;
trx->op_info = "fetching indexed FTS document";
@@ -3815,7 +3793,7 @@ fts_doc_fetch_by_doc_id(
pars_info_bind_function(info, "my_func", callback, arg);
select_str = fts_get_select_columns_str(index, info, info->heap);
- pars_info_bind_id(info, TRUE, "table_name", index->table_name);
+ pars_info_bind_id(info, TRUE, "table_name", index->table->name.m_name);
if (!get_doc || !get_doc->get_document_graph) {
if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
@@ -3885,7 +3863,7 @@ fts_doc_fetch_by_doc_id(
error = fts_eval_sql(trx, graph);
fts_sql_commit(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
if (!get_doc) {
fts_que_graph_free(graph);
@@ -4139,7 +4117,7 @@ fts_sync_begin(
sync->start_time = ut_time();
- sync->trx = trx_allocate_for_background();
+ sync->trx = trx_create();
trx_start_internal(sync->trx);
if (fts_enable_diag_print) {
@@ -4284,7 +4262,7 @@ fts_sync_commit(
/* Avoid assertion in trx_free(). */
trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
+ trx_free(trx);
return(error);
}
@@ -4338,7 +4316,7 @@ fts_sync_rollback(
/* Avoid assertion in trx_free(). */
trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
+ trx_free(trx);
}
/** Run SYNC on the table, i.e., write out data from the cache to the
@@ -4498,7 +4476,7 @@ fts_sync_table(
ut_ad(table->fts);
- if (!dict_table_is_discarded(table) && table->fts->cache
+ if (table->space && table->fts->cache
&& !dict_table_is_corrupted(table)) {
err = fts_sync(table->fts->cache->sync,
unlock_cache, wait, has_dict);
@@ -4736,7 +4714,7 @@ fts_tokenize_add_word_for_parser(
MYSQL_FTPARSER_PARAM* param, /* in: parser parameter */
const char* word, /* in: token word */
int word_len, /* in: word len */
- MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
+ MYSQL_FTPARSER_BOOLEAN_INFO*)
{
fts_string_t str;
fts_tokenize_param_t* fts_param;
@@ -4748,9 +4726,9 @@ fts_tokenize_add_word_for_parser(
ut_ad(result_doc != NULL);
str.f_str = (byte*)(word);
- str.f_len = word_len;
+ str.f_len = ulint(word_len);
str.f_n_char = fts_get_token_size(
- const_cast<CHARSET_INFO*>(param->cs), word, word_len);
+ const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
/* JAN: TODO: MySQL 5.7 FTS
ut_ad(boolean_info->position >= 0);
@@ -5012,7 +4990,7 @@ fts_get_rows_count(
ulint count = 0;
char table_name[MAX_FULL_NAME_LEN];
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "fetching FT table rows count";
info = pars_info_create();
@@ -5066,7 +5044,7 @@ fts_get_rows_count(
fts_que_graph_free(graph);
- trx_free_for_background(trx);
+ trx_free(trx);
return(count);
}
@@ -5083,7 +5061,7 @@ fts_update_max_cache_size(
trx_t* trx;
fts_table_t fts_table;
- trx = trx_allocate_for_background();
+ trx = trx_create();
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
@@ -5092,7 +5070,7 @@ fts_update_max_cache_size(
fts_sql_commit(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
@@ -5645,7 +5623,6 @@ Take a FTS savepoint. */
void
fts_savepoint_take(
/*===============*/
- trx_t* trx, /*!< in: transaction */
fts_trx_t* fts_trx, /*!< in: fts transaction */
const char* name) /*!< in: savepoint name */
{
@@ -5923,7 +5900,7 @@ fts_savepoint_rollback(
ut_a(ib_vector_size(savepoints) > 0);
/* Restore the savepoint. */
- fts_savepoint_take(trx, trx->fts_trx, name);
+ fts_savepoint_take(trx->fts_trx, name);
}
}
@@ -5953,7 +5930,7 @@ fts_is_aux_table_name(
if (ptr != NULL) {
/* We will start the match after the '/' */
++ptr;
- len = end - ptr;
+ len = ulint(end - ptr);
}
/* All auxiliary tables are prefixed with "FTS_" and the name
@@ -5980,7 +5957,7 @@ fts_is_aux_table_name(
/* Skip the underscore. */
++ptr;
ut_a(end > ptr);
- len = end - ptr;
+ len = ulint(end - ptr);
/* First search the common table suffix array. */
for (i = 0; fts_common_tables[i] != NULL; ++i) {
@@ -6011,7 +5988,7 @@ fts_is_aux_table_name(
/* Skip the underscore. */
++ptr;
ut_a(end > ptr);
- len = end - ptr;
+ len = ulint(end - ptr);
/* Search the FT index specific array. */
for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
@@ -6387,7 +6364,7 @@ fts_rename_aux_tables_to_hex_format_low(
continue;
}
- trx_bg = trx_allocate_for_background();
+ trx_bg = trx_create();
trx_bg->op_info = "Revert half done rename";
trx_bg->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
@@ -6405,14 +6382,14 @@ fts_rename_aux_tables_to_hex_format_low(
<< table->name << ". Please revert"
" manually.";
fts_sql_rollback(trx_bg);
- trx_free_for_background(trx_bg);
+ trx_free(trx_bg);
/* Continue to clear aux tables' flags2 */
not_rename = true;
continue;
}
fts_sql_commit(trx_bg);
- trx_free_for_background(trx_bg);
+ trx_free(trx_bg);
}
DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
@@ -6629,7 +6606,7 @@ fts_rename_aux_tables_to_hex_format(
dict_table_t* parent_table)
{
dberr_t err;
- trx_t* trx_rename = trx_allocate_for_background();
+ trx_t* trx_rename = trx_create();
trx_rename->op_info = "Rename aux tables to hex format";
trx_rename->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
@@ -6648,18 +6625,18 @@ fts_rename_aux_tables_to_hex_format(
/* Corrupting the fts index related to parent table. */
trx_t* trx_corrupt;
- trx_corrupt = trx_allocate_for_background();
+ trx_corrupt = trx_create();
trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
trx_corrupt->dict_operation_lock_mode = 0;
fts_sql_commit(trx_corrupt);
- trx_free_for_background(trx_corrupt);
+ trx_free(trx_corrupt);
} else {
fts_sql_commit(trx_rename);
}
- trx_free_for_background(trx_rename);
+ trx_free(trx_rename);
ib_vector_reset(aux_tables);
}
@@ -6708,7 +6685,7 @@ fts_drop_obsolete_aux_table_from_vector(
fts_aux_table_t* aux_drop_table;
aux_drop_table = static_cast<fts_aux_table_t*>(
ib_vector_get(tables, count));
- trx_t* trx_drop = trx_allocate_for_background();
+ trx_t* trx_drop = trx_create();
trx_drop->op_info = "Drop obsolete aux tables";
trx_drop->dict_operation_lock_mode = RW_X_LATCH;
trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
@@ -6738,7 +6715,7 @@ fts_drop_obsolete_aux_table_from_vector(
fts_sql_commit(trx_drop);
}
- trx_free_for_background(trx_drop);
+ trx_free(trx_drop);
}
}
@@ -7140,15 +7117,6 @@ fts_drop_orphaned_tables(void)
que_t* graph;
ib_vector_t* tables;
ib_alloc_t* heap_alloc;
- space_name_list_t space_name_list;
- dberr_t error = DB_SUCCESS;
-
- /* Note: We have to free the memory after we are done with the list. */
- error = fil_get_space_names(space_name_list);
-
- if (error == DB_OUT_OF_MEMORY) {
- ib::fatal() << "Out of memory";
- }
heap = mem_heap_create(1024);
heap_alloc = ib_heap_allocator_create(heap);
@@ -7161,36 +7129,33 @@ fts_drop_orphaned_tables(void)
users can't map them back to table names and this will create
unnecessary clutter. */
- for (space_name_list_t::iterator it = space_name_list.begin();
- it != space_name_list.end();
- ++it) {
-
- fts_aux_table_t* fts_aux_table;
+ mutex_enter(&fil_system.mutex);
- fts_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_push(tables, NULL));
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
+ space != NULL;
+ space = UT_LIST_GET_NEXT(space_list, space)) {
- memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
-
- if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
- ib_vector_pop(tables);
- } else {
- ulint len = strlen(*it);
-
- fts_aux_table->id = fil_space_get_id_by_name(*it);
+ if (space->purpose != FIL_TYPE_TABLESPACE) {
+ continue;
+ }
- /* We got this list from fil0fil.cc. The tablespace
- with this name must exist. */
- ut_a(fts_aux_table->id != ULINT_UNDEFINED);
+ fts_aux_table_t fts_aux_table;
+ memset(&fts_aux_table, 0x0, sizeof fts_aux_table);
- fts_aux_table->name = static_cast<char*>(
- mem_heap_dup(heap, *it, len + 1));
+ size_t len = strlen(space->name);
- fts_aux_table->name[len] = 0;
+ if (!fts_is_aux_table_name(&fts_aux_table, space->name, len)) {
+ continue;
}
+
+ fts_aux_table.id = space->id;
+ fts_aux_table.name = mem_heap_strdupl(heap, space->name, len);
+ ib_vector_push(tables, &fts_aux_table);
}
- trx = trx_allocate_for_background();
+ mutex_exit(&fil_system.mutex);
+
+ trx = trx_create();
trx->op_info = "dropping orphaned FTS tables";
row_mysql_lock_data_dictionary(trx);
@@ -7199,7 +7164,6 @@ fts_drop_orphaned_tables(void)
pars_info_bind_function(info, "my_func", fts_read_tables, tables);
graph = fts_parse_sql_no_dict_lock(
- NULL,
info,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
@@ -7217,7 +7181,7 @@ fts_drop_orphaned_tables(void)
"CLOSE c;");
for (;;) {
- error = fts_eval_sql(trx, graph);
+ dberr_t error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
fts_check_and_drop_orphaned_tables(trx, tables);
@@ -7245,19 +7209,11 @@ fts_drop_orphaned_tables(void)
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
if (heap != NULL) {
mem_heap_free(heap);
}
-
- /** Free the memory allocated to store the .ibd names. */
- for (space_name_list_t::iterator it = space_name_list.begin();
- it != space_name_list.end();
- ++it) {
-
- UT_DELETE_ARRAY(*it);
- }
}
/**********************************************************************//**
@@ -7354,7 +7310,7 @@ fts_load_stopword(
}
if (!trx) {
- trx = trx_allocate_for_background();
+ trx = trx_create();
if (srv_read_only_mode) {
trx_start_internal_read_only(trx);
} else {
@@ -7434,7 +7390,7 @@ cleanup:
fts_sql_rollback(trx);
}
- trx_free_for_background(trx);
+ trx_free(trx);
}
if (!cache->stopword_info.cached_stopword) {
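fts_drop_orphaned_tables() no longer materializes tablespace names through fil_get_space_names(); it walks fil_system.space_list under fil_system.mutex and records FTS AUX candidates directly. The traversal pattern, reduced to its core (taken from the hunk above; the filtering comment is illustrative):

    mutex_enter(&fil_system.mutex);
    for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
         space != NULL;
         space = UT_LIST_GET_NEXT(space_list, space)) {
            if (space->purpose != FIL_TYPE_TABLESPACE) {
                    continue;   /* skip temporary, import and log spaces */
            }
            /* space->id and space->name are valid while the mutex is held */
    }
    mutex_exit(&fil_system.mutex);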
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index a87e7091c1a..cf862cb8c6d 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -624,9 +624,9 @@ fts_zip_read_word(
ptr[len] = 0;
zip->zp->next_out = ptr;
- zip->zp->avail_out = len;
+ zip->zp->avail_out = uInt(len);
- word->f_len = len;
+ word->f_len = ulint(len);
len = 0;
}
break;
@@ -679,15 +679,15 @@ fts_fetch_index_words(
/* Skip the duplicate words. */
if (zip->word.f_len == static_cast<ulint>(len)
- && !memcmp(zip->word.f_str, data, len)) {
+ && !memcmp(zip->word.f_str, data, zip->word.f_len)) {
return(TRUE);
}
ut_a(len <= FTS_MAX_WORD_LEN);
- memcpy(zip->word.f_str, data, len);
- zip->word.f_len = len;
+ zip->word.f_len = ulint(len);
+ memcpy(zip->word.f_str, data, zip->word.f_len);
ut_a(zip->zp->avail_in == 0);
ut_a(zip->zp->next_in == NULL);
@@ -716,7 +716,7 @@ fts_fetch_index_words(
case Z_OK:
if (zip->zp->avail_in == 0) {
zip->zp->next_in = static_cast<byte*>(data);
- zip->zp->avail_in = len;
+ zip->zp->avail_in = uInt(len);
ut_a(len <= FTS_MAX_WORD_LEN);
len = 0;
}
@@ -977,7 +977,7 @@ fts_table_fetch_doc_ids(
ut_a(fts_table->type == FTS_COMMON_TABLE);
if (!trx) {
- trx = trx_allocate_for_background();
+ trx = trx_create();
alloc_bk_trx = TRUE;
}
@@ -1017,7 +1017,7 @@ fts_table_fetch_doc_ids(
}
if (alloc_bk_trx) {
- trx_free_for_background(trx);
+ trx_free(trx);
}
return(error);
@@ -1147,7 +1147,7 @@ fts_optimize_encode_node(
++src;
/* Number of encoded pos bytes to copy. */
- pos_enc_len = src - enc->src_ilist_ptr;
+ pos_enc_len = ulint(src - enc->src_ilist_ptr);
/* Total number of bytes required for copy. */
enc_len += pos_enc_len;
@@ -1219,7 +1219,7 @@ fts_optimize_node(
enc->src_ilist_ptr = src_node->ilist;
}
- copied = enc->src_ilist_ptr - src_node->ilist;
+ copied = ulint(enc->src_ilist_ptr - src_node->ilist);
/* While there is data in the source node and space to copy
into in the destination node. */
@@ -1240,7 +1240,7 @@ test_again:
fts_update_t* update;
update = (fts_update_t*) ib_vector_get(
- del_vec, *del_pos);
+ del_vec, ulint(*del_pos));
del_doc_id = update->doc_id;
}
@@ -1284,7 +1284,7 @@ test_again:
}
/* Bytes copied so far from source. */
- copied = enc->src_ilist_ptr - src_node->ilist;
+ copied = ulint(enc->src_ilist_ptr - src_node->ilist);
}
if (copied >= src_node->ilist_size) {
@@ -1391,7 +1391,7 @@ fts_optimize_word(
ut_a(enc.src_ilist_ptr != NULL);
/* Determine the number of bytes copied to dst_node. */
- copied = enc.src_ilist_ptr - src_node->ilist;
+ copied = ulint(enc.src_ilist_ptr - src_node->ilist);
/* Can't copy more than what's in the vlc array. */
ut_a(copied <= src_node->ilist_size);
@@ -1604,7 +1604,7 @@ fts_optimize_create(
optim->table = table;
- optim->trx = trx_allocate_for_background();
+ optim->trx = trx_create();
trx_start_internal(optim->trx);
optim->fts_common_table.parent = table->name.m_name;
@@ -1729,7 +1729,7 @@ fts_optimize_free(
mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
trx_commit_for_mysql(optim->trx);
- trx_free_for_background(optim->trx);
+ trx_free(optim->trx);
fts_doc_ids_free(optim->to_delete);
fts_optimize_graph_free(&optim->graph);
@@ -2655,7 +2655,7 @@ fts_optimize_new_table(
empty_slot = i;
} else if (slot->table == table) {
/* Already exists in our optimize queue. */
- ut_ad(slot->table_id = table->id);
+ ut_ad(slot->table_id == table->id);
return(FALSE);
}
}
@@ -2838,7 +2838,7 @@ Optimize all FTS tables.
@return Dummy return */
static
os_thread_ret_t
-fts_optimize_thread(
+DECLARE_THREAD(fts_optimize_thread)(
/*================*/
void* arg) /*!< in: work queue*/
{
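Most fts0opt.cc hunks only add explicit narrowing casts such as uInt(len) and ulint(ptr - base) so that signed-to-unsigned conversions are spelled out, and fix a debug assertion that used '=' where '==' was intended. The cast idiom in isolation (plain C++, illustrative typedef):

    #include <cstddef>

    typedef unsigned long ulint;    /* stand-in for the InnoDB typedef */

    /* Pointer subtraction yields a signed ptrdiff_t; the functional-style
       cast makes the narrowing to an unsigned length explicit. */
    static ulint bytes_copied(const unsigned char* start, const unsigned char* end)
    {
            return ulint(end - start);
    }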
diff --git a/storage/innobase/fts/fts0plugin.cc b/storage/innobase/fts/fts0plugin.cc
index b7a05deeb34..7f4f5161148 100644
--- a/storage/innobase/fts/fts0plugin.cc
+++ b/storage/innobase/fts/fts0plugin.cc
@@ -32,26 +32,12 @@ Created 2013/06/04 Shaohua Wang
/******************************************************************//**
FTS default parser init
@return 0 */
-static
-int
-fts_default_parser_init(
-/*====================*/
- MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
-{
- return(0);
-}
+static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; }
/******************************************************************//**
FTS default parser deinit
@return 0 */
-static
-int
-fts_default_parser_deinit(
-/*======================*/
- MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
-{
- return(0);
-}
+static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; }
/******************************************************************//**
FTS default parser parse from ft_static.c in MYISAM.
@@ -134,7 +120,7 @@ fts_query_add_word_for_parser(
case FT_TOKEN_WORD:
term_node = fts_ast_create_node_term_for_parser(
- state, word, word_len);
+ state, word, ulint(word_len));
if (info->trunc) {
fts_ast_term_set_wildcard(term_node);
@@ -251,7 +237,7 @@ fts_parse_query_internal(
int ret = param->mysql_add_word(
param,
reinterpret_cast<char*>(w.pos),
- w.len, &info);
+ int(w.len), &info);
if (ret) {
return(ret);
}
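The default parser stubs shrink to one-liners with unnamed parameters, which removes the dead doc comments and keeps unused-parameter warnings quiet. The same idiom in isolation (illustrative type name):

    struct Param;   /* stand-in for MYSQL_FTPARSER_PARAM */

    /* The parameter is required by the callback signature but unused,
       so it is left unnamed. */
    static int default_parser_init(Param*) { return 0; }
    static int default_parser_deinit(Param*) { return 0; }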
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 2396a376853..00f3b9aedf0 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -1747,7 +1747,7 @@ fts_query_match_phrase_add_word_for_parser(
MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
const char* word, /*!< in: token */
int word_len, /*!< in: token length */
- MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */
+ MYSQL_FTPARSER_BOOLEAN_INFO*)
{
fts_phrase_param_t* phrase_param;
fts_phrase_t* phrase;
@@ -1769,8 +1769,8 @@ fts_query_match_phrase_add_word_for_parser(
}
match.f_str = (uchar *)(word);
- match.f_len = word_len;
- match.f_n_char = fts_get_token_size(phrase->charset, word, word_len);
+ match.f_len = ulint(word_len);
+ match.f_n_char= fts_get_token_size(phrase->charset, word, match.f_len);
if (match.f_len > 0) {
/* Get next token to match. */
@@ -1902,7 +1902,7 @@ fts_query_match_phrase(
&phrase_param,
phrase->parser,
ptr,
- (end - ptr))) {
+ ulint(end - ptr))) {
break;
}
} else {
@@ -3293,7 +3293,7 @@ fts_query_filter_doc_ids(
++ptr;
/* Bytes decoded so far */
- decoded = ptr - (byte*) data;
+ decoded = ulint(ptr - (byte*) data);
/* We simply collect the matching documents and the
positions here and match later. */
@@ -3917,7 +3917,7 @@ fts_query_can_optimize(
}
/** FTS Query entry point.
-@param[in] trx transaction
+@param[in,out] trx transaction
@param[in] index fts index to search
@param[in] flags FTS search mode
@param[in] query_str FTS query
@@ -3939,7 +3939,7 @@ fts_query(
ulint lc_query_str_len;
ulint result_len;
bool boolean_mode;
- trx_t* query_trx;
+ trx_t* query_trx; /* FIXME: use provided trx */
CHARSET_INFO* charset;
ulint start_time_ms;
bool will_be_ignored = false;
@@ -3948,7 +3948,7 @@ fts_query(
*result = NULL;
memset(&query, 0x0, sizeof(query));
- query_trx = trx_allocate_for_background();
+ query_trx = trx_create();
query_trx->op_info = "FTS query";
start_time_ms = ut_time_ms();
@@ -4117,7 +4117,7 @@ fts_query(
<< diff_time / 1000 << " secs: " << diff_time % 1000
<< " millisec: row(s) "
<< ((*result)->rankings_by_id
- ? rbt_size((*result)->rankings_by_id)
+ ? lint(rbt_size((*result)->rankings_by_id))
: -1);
/* Log memory consumption & result size */
@@ -4132,7 +4132,7 @@ fts_query(
func_exit:
fts_query_free(&query);
- trx_free_for_background(query_trx);
+ trx_free(query_trx);
return(error);
}
diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc
index ae2186c2d30..6f66486ed6e 100644
--- a/storage/innobase/fts/fts0sql.cc
+++ b/storage/innobase/fts/fts0sql.cc
@@ -116,7 +116,8 @@ fts_get_table_name_prefix(
prefix_name_len = dbname_len + 4 + len + 1;
- prefix_name = static_cast<char*>(ut_malloc_nokey(prefix_name_len));
+ prefix_name = static_cast<char*>(
+ ut_malloc_nokey(unsigned(prefix_name_len)));
len = sprintf(prefix_name, "%.*sFTS_%s",
dbname_len, fts_table->parent, table_id);
@@ -198,16 +199,13 @@ Parse an SQL string.
que_t*
fts_parse_sql_no_dict_lock(
/*=======================*/
- fts_table_t* fts_table, /*!< in: FTS aux table info */
pars_info_t* info, /*!< in: info struct, or NULL */
const char* sql) /*!< in: SQL string to evaluate */
{
char* str;
que_t* graph;
-#ifdef UNIV_DEBUG
ut_ad(mutex_own(&dict_sys->mutex));
-#endif
str = ut_str3cat(fts_sql_begin, sql, fts_sql_end);
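fts_parse_sql_no_dict_lock() loses its unused fts_table_t* argument, and the dict_sys->mutex assertion is now unconditional (ut_ad() already compiles away outside debug builds, so the #ifdef wrapper was redundant). A caller-side sketch matching the call sites in this patch, with info and sql as in those calls:

    ut_ad(mutex_own(&dict_sys->mutex));     /* the caller must already hold it */
    que_t*  graph = fts_parse_sql_no_dict_lock(info, sql);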
diff --git a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc
index 9f79ac8df2b..3e77165ac31 100644
--- a/storage/innobase/fut/fut0lst.cc
+++ b/storage/innobase/fut/fut0lst.cc
@@ -40,7 +40,6 @@ flst_add_to_empty(
{
ulint space;
fil_addr_t node_addr;
- ulint len;
ut_ad(mtr && base && node);
ut_ad(base != node);
@@ -50,8 +49,7 @@ flst_add_to_empty(
ut_ad(mtr_memo_contains_page_flagged(mtr, node,
MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_SX_FIX));
- len = flst_get_len(base);
- ut_a(len == 0);
+ ut_a(!flst_get_len(base));
buf_ptr_get_fsp_addr(node, &space, &node_addr);
@@ -64,7 +62,7 @@ flst_add_to_empty(
flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
/* Update len of base node */
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
+ mlog_write_ulint(base + FLST_LEN, 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
diff --git a/storage/innobase/gis/gis0geo.cc b/storage/innobase/gis/gis0geo.cc
index 436249c0026..71d637d62d5 100644
--- a/storage/innobase/gis/gis0geo.cc
+++ b/storage/innobase/gis/gis0geo.cc
@@ -30,6 +30,7 @@ Created 2013/03/27 Allen Lai and Jimmy Yang
#include "mach0data.h"
#include <spatial.h>
+#include <cmath>
/* These definitions are for comparing 2 mbrs. */
@@ -72,7 +73,6 @@ rtree_add_point_to_mbr(
where point is stored */
uchar* end, /*!< in: end of wkb. */
uint n_dims, /*!< in: dimensions. */
- uchar byte_order, /*!< in: byte order. */
double* mbr) /*!< in/out: mbr, which
must be of length n_dims * 2. */
{
@@ -112,11 +112,10 @@ rtree_get_point_mbr(
where point is stored. */
uchar* end, /*!< in: end of wkb. */
uint n_dims, /*!< in: dimensions. */
- uchar byte_order, /*!< in: byte order. */
double* mbr) /*!< in/out: mbr,
must be of length n_dims * 2. */
{
- return rtree_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr);
+ return rtree_add_point_to_mbr(wkb, end, n_dims, mbr);
}
@@ -131,7 +130,6 @@ rtree_get_linestring_mbr(
where point is stored. */
uchar* end, /*!< in: end of wkb. */
uint n_dims, /*!< in: dimensions. */
- uchar byte_order, /*!< in: byte order. */
double* mbr) /*!< in/out: mbr,
must be of length n_dims * 2. */
{
@@ -142,8 +140,7 @@ rtree_get_linestring_mbr(
for (; n_points > 0; --n_points) {
/* Add next point to mbr */
- if (rtree_add_point_to_mbr(wkb, end, n_dims,
- byte_order, mbr)) {
+ if (rtree_add_point_to_mbr(wkb, end, n_dims, mbr)) {
return(-1);
}
}
@@ -162,7 +159,6 @@ rtree_get_polygon_mbr(
where point is stored. */
uchar* end, /*!< in: end of wkb. */
uint n_dims, /*!< in: dimensions. */
- uchar byte_order, /*!< in: byte order. */
double* mbr) /*!< in/out: mbr,
must be of length n_dims * 2. */
{
@@ -178,8 +174,7 @@ rtree_get_polygon_mbr(
for (; n_points > 0; --n_points) {
/* Add next point to mbr */
- if (rtree_add_point_to_mbr(wkb, end, n_dims,
- byte_order, mbr)) {
+ if (rtree_add_point_to_mbr(wkb, end, n_dims, mbr)) {
return(-1);
}
}
@@ -205,11 +200,10 @@ rtree_get_geometry_mbr(
by itself. */
{
int res;
- uchar byte_order = 2;
uint wkb_type = 0;
uint n_items;
- byte_order = *(*wkb);
+ /* byte_order = *(*wkb); */
++(*wkb);
wkb_type = uint4korr((*wkb));
@@ -217,24 +211,22 @@ rtree_get_geometry_mbr(
switch ((enum wkbType) wkb_type) {
case wkbPoint:
- res = rtree_get_point_mbr(wkb, end, n_dims, byte_order, mbr);
+ res = rtree_get_point_mbr(wkb, end, n_dims, mbr);
break;
case wkbLineString:
- res = rtree_get_linestring_mbr(wkb, end, n_dims,
- byte_order, mbr);
+ res = rtree_get_linestring_mbr(wkb, end, n_dims, mbr);
break;
case wkbPolygon:
- res = rtree_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr);
+ res = rtree_get_polygon_mbr(wkb, end, n_dims, mbr);
break;
case wkbMultiPoint:
n_items = uint4korr((*wkb));
(*wkb) += 4;
for (; n_items > 0; --n_items) {
- byte_order = *(*wkb);
+ /* byte_order = *(*wkb); */
++(*wkb);
(*wkb) += 4;
- if (rtree_get_point_mbr(wkb, end, n_dims,
- byte_order, mbr)) {
+ if (rtree_get_point_mbr(wkb, end, n_dims, mbr)) {
return(-1);
}
}
@@ -244,11 +236,10 @@ rtree_get_geometry_mbr(
n_items = uint4korr((*wkb));
(*wkb) += 4;
for (; n_items > 0; --n_items) {
- byte_order = *(*wkb);
+ /* byte_order = *(*wkb); */
++(*wkb);
(*wkb) += 4;
- if (rtree_get_linestring_mbr(wkb, end, n_dims,
- byte_order, mbr)) {
+ if (rtree_get_linestring_mbr(wkb, end, n_dims, mbr)) {
return(-1);
}
}
@@ -258,11 +249,10 @@ rtree_get_geometry_mbr(
n_items = uint4korr((*wkb));
(*wkb) += 4;
for (; n_items > 0; --n_items) {
- byte_order = *(*wkb);
+ /* byte_order = *(*wkb); */
++(*wkb);
(*wkb) += 4;
- if (rtree_get_polygon_mbr(wkb, end, n_dims,
- byte_order, mbr)) {
+ if (rtree_get_polygon_mbr(wkb, end, n_dims, mbr)) {
return(-1);
}
}
@@ -366,7 +356,7 @@ mbr_join_square(
/* Check if finite (not infinity or NaN),
so we don't get NaN in calculations */
- if (!isfinite(square)) {
+ if (!std::isfinite(square)) {
return DBL_MAX;
}
@@ -402,7 +392,7 @@ copy_coords(
/*========*/
double* dst, /*!< in/out: destination. */
const double* src, /*!< in: source. */
- int n_dim) /*!< in: dimensions. */
+ int)
{
memcpy(dst, src, DATA_MBR_LEN);
}
@@ -624,7 +614,7 @@ rtree_key_cmp(
/*==========*/
page_cur_mode_t mode, /*!< in: compare method. */
const uchar* b, /*!< in: first key. */
- int b_len, /*!< in: first key len. */
+ int,
const uchar* a, /*!< in: second key. */
int a_len) /*!< in: second key len. */
{
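gis0geo.cc drops the never-used byte_order arguments and calls std::isfinite from <cmath> rather than relying on the C99 macro. A self-contained example of the guard used in mbr_join_square():

    #include <cmath>
    #include <cfloat>

    /* Reject NaN and infinities before they propagate into later arithmetic. */
    static double sanitize_square(double square)
    {
            return std::isfinite(square) ? square : DBL_MAX;
    }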
diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc
index a3790f5440e..be67239e177 100644
--- a/storage/innobase/gis/gis0rtree.cc
+++ b/storage/innobase/gis/gis0rtree.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,6 +38,7 @@ Created 2013/03/27 Allen Lai and Jimmy Yang
#include "trx0undo.h"
#include "srv0mon.h"
#include "gis0geo.h"
+#include <cmath>
/*************************************************************//**
Initial split nodes info for R-tree split.
@@ -70,7 +72,7 @@ rtr_page_split_initialize_nodes(
page = buf_block_get_frame(block);
n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
- n_recs = page_get_n_recs(page) + 1;
+ n_recs = ulint(page_get_n_recs(page)) + 1;
/* We reserve memory space for 2 MBRs for the temp result of the split
algorithm. Plus the new mbr that needs to be inserted, we
@@ -85,7 +87,7 @@ rtr_page_split_initialize_nodes(
stop = task + n_recs;
rec = page_rec_get_next(page_get_infimum_rec(page));
- ut_d(const bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
*offsets = rec_get_offsets(rec, cursor->index, *offsets, is_leaf,
n_uniq, &heap);
@@ -132,10 +134,8 @@ rtr_index_build_node_ptr(
pointer */
ulint page_no,/*!< in: page number to put in node
pointer */
- mem_heap_t* heap, /*!< in: memory heap where pointer
+ mem_heap_t* heap) /*!< in: memory heap where pointer
created */
- ulint level) /*!< in: level of rec in tree:
- 0 means leaf level */
{
dtuple_t* tuple;
dfield_t* field;
@@ -290,7 +290,6 @@ rtr_update_mbr_field(
ulint up_match = 0;
ulint low_match = 0;
ulint child;
- ulint level;
ulint rec_info;
page_zip_des_t* page_zip;
bool ins_suc = true;
@@ -309,7 +308,7 @@ rtr_update_mbr_field(
page_zip = buf_block_get_page_zip(block);
child = btr_node_ptr_get_child_page_no(rec, offsets);
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
+ const bool is_leaf = page_is_leaf(block->frame);
if (new_rec) {
child_rec = new_rec;
@@ -318,7 +317,7 @@ rtr_update_mbr_field(
}
dtuple_t* node_ptr = rtr_index_build_node_ptr(
- index, mbr, child_rec, child, heap, level);
+ index, mbr, child_rec, child, heap);
/* We need to remember the child page no of cursor2, since the page could be
reorganized or a new rec could be inserted before it. */
@@ -428,7 +427,7 @@ rtr_update_mbr_field(
ut_ad(old_rec != insert_rec);
page_cur_position(old_rec, block, &page_cur);
- offsets2 = rec_get_offsets(old_rec, index, NULL, !level,
+ offsets2 = rec_get_offsets(old_rec, index, NULL, is_leaf,
ULINT_UNDEFINED, &heap);
page_cur_delete_rec(&page_cur, index, offsets2, mtr);
@@ -458,7 +457,7 @@ update_mbr:
cur2_rec = cursor2->page_cur.rec;
offsets2 = rec_get_offsets(cur2_rec, index, NULL,
- !level,
+ is_leaf,
ULINT_UNDEFINED, &heap);
cur2_rec_info = rec_get_info_bits(cur2_rec,
@@ -518,7 +517,7 @@ update_mbr:
if (ins_suc) {
btr_cur_position(index, insert_rec, block, cursor);
offsets = rec_get_offsets(insert_rec,
- index, offsets, !level,
+ index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
}
@@ -533,7 +532,7 @@ update_mbr:
cur2_rec = btr_cur_get_rec(cursor2);
offsets2 = rec_get_offsets(cur2_rec, index, NULL,
- !level,
+ is_leaf,
ULINT_UNDEFINED, &heap);
/* If the cursor2 position is on a wrong rec, we
@@ -547,7 +546,7 @@ update_mbr:
while (!page_rec_is_supremum(cur2_rec)) {
offsets2 = rec_get_offsets(cur2_rec, index,
NULL,
- !level,
+ is_leaf,
ULINT_UNDEFINED,
&heap);
cur2_pno = btr_node_ptr_get_child_page_no(
@@ -633,7 +632,6 @@ rtr_adjust_upper_level(
buf_block_t* new_block, /*!< in/out: the new half page */
rtr_mbr_t* mbr, /*!< in: MBR on the old page */
rtr_mbr_t* new_mbr, /*!< in: MBR on the new page */
- ulint direction, /*!< in: FSP_UP or FSP_DOWN */
mtr_t* mtr) /*!< in: mtr */
{
page_t* page;
@@ -652,10 +650,8 @@ rtr_adjust_upper_level(
ulint next_page_no;
ulint space;
page_cur_t* page_cursor;
- rtr_mbr_t parent_mbr;
lock_prdt_t prdt;
lock_prdt_t new_prdt;
- lock_prdt_t parent_prdt;
dberr_t err;
big_rec_t* dummy_big_rec;
rec_t* rec;
@@ -667,9 +663,8 @@ rtr_adjust_upper_level(
cursor.thr = sea_cur->thr;
/* Get the level of the split pages */
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
- ut_ad(level
- == btr_page_get_level(buf_block_get_frame(new_block), mtr));
+ level = btr_page_get_level(buf_block_get_frame(block));
+ ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block)));
page = buf_block_get_frame(block);
page_no = block->page.id.page_no();
@@ -686,8 +681,6 @@ rtr_adjust_upper_level(
page_cursor = btr_cur_get_page_cur(&cursor);
- rtr_get_mbr_from_rec(page_cursor->rec, offsets, &parent_mbr);
-
rtr_update_mbr_field(&cursor, offsets, NULL, page, mbr, NULL, mtr);
/* Already updated parent MBR, reset in our path */
@@ -703,7 +696,7 @@ rtr_adjust_upper_level(
node_ptr_upper = rtr_index_build_node_ptr(
index, new_mbr,
page_rec_get_next(page_get_infimum_rec(new_page)),
- new_page_no, heap, level);
+ new_page_no, heap);
ulint up_match = 0;
ulint low_match = 0;
@@ -742,11 +735,9 @@ rtr_adjust_upper_level(
prdt.op = 0;
new_prdt.data = static_cast<void*>(new_mbr);
new_prdt.op = 0;
- parent_prdt.data = static_cast<void*>(&parent_mbr);
- parent_prdt.op = 0;
lock_prdt_update_parent(block, new_block, &prdt, &new_prdt,
- &parent_prdt, dict_index_get_space(index),
+ index->table->space_id,
page_cursor->block->page.id.page_no());
mem_heap_free(heap);
@@ -913,7 +904,7 @@ rtr_split_page_move_rec_list(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
- if (is_leaf && !dict_table_is_temporary(index->table)) {
+ if (is_leaf && !index->table->is_temporary()) {
page_update_max_trx_id(new_block, NULL,
page_get_max_trx_id(page),
mtr);
@@ -1001,7 +992,6 @@ rtr_page_split_and_insert(
page_t* page;
page_t* new_page;
ulint page_no;
- byte direction;
ulint hint_page_no;
buf_block_t* new_block;
page_zip_des_t* page_zip;
@@ -1047,7 +1037,7 @@ func_start:
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
- page_level = btr_page_get_level(page, mtr);
+ page_level = btr_page_get_level(page);
current_ssn = page_get_ssn_id(page);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
@@ -1055,7 +1045,7 @@ func_start:
page_no = block->page.id.page_no();
- if (btr_page_get_prev(page, mtr) == FIL_NULL && !page_is_leaf(page)) {
+ if (!page_has_prev(page) && !page_is_leaf(page)) {
first_rec = page_rec_get_next(
page_get_infimum_rec(buf_block_get_frame(block)));
}
@@ -1065,7 +1055,7 @@ func_start:
*heap, cursor, offsets, tuple, &buf_pos);
/* Divide all mbrs to two groups. */
- n_recs = page_get_n_recs(page) + 1;
+ n_recs = ulint(page_get_n_recs(page)) + 1;
end_split_node = rtr_split_node_array + n_recs;
@@ -1091,9 +1081,8 @@ func_start:
static_cast<uchar*>(first_rec));
/* Allocate a new page to the index */
- direction = FSP_UP;
hint_page_no = page_no + 1;
- new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
+ new_block = btr_page_alloc(cursor->index, hint_page_no, FSP_UP,
page_level, mtr, mtr);
new_page_zip = buf_block_get_page_zip(new_block);
btr_page_create(new_block, new_page_zip, cursor->index,
@@ -1268,12 +1257,12 @@ after_insert:
/* Check any predicate locks need to be moved/copied to the
new page */
- lock_prdt_update_split(block, new_block, &prdt, &new_prdt,
- dict_index_get_space(cursor->index), page_no);
+ lock_prdt_update_split(new_block, &prdt, &new_prdt,
+ cursor->index->table->space_id, page_no);
/* Adjust the upper level. */
rtr_adjust_upper_level(cursor, flags, block, new_block,
- &mbr, &new_mbr, direction, mtr);
+ &mbr, &new_mbr, mtr);
/* Save the new ssn to the root page, since we need to reinit
the first ssn value from it after server restart. */
@@ -1297,7 +1286,7 @@ after_insert:
if (!rec) {
/* We play safe and reset the free bits for new_page */
if (!dict_index_is_clust(cursor->index)
- && !dict_table_is_temporary(cursor->index->table)) {
+ && !cursor->index->table->is_temporary()) {
ibuf_reset_free_bits(new_block);
ibuf_reset_free_bits(block);
}
@@ -1334,7 +1323,6 @@ dberr_t
rtr_ins_enlarge_mbr(
/*================*/
btr_cur_t* btr_cur, /*!< in: btr cursor */
- que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
dberr_t err = DB_SUCCESS;
@@ -1445,7 +1433,7 @@ rtr_page_copy_rec_list_end_no_locks(
btr_assert_not_corrupted(new_block, index);
ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
- ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
+ ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint)
(page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
cur_rec = page_rec_get_next(
@@ -1668,10 +1656,7 @@ rtr_merge_mbr_changed(
btr_cur_t* cursor2, /*!< in: the other cursor */
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
- rtr_mbr_t* new_mbr, /*!< out: MBR to update */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index) /*!< in: index */
+ rtr_mbr_t* new_mbr) /*!< out: MBR to update */
{
double* mbr;
double mbr1[SPDIMS * 2];
@@ -1716,9 +1701,6 @@ rtr_merge_and_update_mbr(
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
page_t* child_page, /*!< in: the page. */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index, /*!< in: index */
mtr_t* mtr) /*!< in: mtr */
{
dberr_t err = DB_SUCCESS;
@@ -1728,8 +1710,7 @@ rtr_merge_and_update_mbr(
ut_ad(dict_index_is_spatial(cursor->index));
changed = rtr_merge_mbr_changed(cursor, cursor2, offsets, offsets2,
- &new_mbr, merge_block,
- block, index);
+ &new_mbr);
/* Update the mbr field of the rec. And will delete the record
pointed by cursor2 */
@@ -1739,7 +1720,7 @@ rtr_merge_and_update_mbr(
err = DB_ERROR;
}
} else {
- rtr_node_ptr_delete(cursor2->index, cursor2, block, mtr);
+ rtr_node_ptr_delete(cursor2, mtr);
}
return(err);
@@ -1750,10 +1731,8 @@ Deletes on the upper level the node pointer to a page. */
void
rtr_node_ptr_delete(
/*================*/
- dict_index_t* index, /*!< in: index tree */
btr_cur_t* cursor, /*!< in: search cursor, contains information
about parent nodes in search */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr) /*!< in: mtr */
{
ibool compressed;
@@ -1843,12 +1822,14 @@ rtr_rec_cal_increase(
@param[in] tuple range tuple containing mbr, may also be empty tuple
@param[in] mode search mode
@return estimated number of rows */
-int64_t
+ha_rows
rtr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple,
page_cur_mode_t mode)
{
+ ut_ad(dict_index_is_spatial(index));
+
/* Check tuple & mode */
if (tuple->n_fields == 0) {
return(HA_POS_ERROR);
@@ -1870,64 +1851,48 @@ rtr_estimate_n_rows_in_range(
);
/* Read mbr from tuple. */
- const dfield_t* dtuple_field;
- ulint dtuple_f_len MY_ATTRIBUTE((unused));
rtr_mbr_t range_mbr;
double range_area;
- byte* range_mbr_ptr;
+ const byte* range_mbr_ptr;
- dtuple_field = dtuple_get_nth_field(tuple, 0);
- dtuple_f_len = dfield_get_len(dtuple_field);
- range_mbr_ptr = reinterpret_cast<byte*>(dfield_get_data(dtuple_field));
+ const dfield_t* dtuple_field = dtuple_get_nth_field(tuple, 0);
+ ut_ad(dfield_get_len(dtuple_field) >= DATA_MBR_LEN);
+ range_mbr_ptr = reinterpret_cast<const byte*>(
+ dfield_get_data(dtuple_field));
- ut_ad(dtuple_f_len >= DATA_MBR_LEN);
rtr_read_mbr(range_mbr_ptr, &range_mbr);
range_area = (range_mbr.xmax - range_mbr.xmin)
* (range_mbr.ymax - range_mbr.ymin);
/* Get index root page. */
- page_size_t page_size(dict_table_page_size(index->table));
- page_id_t page_id(dict_index_get_space(index),
- dict_index_get_page(index));
mtr_t mtr;
- buf_block_t* block;
- page_t* page;
- ulint n_recs;
- mtr_start(&mtr);
- mtr.set_named_space(dict_index_get_space(index));
- mtr_s_lock(dict_index_get_lock(index), &mtr);
+ mtr.start();
+ index->set_modified(mtr);
+ mtr_s_lock(&index->lock, &mtr);
- block = btr_block_get(page_id, page_size, RW_S_LATCH, index, &mtr);
- page = buf_block_get_frame(block);
- n_recs = page_header_get_field(page, PAGE_N_RECS);
+ buf_block_t* block = btr_block_get(
+ page_id_t(index->table->space_id, index->page),
+ page_size_t(index->table->space->flags),
+ RW_S_LATCH, index, &mtr);
+ const page_t* page = buf_block_get_frame(block);
+ const unsigned n_recs = page_header_get_field(page, PAGE_N_RECS);
if (n_recs == 0) {
- mtr_commit(&mtr);
+ mtr.commit();
return(HA_POS_ERROR);
}
- rec_t* rec;
- byte* field;
- ulint len;
- ulint* offsets = NULL;
- mem_heap_t* heap;
-
- heap = mem_heap_create(512);
- rec = page_rec_get_next(page_get_infimum_rec(page));
- offsets = rec_get_offsets(rec, index, offsets, page_rec_is_leaf(rec),
- ULINT_UNDEFINED, &heap);
-
/* Scan records in root page and calculate area. */
double area = 0;
- while (!page_rec_is_supremum(rec)) {
+ for (const rec_t* rec = page_rec_get_next(
+ page_get_infimum_rec(block->frame));
+ !page_rec_is_supremum(rec);
+ rec = page_rec_get_next_const(rec)) {
rtr_mbr_t mbr;
double rec_area;
- field = rec_get_nth_field(rec, offsets, 0, &len);
- ut_ad(len == DATA_MBR_LEN);
-
- rtr_read_mbr(field, &mbr);
+ rtr_read_mbr(rec, &mbr);
rec_area = (mbr.xmax - mbr.xmin) * (mbr.ymax - mbr.ymin);
@@ -1944,8 +1909,8 @@ rtr_estimate_n_rows_in_range(
case PAGE_CUR_WITHIN:
case PAGE_CUR_MBR_EQUAL:
if (rtree_key_cmp(
- PAGE_CUR_WITHIN, range_mbr_ptr,
- DATA_MBR_LEN, field, DATA_MBR_LEN)
+ PAGE_CUR_WITHIN, range_mbr_ptr,
+ DATA_MBR_LEN, rec, DATA_MBR_LEN)
== 0) {
area += 1;
}
@@ -1959,22 +1924,23 @@ rtr_estimate_n_rows_in_range(
switch (mode) {
case PAGE_CUR_CONTAIN:
case PAGE_CUR_INTERSECT:
- area += rtree_area_overlapping(range_mbr_ptr,
- field, DATA_MBR_LEN) / rec_area;
+ area += rtree_area_overlapping(
+ range_mbr_ptr, rec, DATA_MBR_LEN)
+ / rec_area;
break;
case PAGE_CUR_DISJOINT:
area += 1;
- area -= rtree_area_overlapping(range_mbr_ptr,
- field, DATA_MBR_LEN) / rec_area;
+ area -= rtree_area_overlapping(
+ range_mbr_ptr, rec, DATA_MBR_LEN)
+ / rec_area;
break;
case PAGE_CUR_WITHIN:
case PAGE_CUR_MBR_EQUAL:
- if (rtree_key_cmp(
- PAGE_CUR_WITHIN, range_mbr_ptr,
- DATA_MBR_LEN, field, DATA_MBR_LEN)
- == 0) {
+ if (!rtree_key_cmp(
+ PAGE_CUR_WITHIN, range_mbr_ptr,
+ DATA_MBR_LEN, rec, DATA_MBR_LEN)) {
area += range_area / rec_area;
}
@@ -1983,17 +1949,14 @@ rtr_estimate_n_rows_in_range(
ut_error;
}
}
-
- rec = page_rec_get_next(rec);
}
- mtr_commit(&mtr);
- mem_heap_free(heap);
+ mtr.commit();
- if (!isfinite(area)) {
+ if (!std::isfinite(area)) {
return(HA_POS_ERROR);
}
- return(static_cast<int64_t>(dict_table_get_n_rows(index->table)
- * area / n_recs));
+ area /= n_recs;
+ return ha_rows(dict_table_get_n_rows(index->table) * area);
}
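The rewritten rtr_estimate_n_rows_in_range() walks the root page without building offset arrays (each record begins with the fixed-size MBR), accumulates a per-record overlap ratio, and returns ha_rows. The tail of the estimate, paraphrased with identifiers as used above:

    /* ratio_sum: accumulated per-record containment/overlap ratio,
       n_recs: PAGE_N_RECS of the root page */
    if (!std::isfinite(ratio_sum)) {
            return HA_POS_ERROR;
    }
    return ha_rows(dict_table_get_n_rows(index->table) * ratio_sum / n_recs);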
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
index 5f8657238b0..66a6d57d986 100644
--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -96,7 +96,6 @@ rtr_pcur_getnext_from_path(
{
dict_index_t* index = btr_cur->index;
bool found = false;
- ulint space = dict_index_get_space(index);
page_cur_t* page_cursor;
ulint level = 0;
node_visit_t next_rec;
@@ -146,7 +145,7 @@ rtr_pcur_getnext_from_path(
| MTR_MEMO_X_LOCK));
}
- const page_size_t& page_size = dict_table_page_size(index->table);
+ const page_size_t page_size(index->table->space->flags);
/* Pop each node/page to be searched from "path" structure
and do a search on it. Please note, any pages that are in
@@ -266,11 +265,11 @@ rtr_pcur_getnext_from_path(
btr_cur->page_cur.block)));
#endif /* UNIV_RTR_DEBUG */
- page_id_t page_id(space, next_rec.page_no);
dberr_t err = DB_SUCCESS;
block = buf_page_get_gen(
- page_id, page_size,
+ page_id_t(index->table->space_id,
+ next_rec.page_no), page_size,
rw_latch, NULL, BUF_GET, __FILE__, __LINE__, mtr, &err);
if (block == NULL) {
@@ -299,11 +298,12 @@ rtr_pcur_getnext_from_path(
&& mode != PAGE_CUR_RTREE_LOCATE) {
ut_ad(rtr_info->thr);
lock_place_prdt_page_lock(
- space, next_page_no, index,
+ index->table->space_id,
+ next_page_no, index,
rtr_info->thr);
}
new_split = true;
-#if UNIV_GIS_DEBUG
+#if defined(UNIV_GIS_DEBUG)
fprintf(stderr,
"GIS_DIAG: Splitted page found: %d, %ld\n",
static_cast<int>(need_parent), next_page_no);
@@ -405,8 +405,7 @@ rtr_pcur_getnext_from_path(
}
lock_prdt_lock(block, &prdt, index, LOCK_S,
- LOCK_PREDICATE, btr_cur->rtr_info->thr,
- mtr);
+ LOCK_PREDICATE, btr_cur->rtr_info->thr);
if (rw_latch == RW_NO_LATCH) {
rw_lock_s_unlock(&(block->lock));
@@ -420,11 +419,11 @@ rtr_pcur_getnext_from_path(
if (my_latch_mode == BTR_MODIFY_TREE
&& level == 0) {
ut_ad(rw_latch == RW_NO_LATCH);
- page_id_t my_page_id(
- space, block->page.id.page_no());
btr_cur_latch_leaves(
- block, my_page_id,
+ block,
+ page_id_t(index->table->space_id,
+ block->page.id.page_no()),
page_size, BTR_MODIFY_TREE,
btr_cur, mtr);
}
@@ -717,7 +716,7 @@ rtr_page_get_father_node_ptr(
ut_ad(dict_index_get_page(index) != page_no);
- level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
+ level = btr_page_get_level(btr_cur_get_page(cursor));
user_rec = btr_cur_get_rec(cursor);
ut_a(page_rec_is_user_rec(user_rec));
@@ -727,7 +726,7 @@ rtr_page_get_father_node_ptr(
rtr_get_mbr_from_rec(user_rec, offsets, &mbr);
tuple = rtr_index_build_node_ptr(
- index, &mbr, user_rec, page_no, heap, level);
+ index, &mbr, user_rec, page_no, heap);
if (sea_cur && !sea_cur->rtr_info) {
sea_cur = NULL;
@@ -1254,8 +1253,8 @@ rtr_check_discard_page(
mutex_exit(&index->rtr_track->rtr_active_mutex);
lock_mutex_enter();
- lock_prdt_page_free_from_discard(block, lock_sys->prdt_hash);
- lock_prdt_page_free_from_discard(block, lock_sys->prdt_page_hash);
+ lock_prdt_page_free_from_discard(block, lock_sys.prdt_hash);
+ lock_prdt_page_free_from_discard(block, lock_sys.prdt_page_hash);
lock_mutex_exit();
}
@@ -1344,9 +1343,8 @@ rtr_cur_restore_position(
const page_t* page;
page_cur_t* page_cursor;
node_visit_t* node = rtr_get_parent_node(btr_cur, level, false);
- ulint space = dict_index_get_space(index);
node_seq_t path_ssn = node->seq_no;
- page_size_t page_size = dict_table_page_size(index->table);
+ const page_size_t page_size(index->table->space->flags);
ulint page_no = node->page_no;
@@ -1359,11 +1357,11 @@ rtr_cur_restore_position(
ut_ad(r_cursor == node->cursor);
search_again:
- page_id_t page_id(space, page_no);
dberr_t err = DB_SUCCESS;
block = buf_page_get_gen(
- page_id, page_size, RW_X_LATCH, NULL,
+ page_id_t(index->table->space_id, page_no),
+ page_size, RW_X_LATCH, NULL,
BUF_GET, __FILE__, __LINE__, mtr, &err);
ut_ad(block);
@@ -1453,7 +1451,7 @@ rtr_leaf_push_match_rec(
data_len = rec_offs_data_size(offsets) + rec_offs_extra_size(offsets);
match_rec->used += data_len;
- ut_ad(match_rec->used < UNIV_PAGE_SIZE);
+ ut_ad(match_rec->used < srv_page_size);
}
/**************************************************************//**
@@ -1679,7 +1677,7 @@ rtr_cur_search_with_match(
page = buf_block_get_frame(block);
- const ulint level = btr_page_get_level(page, mtr);
+ const ulint level = btr_page_get_level(page);
const bool is_leaf = !level;
if (mode == PAGE_CUR_RTREE_LOCATE) {
@@ -1715,7 +1713,7 @@ rtr_cur_search_with_match(
first page as much as possible, as there will be a problem
when updating the MIN_REC rec in a compressed table */
if (buf_block_get_page_zip(block)
- && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
+ && !page_has_prev(page)
&& page_get_n_recs(page) >= 2) {
rec = page_rec_get_next_const(rec);
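gis0sea.cc now derives the page size from the tablespace flags and constructs page_id_t values inline instead of keeping a separate space variable. The buffer-pool fetch pattern used above, as a hedged sketch:

    const page_size_t page_size(index->table->space->flags);
    dberr_t err = DB_SUCCESS;
    buf_block_t* block = buf_page_get_gen(
            page_id_t(index->table->space_id, page_no), page_size,
            RW_X_LATCH, NULL, BUF_GET, __FILE__, __LINE__, mtr, &err);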
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index da542d4f742..fa1a9bc5db9 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -60,7 +60,8 @@ ib_create(
if (n_sync_obj == 0) {
table->heap = mem_heap_create_typed(
- ut_min(static_cast<ulint>(4096),
+ std::min<ulong>(
+ 4096,
MEM_MAX_ALLOC_IN_BUF / 2
- MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
type);
@@ -84,7 +85,8 @@ ib_create(
for (ulint i = 0; i < n_sync_obj; i++) {
table->heaps[i] = mem_heap_create_typed(
- ut_min(static_cast<ulint>(4096),
+ std::min<ulong>(
+ 4096,
MEM_MAX_ALLOC_IN_BUF / 2
- MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
type);
@@ -126,7 +128,8 @@ ib_recreate(
for (ulint i = 0; i < new_table->n_sync_obj; i++) {
new_table->heaps[i] = mem_heap_create_typed(
- ut_min(static_cast<ulint>(4096),
+ std::min<ulong>(
+ 4096,
MEM_MAX_ALLOC_IN_BUF / 2
- MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)),
MEM_HEAP_FOR_PAGE_HASH);
@@ -192,7 +195,7 @@ ha_clear(
#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Maximum number of records in a page */
-static const lint MAX_N_POINTERS
+static const ulint MAX_N_POINTERS
= UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -242,8 +245,8 @@ ha_insert_for_fold_func(
buf_block_t* prev_block = prev_node->block;
ut_a(prev_block->frame
== page_align(prev_node->data));
- ut_a(my_atomic_addlint(
- &prev_block->n_pointers, -1)
+ ut_a(my_atomic_addlint(&prev_block->n_pointers,
+ ulint(-1))
< MAX_N_POINTERS);
ut_a(my_atomic_addlint(&block->n_pointers, 1)
< MAX_N_POINTERS);
@@ -339,7 +342,7 @@ ha_delete_hash_node(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
ut_a(del_node->block->frame = page_align(del_node->data));
- ut_a(my_atomic_addlint(&del_node->block->n_pointers, -1)
+ ut_a(my_atomic_addlint(&del_node->block->n_pointers, ulint(-1))
< MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -382,7 +385,8 @@ ha_search_and_update_if_found_func(
if (node) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- ut_a(my_atomic_addlint(&node->block->n_pointers, -1)
+ ut_a(my_atomic_addlint(&node->block->n_pointers,
+ ulint(-1))
< MAX_N_POINTERS);
ut_a(my_atomic_addlint(&new_block->n_pointers, 1)
< MAX_N_POINTERS);
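ha0ha.cc makes MAX_N_POINTERS unsigned and passes ulint(-1) to my_atomic_addlint(), i.e. it decrements through well-defined unsigned wraparound while still getting the previous value back for the sanity check. The same idea with std::atomic in plain C++ (illustrative only; InnoDB uses its own my_atomic wrappers):

    #include <atomic>
    #include <cassert>

    static std::atomic<unsigned long> n_pointers(1);

    static void drop_pointer()
    {
            /* Adding (unsigned long)-1 wraps around, i.e. decrements, and
               fetch_add() returns the value before the update. */
            unsigned long prev = n_pointers.fetch_add((unsigned long) -1);
            assert(prev > 0);
            (void) prev;    /* keep NDEBUG builds warning-free */
    }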
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 6daffbd7ec1..a747038aed4 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -108,10 +108,9 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "trx0purge.h"
#endif /* UNIV_DEBUG */
#include "trx0roll.h"
-#include "trx0sys.h"
+#include "trx0rseg.h"
#include "trx0trx.h"
#include "fil0pagecompress.h"
-#include "trx0xa.h"
#include "ut0mem.h"
#include "row0ext.h"
@@ -128,12 +127,6 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
const char *tb, size_t tblen);
void close_thread_tables(THD* thd);
-/** Check if user has used xtradb extended system variable that
-is not currently supported by innodb or marked as deprecated. */
-static
-void
-innodb_check_deprecated(void);
-
#ifdef MYSQL_DYNAMIC_PLUGIN
#define tc_size 400
#define tdc_size 400
@@ -151,7 +144,6 @@ innodb_check_deprecated(void);
#ifdef WITH_WSREP
#include "dict0priv.h"
-#include "ut0byte.h"
#include <mysql/service_md5.h>
#include "wsrep_sst.h"
@@ -190,43 +182,29 @@ static const long AUTOINC_OLD_STYLE_LOCKING = 0;
static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;
-static long innobase_log_buffer_size;
-static long innobase_open_files=0;
+static ulong innobase_open_files;
static long innobase_autoinc_lock_mode;
-static ulong innobase_commit_concurrency = 0;
-static ulong innobase_read_io_threads;
-static ulong innobase_write_io_threads;
+static ulong innobase_commit_concurrency;
-static long long innobase_buffer_pool_size;
+static ulonglong innobase_buffer_pool_size;
/** Percentage of the buffer pool to reserve for 'old' blocks.
Connected to buf_LRU_old_ratio. */
static uint innobase_old_blocks_pct;
+static char* innobase_data_file_path;
+static char* innobase_temp_data_file_path;
+
/* The default values for the following char* start-up parameters
-are determined in innobase_init below: */
+are determined in innodb_init_params(). */
static char* innobase_data_home_dir;
-static char* innobase_data_file_path;
-static char* innobase_temp_data_file_path;
-static char* innobase_file_format_name;
-static char* innobase_change_buffering;
static char* innobase_enable_monitor_counter;
static char* innobase_disable_monitor_counter;
static char* innobase_reset_monitor_counter;
static char* innobase_reset_all_monitor_counter;
-/* The highest file format being used in the database. The value can be
-set by user, however, it will be adjusted to the newer file format if
-a table of such format is created/opened. */
-char* innobase_file_format_max;
-
-/** Default value of innodb_file_format */
-static const char* innodb_file_format_default = "Barracuda";
-/** Default value of innodb_file_format_max */
-static const char* innodb_file_format_max_default = "Antelope";
-
-static char* innobase_file_flush_method;
+static ulong innodb_flush_method;
/* This variable can be set in the server configure file, specifying
stopword table to be used */
@@ -235,16 +213,12 @@ static char* innobase_server_stopword_table;
/* Below we have boolean-valued start-up parameters, and their default
values */
-static my_bool innobase_file_format_check;
static my_bool innobase_use_atomic_writes;
-static my_bool innobase_use_fallocate;
-static my_bool innobase_use_doublewrite;
static my_bool innobase_use_checksums;
static my_bool innobase_locks_unsafe_for_binlog;
static my_bool innobase_rollback_on_timeout;
static my_bool innobase_create_status_file;
my_bool innobase_stats_on_metadata;
-static my_bool innobase_large_prefix;
static my_bool innodb_optimize_fulltext_only;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
@@ -336,7 +310,7 @@ thd_destructor_proxy(void *)
MY_MEMORY_ORDER_RELAXED);
while (srv_fast_shutdown == 0 &&
- (trx_sys_any_active_transactions() ||
+ (trx_sys.any_active_transactions() ||
(uint)thread_count > srv_n_purge_threads + 1)) {
thd_proc_info(thd, "InnoDB slow shutdown wait");
os_thread_sleep(1000);
@@ -450,6 +424,30 @@ static TYPELIB innodb_lock_schedule_algorithm_typelib = {
NULL
};
+/** Names of allowed values of innodb_flush_method */
+const char* innodb_flush_method_names[] = {
+ "fsync",
+ "O_DSYNC",
+ "littlesync",
+ "nosync",
+ "O_DIRECT",
+ "O_DIRECT_NO_FSYNC",
+#ifdef _WIN32
+ "unbuffered",
+ "async_unbuffered" /* alias for "unbuffered" */,
+ "normal" /* alias for "fsync" */,
+#endif
+ NullS
+};
+
+/** Enumeration of innodb_flush_method */
+TYPELIB innodb_flush_method_typelib = {
+ array_elements(innodb_flush_method_names) - 1,
+ "innodb_flush_method_typelib",
+ innodb_flush_method_names,
+ NULL
+};
+
/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
@@ -459,13 +457,22 @@ operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
static ulong innobase_active_counter = 0;
/** Allowed values of innodb_change_buffering */
-static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
+static const char* innodb_change_buffering_names[] = {
"none", /* IBUF_USE_NONE */
"inserts", /* IBUF_USE_INSERT */
"deletes", /* IBUF_USE_DELETE_MARK */
"changes", /* IBUF_USE_INSERT_DELETE_MARK */
"purges", /* IBUF_USE_DELETE */
- "all" /* IBUF_USE_ALL */
+ "all", /* IBUF_USE_ALL */
+ NullS
+};
+
+/** Enumeration of innodb_change_buffering */
+static TYPELIB innodb_change_buffering_typelib = {
+ array_elements(innodb_change_buffering_names) - 1,
+ "innodb_change_buffering_typelib",
+ innodb_change_buffering_names,
+ NULL
};
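Editorial note (not part of the patch): once innodb_change_buffering is backed by this TYPELIB, the strcasecmp loop over innobase_change_buffering_values (removed in innodb_init_params further down) is no longer needed, because the stored index maps one-to-one onto ibuf_use_t. A minimal, self-contained sketch of the lookup such an enum variable performs internally; the helper name is hypothetical:

#include <cctype>
#include <cstddef>

/* Hypothetical helper: resolve a name against a NullS-terminated name
   array the way a TYPELIB-backed enum sysvar does; the returned index
   doubles as the enum value (e.g. IBUF_USE_ALL). */
static int lookup_enum_value(const char* name, const char* const* names)
{
	for (size_t i = 0; names[i] != nullptr; i++) {
		const char* a = name;
		const char* b = names[i];
		while (*a && *b
		       && std::tolower((unsigned char) *a)
			  == std::tolower((unsigned char) *b)) {
			a++, b++;
		}
		if (*a == '\0' && *b == '\0') {
			return int(i);
		}
	}
	return -1;	/* not a recognized value */
}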
/** Retrieve the FTS Relevance Ranking result for doc with doc_id
@@ -586,7 +593,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(dict_foreign_err_mutex),
PSI_KEY(dict_sys_mutex),
PSI_KEY(recalc_pool_mutex),
- PSI_KEY(file_format_max_mutex),
PSI_KEY(fil_system_mutex),
PSI_KEY(flush_list_mutex),
PSI_KEY(fts_bg_threads_mutex),
@@ -616,7 +622,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(srv_misc_tmpfile_mutex),
PSI_KEY(srv_monitor_file_mutex),
PSI_KEY(buf_dblwr_mutex),
- PSI_KEY(trx_undo_mutex),
PSI_KEY(trx_pool_mutex),
PSI_KEY(trx_pool_manager_mutex),
PSI_KEY(srv_sys_mutex),
@@ -764,7 +769,7 @@ static
int
innodb_tmpdir_validate(
THD* thd,
- struct st_mysql_sys_var* var,
+ struct st_mysql_sys_var*,
void* save,
struct st_mysql_value* value)
{
@@ -913,42 +918,6 @@ innodb_encrypt_tables_validate(
static const char innobase_hton_name[]= "InnoDB";
-static const char* deprecated_innodb_support_xa
- = "Using innodb_support_xa is deprecated and the"
- " parameter may be removed in future releases.";
-
-static const char* deprecated_innodb_support_xa_off
- = "Using innodb_support_xa is deprecated and the"
- " parameter may be removed in future releases."
- " Only innodb_support_xa=ON is allowed.";
-
-/** Update the session variable innodb_support_xa.
-@param[in] thd current session
-@param[in] var the system variable innodb_support_xa
-@param[in,out] var_ptr the contents of the variable
-@param[in] save the to-be-updated value */
-static
-void
-innodb_support_xa_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
-{
- my_bool innodb_support_xa = *static_cast<const my_bool*>(save);
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND,
- innodb_support_xa
- ? deprecated_innodb_support_xa
- : deprecated_innodb_support_xa_off);
-}
-
-static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB support for the XA two-phase commit",
- /* check_func */ NULL, innodb_support_xa_update,
- /* default */ TRUE);
-
static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
"Enable InnoDB locking in LOCK TABLES",
/* check_func */ NULL, /* update_func */ NULL,
@@ -965,7 +934,7 @@ static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
"Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
- NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
+ NULL, NULL, 50, 0, 1024 * 1024 * 1024, 0);
static MYSQL_THDVAR_STR(ft_user_stopword_table,
PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
@@ -984,6 +953,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
{"buffer_pool_resize_status",
(char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR},
+ {"buffer_pool_load_incomplete",
+ &export_vars.innodb_buffer_pool_load_incomplete, SHOW_BOOL},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
{"buffer_pool_bytes_data",
@@ -1137,6 +1108,9 @@ static SHOW_VAR innodb_status_variables[]= {
{"defragment_count",
(char*) &export_vars.innodb_defragment_count, SHOW_LONG},
+ {"instant_alter_column",
+ (char*) &export_vars.innodb_instant_alter_column, SHOW_LONG},
+
/* Online alter table status variables */
{"onlineddl_rowlog_rows",
(char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
@@ -1336,25 +1310,6 @@ static
void
innodb_params_adjust();
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name); /*!< in: pointer to file format
- name */
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_max_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*==================================*/
- const char* format_max); /*!< in: parameter value */
-
/*******************************************************************//**
This function is used to prepare an X/Open XA distributed transaction.
@return 0 or error number */
@@ -1547,9 +1502,7 @@ static
int
innobase_commit_concurrency_validate(
/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
+ THD*, st_mysql_sys_var*,
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
@@ -1711,10 +1664,7 @@ extern "C" time_t thd_start_time(const THD* thd);
/******************************************************************//**
Get the thread start time.
@return the thread start time in seconds since the epoch. */
-ulint
-thd_start_time_in_secs(
-/*===================*/
- THD* thd) /*!< in: thread handle, or NULL */
+ulint thd_start_time_in_secs(THD*)
{
// FIXME: This function should be added to the server code.
//return(thd_start_time(thd));
@@ -1861,19 +1811,6 @@ thd_lock_wait_timeout(
return(THDVAR(thd, lock_wait_timeout));
}
-/******************************************************************//**
-Set the time waited for the lock for the current query. */
-void
-thd_set_lock_wait_time(
-/*===================*/
- THD* thd, /*!< in/out: thread handle */
- ulint value) /*!< in: time waited for the lock */
-{
- if (thd) {
- thd_storage_lock_wait(thd, value);
- }
-}
-
/** Get the value of innodb_tmpdir.
@param[in] thd thread handle, or NULL to query
the global innodb_tmpdir.
@@ -1936,7 +1873,7 @@ Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.
@return MySQL error code */
-int
+static int
convert_error_code_to_mysql(
/*========================*/
dberr_t error, /*!< in: InnoDB error code */
@@ -2064,7 +2001,7 @@ convert_error_code_to_mysql(
/* If prefix is true then a 768-byte prefix is stored
locally for BLOB fields. Refer to dict_table_get_format().
We limit max record size to 16k for 64k page size. */
- bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A);
+ bool prefix = !DICT_TF_HAS_ATOMIC_BLOBS(flags);
bool comp = !!(flags & DICT_TF_COMPACT);
ulint free_space = page_get_free_space_of_empty(comp) / 2;
@@ -2156,17 +2093,6 @@ innobase_mysql_print_thd(
}
/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-const char*
-innobase_get_err_msg(
-/*=================*/
- int error_code) /*!< in: MySQL error code */
-{
- return(my_get_err_msg(error_code));
-}
-
-/******************************************************************//**
Get the variable length bounds of the given character set. */
void
innobase_get_cset_width(
@@ -2430,24 +2356,8 @@ static int mysql_tmpfile_path(const char *path, const char *prefix)
DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
char filename[FN_REFLEN];
- File fd = create_temp_file(filename, path, prefix,
-#ifdef __WIN__
- O_BINARY | O_TRUNC | O_SEQUENTIAL |
- O_SHORT_LIVED |
-#endif /* __WIN__ */
- O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY,
- MYF(MY_WME));
- if (fd >= 0) {
-#ifndef __WIN__
- /*
- This can be removed once the following bug is fixed:
- Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option
- (file not removed) (Unix)
- */
- unlink(filename);
-#endif /* !__WIN__ */
- }
-
+ File fd = create_temp_file(filename, path, prefix, O_BINARY | O_SEQUENTIAL,
+ MYF(MY_WME | MY_TEMPORARY));
return fd;
}
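Editorial note (not part of the patch): the rewritten mysql_tmpfile_path() relies on the MY_TEMPORARY flag to obtain a file that disappears when closed, instead of doing the create-then-unlink dance by hand. On POSIX the underlying idea is the classic anonymous-file pattern; a standalone sketch using plain POSIX calls rather than MariaDB's my_* wrappers:

#include <fcntl.h>
#include <unistd.h>

/* Create a temporary file that vanishes as soon as the descriptor is
   closed: unlinking right after open() leaves the inode alive only
   while the fd is open. Path construction is simplified here. */
static int open_anonymous_tmpfile(const char* path)
{
	int fd = open(path, O_CREAT | O_EXCL | O_RDWR, 0600);
	if (fd >= 0) {
		unlink(path);	/* name gone; data freed on close(fd) */
	}
	return fd;		/* < 0 on error, as with create_temp_file() */
}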
@@ -2455,19 +2365,18 @@ static int mysql_tmpfile_path(const char *path, const char *prefix)
path. If the path is NULL, then it will be created in tmpdir.
@param[in] path location for creating temporary file
@return temporary file descriptor, or < 0 on error */
-int
+os_file_t
innobase_mysql_tmpfile(
const char* path)
{
#ifdef WITH_INNODB_DISALLOW_WRITES
os_event_wait(srv_allow_writes_event);
#endif /* WITH_INNODB_DISALLOW_WRITES */
- int fd2 = -1;
File fd;
DBUG_EXECUTE_IF(
"innobase_tmpfile_creation_failure",
- return(-1);
+ return(OS_FILE_CLOSED);
);
if (path == NULL) {
@@ -2476,54 +2385,59 @@ innobase_mysql_tmpfile(
fd = mysql_tmpfile_path(path, "ib");
}
- if (fd >= 0) {
- /* Copy the file descriptor, so that the additional resources
- allocated by create_temp_file() can be freed by invoking
- my_close().
+ if (fd < 0)
+ return OS_FILE_CLOSED;
+
+ /* Copy the file descriptor, so that the additional resources
+ allocated by create_temp_file() can be freed by invoking
+ my_close().
- Because the file descriptor returned by this function
- will be passed to fdopen(), it will be closed by invoking
- fclose(), which in turn will invoke close() instead of
- my_close(). */
+ Because the file descriptor returned by this function
+ will be passed to fdopen(), it will be closed by invoking
+ fclose(), which in turn will invoke close() instead of
+ my_close(). */
#ifdef _WIN32
- /* Note that on Windows, the integer returned by mysql_tmpfile
- has no relation to C runtime file descriptor. Here, we need
- to call my_get_osfhandle to get the HANDLE and then convert it
- to C runtime filedescriptor. */
- {
- HANDLE hFile = my_get_osfhandle(fd);
- HANDLE hDup;
- BOOL bOK = DuplicateHandle(
- GetCurrentProcess(),
- hFile, GetCurrentProcess(),
- &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
- if (bOK) {
- fd2 = _open_osfhandle((intptr_t) hDup, 0);
- } else {
- my_osmaperr(GetLastError());
- fd2 = -1;
- }
- }
+	/* Note that on Windows, the integer returned by mysql_tmpfile
+	has no relation to a C runtime file descriptor. We call
+	my_get_osfhandle to obtain the HANDLE, duplicate it, and return
+	the duplicated HANDLE directly as the os_file_t. */
+
+ HANDLE hFile = my_get_osfhandle(fd);
+ HANDLE hDup;
+ BOOL bOK = DuplicateHandle(
+ GetCurrentProcess(),
+ hFile, GetCurrentProcess(),
+ &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
+ my_close(fd, MYF(MY_WME));
+
+ if (!bOK) {
+ my_osmaperr(GetLastError());
+ goto error;
+ }
+ return hDup;
#else
#ifdef F_DUPFD_CLOEXEC
- fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+ int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
- fd2 = dup(fd);
+ int fd2 = dup(fd);
#endif
-#endif
- if (fd2 < 0) {
- char errbuf[MYSYS_STRERROR_SIZE];
- DBUG_PRINT("error",("Got error %d on dup",fd2));
- set_my_errno(errno);
- my_error(EE_OUT_OF_FILERESOURCES,
- MYF(0),
- "ib*", errno,
- my_strerror(errbuf, sizeof(errbuf), errno));
- }
- my_close(fd, MYF(MY_WME));
+ my_close(fd, MYF(MY_WME));
+ if (fd2 < 0) {
+ set_my_errno(errno);
+ goto error;
}
- return(fd2);
+ return fd2;
+#endif
+
+error:
+ char errbuf[MYSYS_STRERROR_SIZE];
+
+ my_error(EE_OUT_OF_FILERESOURCES,
+ MYF(0),
+ "ib*", errno,
+ my_strerror(errbuf, sizeof(errbuf), errno));
+ return (OS_FILE_CLOSED);
}
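Editorial note (not part of the patch): the function now returns a duplicate of the descriptor and releases the original through my_close(), so the resources create_temp_file() allocated are freed while the caller still gets a handle it may pass to fdopen() and later close with plain fclose()/close(). A condensed POSIX-only sketch of that duplication step, with error handling trimmed:

#include <fcntl.h>
#include <unistd.h>

/* Duplicate fd so the original can be released by its own close
   routine; the copy is what eventually reaches fdopen()/fclose(). */
static int duplicate_and_release(int fd)
{
#ifdef F_DUPFD_CLOEXEC
	int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);  /* no fd leak on exec */
#else
	int fd2 = dup(fd);
#endif
	close(fd);	/* stands in for my_close(fd, MYF(MY_WME)) here */
	return fd2;	/* < 0 on failure */
}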
/*********************************************************************//**
@@ -2790,7 +2704,7 @@ innobase_trx_allocate(
DBUG_ASSERT(thd != NULL);
DBUG_ASSERT(EQ_CURRENT_THD(thd));
- trx = trx_allocate_for_mysql();
+ trx = trx_create();
trx->mysql_thd = thd;
@@ -2821,13 +2735,17 @@ check_trx_exists(
}
}
-/*************************************************************************
-Gets current trx. */
-trx_t*
-innobase_get_trx()
+/**
+ Gets current trx.
+
+ This function may be called during InnoDB initialisation, when
+ innodb_hton_ptr->slot is not yet set to a meaningful value.
+*/
+
+trx_t *current_trx()
{
THD *thd=current_thd;
- if (likely(thd != 0)) {
+ if (likely(thd != 0) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) {
return thd_to_trx(thd);
} else {
return(NULL);
@@ -2908,7 +2826,8 @@ innobase_copy_frm_flags_from_create_info(
ibool ps_on;
ibool ps_off;
- if (dict_table_is_temporary(innodb_table)) {
+ if (innodb_table->is_temporary()
+ || innodb_table->no_rollback()) {
/* Temp tables do not use persistent stats. */
ps_on = FALSE;
ps_off = TRUE;
@@ -2943,7 +2862,7 @@ innobase_copy_frm_flags_from_table_share(
ibool ps_on;
ibool ps_off;
- if (dict_table_is_temporary(innodb_table)) {
+ if (innodb_table->is_temporary()) {
/* Temp tables do not use persistent stats */
ps_on = FALSE;
ps_off = TRUE;
@@ -2994,8 +2913,9 @@ ha_innobase::ha_innobase(
*/
| HA_CAN_EXPORT
| HA_CAN_RTREEKEYS
+ | HA_CAN_TABLES_WITHOUT_ROLLBACK
| HA_CONCURRENT_OPTIMIZE
- | (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0)
+ | (srv_force_primary_key ? HA_WANTS_PRIMARY_KEY : 0)
),
m_start_of_scan(),
m_mysql_has_locked()
@@ -3166,8 +3086,8 @@ static bool innobase_query_caching_table_check_low(
return false;
}
- return !MVCC::is_view_active(trx->read_view)
- || trx->read_view->low_limit_id()
+ return !trx->read_view.is_open()
+ || trx->read_view.low_limit_id()
>= table->query_cache_inv_trx_id;
}
@@ -3201,12 +3121,12 @@ static bool innobase_query_caching_table_check(
if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
&& !srv_read_only_mode
- && !MVCC::is_view_active(trx->read_view)) {
+ && !trx->read_view.is_open()) {
/* Start the transaction if it is not started yet */
trx_start_if_not_started(trx, false);
- trx_sys->mvcc->view_open(trx->read_view, trx);
+ trx->read_view.open(trx);
}
}
@@ -3228,9 +3148,9 @@ read view to it if there is no read view yet.
Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
-query cache mutex, and this function will reserve the InnoDB trx_sys->mutex.
+query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
-the InnoDB trx_sys->mutex.
+the InnoDB trx_sys.mutex.
@return TRUE if permitted, FALSE if not; note that the value FALSE
does not mean we should invalidate the query cache: invalidation is
called explicitly */
@@ -3241,10 +3161,10 @@ innobase_query_caching_of_table_permitted(
THD* thd, /*!< in: thd of the user who is trying to
store a result to the query cache or
retrieve it */
- char* full_name, /*!< in: normalized path to the table */
+ const char* full_name, /*!< in: normalized path to the table */
uint full_name_len, /*!< in: length of the normalized path
to the table */
- ulonglong *unused) /*!< unused for this engine */
+ ulonglong *)
{
char norm_name[1000];
trx_t* trx = check_trx_exists(thd);
@@ -3298,13 +3218,11 @@ innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
- const char* full_name, /*!< in: concatenation of
+ const char* full_name) /*!< in: concatenation of
database name, path separator,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
- ulint full_name_len) /*!< in: full name length where
- also the null chars count */
{
/* Note that the sync0mutex.h rank of the query cache mutex is just
above the InnoDB trx_sys_t->lock. The caller of this function must
@@ -3315,12 +3233,12 @@ innobase_invalidate_query_cache(
char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
const char *key_ptr;
size_t tabname_len;
- size_t dbname_len;
// Extract the database name.
key_ptr= strchr(full_name, '/');
DBUG_ASSERT(key_ptr != NULL); // Database name should be present
- memcpy(db_name, full_name, (dbname_len= (key_ptr - full_name)));
+ size_t dbname_len= size_t(key_ptr - full_name);
+ memcpy(db_name, full_name, dbname_len);
db_name[dbname_len]= '\0';
/* Construct the key("db-name\0table$name\0") for the query cache using
@@ -3336,7 +3254,7 @@ innobase_invalidate_query_cache(
/* Argument TRUE below means we are using transactions */
mysql_query_cache_invalidate4(trx->mysql_thd,
qcache_key_name,
- (dbname_len + tabname_len + 2),
+ uint(dbname_len + tabname_len + 2),
TRUE);
#endif
}
@@ -3389,9 +3307,9 @@ innobase_quote_identifier(
if (q == EOF) {
quoted_identifier.append(id);
} else {
- quoted_identifier += (unsigned char)q;
+ quoted_identifier += char(q);
quoted_identifier.append(id);
- quoted_identifier += (unsigned char)q;
+ quoted_identifier += char(q);
}
return (quoted_identifier);
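Editorial note (not part of the patch): get_quote_char_for_identifier() reports either EOF (no quoting needed) or the active quote character, and the code above wraps the identifier in it; with the default sql_mode that turns mytable into `mytable`, and into "mytable" under ANSI_QUOTES. A reduced sketch of that wrapping step, using a hypothetical helper name:

#include <cstdio>	/* EOF */
#include <string>

/* Wrap an identifier in the given quote character, or return it
   unchanged when q == EOF; mirrors the logic above in miniature. */
static std::string quote_identifier(const std::string& id, int q)
{
	if (q == EOF) {
		return id;
	}
	std::string out;
	out += char(q);
	out += id;
	out += char(q);
	return out;
}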
@@ -3459,12 +3377,13 @@ innobase_convert_name(
}
/* Print the database name and table name separately. */
- s = innobase_convert_identifier(s, bufend - s, id, slash - id, thd);
+ s = innobase_convert_identifier(s, ulint(bufend - s),
+ id, ulint(slash - id), thd);
if (s < bufend) {
*s++ = '.';
- s = innobase_convert_identifier(s, bufend - s,
+ s = innobase_convert_identifier(s, ulint(bufend - s),
slash + 1, idlen
- - (slash - id) - 1,
+ - ulint(slash - id) - 1,
thd);
}
@@ -3493,8 +3412,8 @@ innobase_format_name(
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-ibool
+@return true if interrupted */
+bool
trx_is_interrupted(
/*===============*/
const trx_t* trx) /*!< in: transaction */
@@ -3530,7 +3449,7 @@ ha_innobase::reset_template(void)
}
);
- m_prebuilt->keep_other_fields_on_keyread = 0;
+ m_prebuilt->keep_other_fields_on_keyread = false;
m_prebuilt->read_just_key = 0;
m_prebuilt->in_fts_query = 0;
@@ -3573,7 +3492,7 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Assign a read view if the transaction does not have it yet */
- trx_assign_read_view(m_prebuilt->trx);
+ m_prebuilt->trx->read_view.open(m_prebuilt->trx);
innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
@@ -3609,9 +3528,12 @@ innobase_space_shutdown()
{
DBUG_ENTER("innobase_space_shutdown");
+ if (fil_system.temp_space) {
+ fil_system.temp_space->close();
+ }
+
srv_sys_space.shutdown();
if (srv_tmp_space.get_sanity_check_status()) {
- fil_space_close(srv_tmp_space.name());
srv_tmp_space.delete_files();
}
srv_tmp_space.shutdown();
@@ -3623,72 +3545,15 @@ innobase_space_shutdown()
DBUG_VOID_RETURN;
}
-/*********************************************************************//**
-Free any resources that were allocated and return failure.
+/** Free any resources that were allocated and return failure.
@return always return 1 */
-static
-int
-innobase_init_abort()
-/*=================*/
+static int innodb_init_abort()
{
- DBUG_ENTER("innobase_init_abort");
+ DBUG_ENTER("innodb_init_abort");
innobase_space_shutdown();
DBUG_RETURN(1);
}
-/** Return partitioning flags. */
-static uint innobase_partition_flags()
-{
- /* JAN: TODO: MYSQL 5.7
- return(HA_CAN_EXCHANGE_PARTITION | HA_CANNOT_PARTITION_FK);
- */
- return (0);
-}
-
-/** Deprecation message about InnoDB file format related parameters */
-#define DEPRECATED_FORMAT_PARAMETER(x) \
- "Using " x " is deprecated and the parameter" \
- " may be removed in future releases." \
- " See https://mariadb.com/kb/en/library/xtradbinnodb-file-format/"
-
-/** Deprecation message about innodb_file_format */
-static const char* deprecated_file_format
- = DEPRECATED_FORMAT_PARAMETER("innodb_file_format");
-
-/** Deprecation message about innodb_large_prefix */
-static const char* deprecated_large_prefix
- = DEPRECATED_FORMAT_PARAMETER("innodb_large_prefix");
-
-/** Deprecation message about innodb_file_format_check */
-static const char* deprecated_file_format_check
- = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_check");
-
-/** Deprecation message about innodb_file_format_max */
-static const char* deprecated_file_format_max
- = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max");
-
-/** Deprecation message about innodb_use_trim */
-static const char* deprecated_use_trim
- = "Using innodb_use_trim is deprecated"
- " and the parameter will be removed in MariaDB 10.3.";
-
-/** Deprecation message about innodb_instrument_semaphores */
-static const char* deprecated_instrument_semaphores
- = "Using innodb_instrument_semaphores is deprecated"
- " and the parameter will be removed in MariaDB 10.3.";
-
-static const char* deprecated_use_mtflush
- = "Using innodb_use_mtflush is deprecated"
- " and the parameter will be removed in MariaDB 10.3."
- " Use innodb-page-cleaners instead. ";
-
-static const char* deprecated_mtflush_threads
- = "Using innodb_mtflush_threads is deprecated"
- " and the parameter will be removed in MariaDB 10.3."
- " Use innodb-page-cleaners instead. ";
-
-static my_bool innodb_instrument_semaphores;
-
/** Update log_checksum_algorithm_ptr with a pointer to the function
corresponding to whether checksums are enabled.
@param[in,out] thd client session, or NULL if at startup
@@ -3737,93 +3602,107 @@ static const char* ha_innobase_exts[] = {
NullS
};
-/*********************************************************************//**
-Opens an InnoDB database.
-@return 0 on success, 1 on failure */
-static
-int
-innobase_init(
-/*==========*/
- void *p) /*!< in: InnoDB handlerton */
+/** Determine if system-versioned data was modified by the transaction.
+@param[in,out] thd current session
+@param[out] trx_id transaction start ID
+@return transaction commit ID
+@retval 0 if no system-versioned data was affected by the transaction */
+static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
{
- static char current_dir[3]; /*!< Set if using current lib */
- int err;
- char *default_path;
- uint format_id;
- ulong num_pll_degree;
-
- DBUG_ENTER("innobase_init");
- handlerton* innobase_hton= (handlerton*) p;
- innodb_hton_ptr = innobase_hton;
-
- innobase_hton->state = SHOW_OPTION_YES;
- innobase_hton->db_type = DB_TYPE_INNODB;
- innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
- innobase_hton->close_connection = innobase_close_connection;
- innobase_hton->kill_query = innobase_kill_query;
- innobase_hton->savepoint_set = innobase_savepoint;
- innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
+ if (const trx_t* trx = thd_to_trx(thd)) {
+ *trx_id = trx->id;
- innobase_hton->savepoint_rollback_can_release_mdl =
- innobase_rollback_to_savepoint_can_release_mdl;
+ for (trx_mod_tables_t::const_iterator t
+ = trx->mod_tables.begin();
+ t != trx->mod_tables.end(); t++) {
+ if (t->second.is_versioned()) {
+ DBUG_ASSERT(t->first->versioned_by_id());
+ DBUG_ASSERT(trx->rsegs.m_redo.rseg);
- innobase_hton->savepoint_release = innobase_release_savepoint;
- innobase_hton->prepare_ordered= NULL;
- innobase_hton->commit_ordered= innobase_commit_ordered;
- innobase_hton->commit = innobase_commit;
- innobase_hton->rollback = innobase_rollback;
- innobase_hton->prepare = innobase_xa_prepare;
- innobase_hton->recover = innobase_xa_recover;
- innobase_hton->commit_by_xid = innobase_commit_by_xid;
- innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
- innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
- innobase_hton->create = innobase_create_handler;
-
- innobase_hton->drop_database = innobase_drop_database;
- innobase_hton->panic = innobase_end;
- innobase_hton->partition_flags= innobase_partition_flags;
-
- innobase_hton->start_consistent_snapshot =
- innobase_start_trx_and_assign_read_view;
-
- innobase_hton->flush_logs = innobase_flush_logs;
- innobase_hton->show_status = innobase_show_status;
- innobase_hton->flags =
- HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS;
-
-#ifdef WITH_WSREP
- innobase_hton->abort_transaction=wsrep_abort_transaction;
- innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
- innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
- innobase_hton->fake_trx_id=wsrep_fake_trx_id;
-#endif /* WITH_WSREP */
-
- innobase_hton->tablefile_extensions = ha_innobase_exts;
- innobase_hton->table_options = innodb_table_option_list;
+ return trx_sys.get_new_trx_id();
+ }
+ }
- innodb_remember_check_sysvar_funcs();
+ return 0;
+ }
- ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
+ *trx_id = 0;
+ return 0;
+}
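Editorial note (not part of the patch): innodb_prepare_commit_versioned() hands out a new commit id only when at least one table modified by the transaction is system-versioned. A stripped-down sketch of that decision, using hypothetical stand-in types for trx->mod_tables (the real container maps dict_table_t* to per-table modification info):

#include <map>

/* Hypothetical stand-ins for illustration only. */
struct table_info { bool versioned; };
typedef std::map<const void*, table_info> mod_tables_t;

/* Return true when the transaction touched any system-versioned
   table, i.e. when a row_end/commit id must be generated. */
static bool needs_versioned_commit(const mod_tables_t& mod_tables)
{
	for (mod_tables_t::const_iterator t = mod_tables.begin();
	     t != mod_tables.end(); ++t) {
		if (t->second.versioned) {
			return true;
		}
	}
	return false;
}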
-#ifndef DBUG_OFF
- static const char test_filename[] = "-@";
- char test_tablename[sizeof test_filename
- + sizeof(srv_mysql50_table_name_prefix) - 1];
- if ((sizeof(test_tablename)) - 1
- != filename_to_tablename(test_filename,
- test_tablename,
- sizeof(test_tablename), true)
- || strncmp(test_tablename,
- srv_mysql50_table_name_prefix,
- sizeof(srv_mysql50_table_name_prefix) - 1)
- || strcmp(test_tablename
- + sizeof(srv_mysql50_table_name_prefix) - 1,
- test_filename)) {
-
- sql_print_error("tablename encoding has been changed");
- DBUG_RETURN(innobase_init_abort());
- }
-#endif /* DBUG_OFF */
+/** Initialize and normalize innodb_buffer_pool_size. */
+static void innodb_buffer_pool_size_init()
+{
+ if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
+
+ if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
+#if defined(_WIN32) && !defined(_WIN64)
+ /* Do not allocate too large of a buffer pool on
+ Windows 32-bit systems, which can have trouble
+ allocating larger single contiguous memory blocks. */
+ srv_buf_pool_size = ulint(
+ ut_uint64_align_up(srv_buf_pool_size,
+ srv_buf_pool_chunk_unit));
+ srv_buf_pool_instances = std::min<ulong>(
+ MAX_BUFFER_POOLS,
+ ulong(srv_buf_pool_size
+ / srv_buf_pool_chunk_unit));
+#else /* defined(_WIN32) && !defined(_WIN64) */
+ /* Default to 8 instances when size > 1GB. */
+ srv_buf_pool_instances = 8;
+#endif /* defined(_WIN32) && !defined(_WIN64) */
+ }
+ } else {
+ /* If buffer pool is less than 1 GiB, assume fewer
+ threads. Also use only one buffer pool instance. */
+ if (srv_buf_pool_instances != srv_buf_pool_instances_default
+ && srv_buf_pool_instances != 1) {
+ /* We can't distinguish whether the user has explicitly
+ started mysqld with --innodb-buffer-pool-instances=0,
+ (srv_buf_pool_instances_default is 0) or has not
+ specified that option at all. Thus we have the
+ limitation that if the user started with =0, we
+ will not emit a warning here, but we should actually
+ do so. */
+ ib::info()
+ << "Adjusting innodb_buffer_pool_instances"
+ " from " << srv_buf_pool_instances << " to 1"
+ " since innodb_buffer_pool_size is less than "
+ << BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
+ << " MiB";
+ }
+
+ srv_buf_pool_instances = 1;
+ }
+
+ if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
+ > srv_buf_pool_size) {
+		/* The chunk size times the number of instances exceeds
+		srv_buf_pool_size; shrink srv_buf_pool_chunk_unit to fit. */
+ srv_buf_pool_chunk_unit
+ = static_cast<ulong>(srv_buf_pool_size)
+ / srv_buf_pool_instances;
+ if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
+ ++srv_buf_pool_chunk_unit;
+ }
+ }
+
+ srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
+ innobase_buffer_pool_size = srv_buf_pool_size;
+}
+
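Editorial note (not part of the patch): the chunk-unit adjustment keeps srv_buf_pool_chunk_unit * srv_buf_pool_instances from exceeding the pool size. For example, a 2 GiB pool split into 64 instances cannot keep the usual 128 MiB chunks (64 x 128 MiB = 8 GiB), so the unit is recomputed as ceil(2 GiB / 64) = 32 MiB. A small standalone version of that calculation:

/* Recompute the chunk unit so that one chunk per instance still fits
   inside the configured pool size (all values in bytes). */
static unsigned long adjust_chunk_unit(unsigned long long pool_size,
				       unsigned long      instances,
				       unsigned long      chunk_unit)
{
	if (1ULL * chunk_unit * instances > pool_size) {
		chunk_unit = (unsigned long) (pool_size / instances);
		if (pool_size % instances != 0) {
			++chunk_unit;	/* round up, as the code above does */
		}
	}
	return chunk_unit;
}

/* adjust_chunk_unit(2ULL << 30, 64, 128 << 20) == 32 MiB */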
+/** Initialize, validate and normalize the InnoDB startup parameters.
+@return failure code
+@retval 0 on success
+@retval HA_ERR_OUT_OF_MEM when out of memory
+@retval HA_ERR_INITIALIZATION when some parameters are out of range */
+static int innodb_init_params()
+{
+ DBUG_ENTER("innodb_init_params");
+
+ static char current_dir[3];
+ char *default_path;
+ ulong num_pll_degree;
/* Check that values don't overflow on 32-bit systems. */
if (sizeof(ulint) == 4) {
@@ -3831,26 +3710,19 @@ innobase_init(
sql_print_error(
"innodb_buffer_pool_size can't be over 4GB"
" on 32-bit systems");
-
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
}
- os_file_set_umask(my_umask);
-
- /* Setup the memory alloc/free tracing mechanisms before calling
- any functions that could possibly allocate memory. */
- ut_new_boot();
-
	/* The buffer pool needs to be able to accommodate enough
	pages, even for larger page sizes. */
- if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF
+ if (srv_page_size > UNIV_PAGE_SIZE_DEF
&& innobase_buffer_pool_size < (24 * 1024 * 1024)) {
ib::info() << "innodb_page_size="
- << UNIV_PAGE_SIZE << " requires "
+ << srv_page_size << " requires "
<< "innodb_buffer_pool_size > 24M current "
<< innobase_buffer_pool_size;
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#ifdef WITH_WSREP
@@ -3868,14 +3740,6 @@ innobase_init(
|| !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
ib::info() << "Galera SST method xtrabackup is deprecated and the "
" support for it may be removed in future releases.";
-
- /* We can't blindly turn on this as it will cause a
- modification of the redo log format identifier. See
- MDEV-13564 for more information. */
- if (!srv_safe_truncate) {
- ib::info() << "Requested xtrabackup based SST for Galera but"
- << "innodb_safe_truncate is disabled.";
- }
}
#endif /* WITH_WSREP */
@@ -3884,7 +3748,7 @@ innobase_init(
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
"InnoDB: liblz4 is not installed. \n",
innodb_compression_algorithm);
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#endif
@@ -3893,7 +3757,7 @@ innobase_init(
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
"InnoDB: liblzo is not installed. \n",
innodb_compression_algorithm);
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#endif
@@ -3902,7 +3766,7 @@ innobase_init(
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
"InnoDB: liblzma is not installed. \n",
innodb_compression_algorithm);
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#endif
@@ -3911,7 +3775,7 @@ innobase_init(
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
"InnoDB: libbz2 is not installed. \n",
innodb_compression_algorithm);
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#endif
@@ -3920,7 +3784,7 @@ innobase_init(
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
"InnoDB: libsnappy is not installed. \n",
innodb_compression_algorithm);
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
#endif
@@ -3928,11 +3792,17 @@ innobase_init(
&& !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
sql_print_error("InnoDB: cannot enable encryption, "
"encryption plugin is not available");
- goto error;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
- innodb_check_deprecated();
-
+#ifdef _WIN32
+ if (!is_filename_allowed(srv_buf_dump_filename,
+ strlen(srv_buf_dump_filename), FALSE)) {
+ sql_print_error("InnoDB: innodb_buffer_pool_filename"
+ " cannot have colon (:) in the file name.");
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
+ }
+#endif
/* First calculate the default path for innodb_data_home_dir etc.,
in case the user has not given any value.
@@ -3977,13 +3847,7 @@ innobase_init(
if (!srv_page_size_shift) {
sql_print_error("InnoDB: Invalid page size=%lu.\n",
srv_page_size);
- DBUG_RETURN(innobase_init_abort());
- }
-
- /* Set default InnoDB temp data file size to 12 MB and let it be
- auto-extending. */
- if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:12M:autoextend";
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
/* This is the first time univ_page_size is used.
@@ -4000,19 +3864,9 @@ innobase_init(
if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
ib::error() << "Unable to parse innodb_data_file_path="
<< innobase_data_file_path;
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
- /* Set default InnoDB temp data file size to 12 MB and let it be
- auto-extending. */
-
- if (!innobase_temp_data_file_path) {
- innobase_temp_data_file_path = (char*) "ibtmp1:12M:autoextend";
- }
-
- /* We set the temporary tablspace id later, after recovery.
- The temp tablespace doesn't support raw devices.
- Set the name and path. */
srv_tmp_space.set_name("innodb_temporary");
srv_tmp_space.set_path(srv_data_home);
srv_tmp_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
@@ -4020,16 +3874,19 @@ innobase_init(
if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
ib::error() << "Unable to parse innodb_temp_data_file_path="
<< innobase_temp_data_file_path;
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
/* Perform all sanity check before we take action of deleting files*/
if (srv_sys_space.intersection(&srv_tmp_space)) {
sql_print_error("%s and %s file names seem to be the same.",
srv_tmp_space.name(), srv_sys_space.name());
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
+ srv_sys_space.normalize_size();
+ srv_tmp_space.normalize_size();
+
/* ------------ UNDO tablespaces files ---------------------*/
if (!srv_undo_dir) {
srv_undo_dir = default_path;
@@ -4039,7 +3896,7 @@ innobase_init(
if (strchr(srv_undo_dir, ';')) {
sql_print_error("syntax error in innodb_undo_directory");
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
/* -------------- All log files ---------------------------*/
@@ -4054,119 +3911,21 @@ innobase_init(
if (strchr(srv_log_group_home_dir, ';')) {
sql_print_error("syntax error in innodb_log_group_home_dir");
- DBUG_RETURN(innobase_init_abort());
- }
-
- if (!innobase_large_prefix) {
- ib::warn() << deprecated_large_prefix;
- }
-
- if (!THDVAR(NULL, support_xa)) {
- ib::warn() << deprecated_innodb_support_xa_off;
- THDVAR(NULL, support_xa) = TRUE;
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
- if (innobase_file_format_name != innodb_file_format_default) {
- ib::warn() << deprecated_file_format;
+ if (srv_n_log_files * srv_log_file_size
+ >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
+ /* log_block_convert_lsn_to_no() limits the returned block
+ number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
+ bytes, then we have a limit of 512 GB. If that limit is to
+ be raised, then log_block_convert_lsn_to_no() must be
+ modified. */
+ ib::error() << "Combined size of log files must be < 512 GB";
+ DBUG_RETURN(HA_ERR_INITIALIZATION);
}
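Editorial note (not part of the patch): the 512 GB bound follows directly from the block numbering described in the comment above: log_block_convert_lsn_to_no() yields at most 1G (2^30) distinct block numbers and each OS_FILE_LOG_BLOCK_SIZE block is 512 (2^9) bytes, so the addressable redo size is 2^30 * 2^9 = 2^39 bytes = 512 GiB. The same arithmetic as a compile-time check; the constant names are illustrative:

/* 1G block numbers x 512-byte blocks = 512 GiB of addressable redo. */
static const unsigned long long MAX_LOG_BLOCKS  = 1ULL << 30;
static const unsigned long long LOG_BLOCK_BYTES = 512;

static_assert(MAX_LOG_BLOCKS * LOG_BLOCK_BYTES
	      == 512ULL * 1024 * 1024 * 1024,
	      "combined redo log size limit is 512 GiB");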
- if (innodb_instrument_semaphores) {
- ib::warn() << deprecated_instrument_semaphores;
- }
-
- if (srv_use_mtflush) {
- ib::warn() << deprecated_use_mtflush;
- }
-
- if (srv_use_mtflush && srv_mtflush_threads != MTFLUSH_DEFAULT_WORKER) {
- ib::warn() << deprecated_mtflush_threads;
- }
-
- /* Validate the file format by animal name */
- if (innobase_file_format_name != NULL) {
-
- format_id = innobase_file_format_name_lookup(
- innobase_file_format_name);
-
- if (format_id > UNIV_FORMAT_MAX) {
-
- sql_print_error("InnoDB: wrong innodb_file_format.");
-
- DBUG_RETURN(innobase_init_abort());
- }
- } else {
- /* Set it to the default file format id. Though this
- should never happen. */
- format_id = 0;
- }
-
- srv_file_format = format_id;
-
- /* Given the type of innobase_file_format_name we have little
- choice but to cast away the constness from the returned name.
- innobase_file_format_name is used in the MySQL set variable
- interface and so can't be const. */
-
- innobase_file_format_name =
- (char*) trx_sys_file_format_id_to_name(format_id);
-
- /* Check innobase_file_format_check variable */
- if (!innobase_file_format_check) {
- ib::warn() << deprecated_file_format_check;
-
- /* Set the value to disable checking. */
- srv_max_file_format_at_startup = UNIV_FORMAT_MAX + 1;
-
- } else {
-
- /* Set the value to the lowest supported format. */
- srv_max_file_format_at_startup = UNIV_FORMAT_MIN;
- }
-
- if (innobase_file_format_max != innodb_file_format_max_default) {
- ib::warn() << deprecated_file_format_max;
- }
-
- /* Did the user specify a format name that we support?
- As a side effect it will update the variable
- srv_max_file_format_at_startup */
- if (innobase_file_format_validate_and_set(
- innobase_file_format_max) < 0) {
-
- sql_print_error("InnoDB: invalid"
- " innodb_file_format_max value:"
- " should be any value up to %s or its"
- " equivalent numeric id",
- trx_sys_file_format_id_to_name(
- UNIV_FORMAT_MAX));
-
- DBUG_RETURN(innobase_init_abort());
- }
-
- if (innobase_change_buffering) {
- ulint use;
-
- for (use = 0;
- use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
- if (!innobase_strcasecmp(
- innobase_change_buffering,
- innobase_change_buffering_values[use])) {
- ibuf_use = (ibuf_use_t) use;
- goto innobase_change_buffering_inited_ok;
- }
- }
-
- sql_print_error("InnoDB: invalid value"
- " innodb_change_buffering=%s",
- innobase_change_buffering);
- DBUG_RETURN(innobase_init_abort());
- }
-
-innobase_change_buffering_inited_ok:
- ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
- innobase_change_buffering = (char*)
- innobase_change_buffering_values[ibuf_use];
+ DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);
/* Check that interdependent parameters have sane values. */
if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
@@ -4202,24 +3961,13 @@ innobase_change_buffering_inited_ok:
srv_io_capacity = srv_max_io_capacity;
}
- if (!is_filename_allowed(srv_buf_dump_filename,
- strlen(srv_buf_dump_filename), FALSE)) {
- sql_print_error("InnoDB: innodb_buffer_pool_filename"
- " cannot have colon (:) in the file name.");
- DBUG_RETURN(innobase_init_abort());
- }
-
- /* --------------------------------------------------*/
-
- srv_file_flush_method_str = innobase_file_flush_method;
-
if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
ib::info() << "innodb_page_size=" << srv_page_size;
srv_max_undo_log_size = std::max(
srv_max_undo_log_size,
ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
- * srv_page_size);
+ << srv_page_size_shift);
}
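Editorial note (not part of the patch): replacing the multiplication by srv_page_size with a shift by srv_page_size_shift is an identity, because srv_page_size == 1 << srv_page_size_shift (14 for the default 16 KiB page). A three-line illustration of the pages-to-bytes conversion:

/* n_pages * page_size == n_pages << page_size_shift
   whenever page_size == 1u << page_size_shift (16384 == 1 << 14). */
static inline unsigned long long pages_to_bytes(unsigned long long n_pages,
						unsigned page_size_shift)
{
	return n_pages << page_size_shift;
}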
if (srv_log_write_ahead_size > srv_page_size) {
@@ -4239,14 +3987,7 @@ innobase_change_buffering_inited_ok:
}
}
- srv_log_buffer_size = (ulint) innobase_log_buffer_size;
-
- srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
-
- srv_n_read_io_threads = (ulint) innobase_read_io_threads;
- srv_n_write_io_threads = (ulint) innobase_write_io_threads;
-
- srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+ srv_buf_pool_size = ulint(innobase_buffer_pool_size);
if (!innobase_use_checksums) {
ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
@@ -4281,16 +4022,16 @@ innobase_change_buffering_inited_ok:
}
}
- if (innobase_open_files > (long) open_files_limit) {
+ if (innobase_open_files > open_files_limit) {
ib::warn() << "innodb_open_files " << innobase_open_files
<< " should not be greater"
<< "than the open_files_limit " << open_files_limit;
- if (innobase_open_files > (long) tc_size) {
+ if (innobase_open_files > tc_size) {
innobase_open_files = tc_size;
}
}
- srv_max_n_open_files = (ulint) innobase_open_files;
+ srv_max_n_open_files = innobase_open_files;
srv_innodb_status = (ibool) innobase_create_status_file;
srv_print_verbose_log = mysqld_embedded ? 0 : 1;
@@ -4312,12 +4053,6 @@ innobase_change_buffering_inited_ok:
innobase_commit_concurrency_init_default();
- if (innobase_use_fallocate) {
- ib::warn() << "innodb_use_fallocate is DEPRECATED"
- " and has no effect in MariaDB 10.2."
- " It will be removed in MariaDB 10.3.";
- }
-
srv_use_atomic_writes
= innobase_use_atomic_writes && my_may_have_atomic_write;
if (srv_use_atomic_writes && !srv_file_per_table)
@@ -4333,15 +4068,153 @@ innobase_change_buffering_inited_ok:
unbuffered)
*/
#ifndef _WIN32
- if (!innobase_file_flush_method ||
- !strstr(innobase_file_flush_method, "O_DIRECT")) {
- innobase_file_flush_method =
- srv_file_flush_method_str = (char*)"O_DIRECT";
+ switch (innodb_flush_method) {
+ case SRV_O_DIRECT:
+ case SRV_O_DIRECT_NO_FSYNC:
+ break;
+ default:
+ innodb_flush_method = SRV_O_DIRECT;
fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
}
#endif
}
+ if (srv_read_only_mode) {
+ ib::info() << "Started in read only mode";
+ srv_use_doublewrite_buf = FALSE;
+ }
+
+#ifdef LINUX_NATIVE_AIO
+ if (srv_use_native_aio) {
+ ib::info() << "Using Linux native AIO";
+ }
+#elif !defined _WIN32
+ /* Currently native AIO is supported only on windows and linux
+ and that also when the support is compiled in. In all other
+ cases, we ignore the setting of innodb_use_native_aio. */
+ srv_use_native_aio = FALSE;
+#endif
+
+#ifndef _WIN32
+ ut_ad(innodb_flush_method <= SRV_O_DIRECT_NO_FSYNC);
+#else
+ switch (innodb_flush_method) {
+ case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
+ innodb_flush_method = SRV_ALL_O_DIRECT_FSYNC;
+ break;
+ case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
+ innodb_flush_method = SRV_FSYNC;
+ break;
+ default:
+ ut_ad(innodb_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
+ }
+#endif
+ srv_file_flush_method = srv_flush_t(innodb_flush_method);
+
+ innodb_buffer_pool_size_init();
+
+ if (srv_n_page_cleaners > srv_buf_pool_instances) {
+ /* limit of page_cleaner parallelizability
+ is number of buffer pool instances. */
+ srv_n_page_cleaners = srv_buf_pool_instances;
+ }
+
+ srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
+ DBUG_RETURN(0);
+}
+
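Editorial note (not part of the patch): srv_lock_table_size, set just before the function returns, ends up as five times the number of buffer pool pages: a 128 MiB pool with 16 KiB pages gives 8192 pages and hence a lock table sized for 40960 entries. A tiny check of that arithmetic, using an illustrative helper rather than the real globals:

/* lock table capacity = 5 * (buffer pool size in pages). */
static unsigned long lock_table_size(unsigned long long pool_bytes,
				     unsigned           page_size_shift)
{
	return (unsigned long) (5 * (pool_bytes >> page_size_shift));
}

/* lock_table_size(128ULL << 20, 14) == 40960 */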
+/** Initialize the InnoDB storage engine plugin.
+@param[in,out] p InnoDB handlerton
+@return error code
+@retval 0 on success */
+static int innodb_init(void* p)
+{
+ DBUG_ENTER("innodb_init");
+ handlerton* innobase_hton= static_cast<handlerton*>(p);
+ innodb_hton_ptr = innobase_hton;
+
+ innobase_hton->state = SHOW_OPTION_YES;
+ innobase_hton->db_type = DB_TYPE_INNODB;
+ innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
+ innobase_hton->close_connection = innobase_close_connection;
+ innobase_hton->kill_query = innobase_kill_query;
+ innobase_hton->savepoint_set = innobase_savepoint;
+ innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
+
+ innobase_hton->savepoint_rollback_can_release_mdl =
+ innobase_rollback_to_savepoint_can_release_mdl;
+
+ innobase_hton->savepoint_release = innobase_release_savepoint;
+ innobase_hton->prepare_ordered= NULL;
+ innobase_hton->commit_ordered= innobase_commit_ordered;
+ innobase_hton->commit = innobase_commit;
+ innobase_hton->rollback = innobase_rollback;
+ innobase_hton->prepare = innobase_xa_prepare;
+ innobase_hton->recover = innobase_xa_recover;
+ innobase_hton->commit_by_xid = innobase_commit_by_xid;
+ innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
+ innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
+ innobase_hton->create = innobase_create_handler;
+
+ innobase_hton->drop_database = innobase_drop_database;
+ innobase_hton->panic = innobase_end;
+
+ innobase_hton->start_consistent_snapshot =
+ innobase_start_trx_and_assign_read_view;
+
+ innobase_hton->flush_logs = innobase_flush_logs;
+ innobase_hton->show_status = innobase_show_status;
+ innobase_hton->flags =
+ HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS
+ | HTON_NATIVE_SYS_VERSIONING;
+
+#ifdef WITH_WSREP
+ innobase_hton->abort_transaction=wsrep_abort_transaction;
+ innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
+ innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
+ innobase_hton->fake_trx_id=wsrep_fake_trx_id;
+#endif /* WITH_WSREP */
+
+ innobase_hton->tablefile_extensions = ha_innobase_exts;
+ innobase_hton->table_options = innodb_table_option_list;
+
+ /* System Versioning */
+ innobase_hton->prepare_commit_versioned
+ = innodb_prepare_commit_versioned;
+
+ innodb_remember_check_sysvar_funcs();
+
+ compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);
+
+#ifndef DBUG_OFF
+ static const char test_filename[] = "-@";
+ char test_tablename[sizeof test_filename
+ + sizeof(srv_mysql50_table_name_prefix) - 1];
+ DBUG_ASSERT(sizeof test_tablename - 1
+ == filename_to_tablename(test_filename,
+ test_tablename,
+ sizeof test_tablename, true));
+ DBUG_ASSERT(!strncmp(test_tablename,
+ srv_mysql50_table_name_prefix,
+ sizeof srv_mysql50_table_name_prefix - 1));
+ DBUG_ASSERT(!strcmp(test_tablename
+ + sizeof srv_mysql50_table_name_prefix - 1,
+ test_filename));
+#endif /* DBUG_OFF */
+
+ os_file_set_umask(my_umask);
+
+ /* Setup the memory alloc/free tracing mechanisms before calling
+ any functions that could possibly allocate memory. */
+ ut_new_boot();
+
+ if (int error = innodb_init_params()) {
+ DBUG_RETURN(error);
+ }
+
+ /* After this point, error handling has to use
+ innodb_init_abort(). */
+
#ifdef HAVE_PSI_INTERFACE
/* Register keys with MySQL performance schema */
int count;
@@ -4373,13 +4246,20 @@ innobase_change_buffering_inited_ok:
mysql_cond_register("innodb", all_innodb_conds, count);
#endif /* HAVE_PSI_INTERFACE */
- err = innobase_start_or_create_for_mysql();
+ bool create_new_db = false;
+
+ /* Check whether the data files exist. */
+ dberr_t err = srv_sys_space.check_file_spec(&create_new_db, 5U << 20);
+
+ if (err != DB_SUCCESS) {
+ DBUG_RETURN(innodb_init_abort());
+ }
- innobase_buffer_pool_size = static_cast<long long>(srv_buf_pool_size);
+ err = srv_start(create_new_db);
if (err != DB_SUCCESS) {
innodb_shutdown();
- DBUG_RETURN(innobase_init_abort());
+ DBUG_RETURN(innodb_init_abort());
} else if (!srv_read_only_mode) {
mysql_thread_create(thd_destructor_thread_key,
&thd_destructor_thread,
@@ -4411,9 +4291,6 @@ innobase_change_buffering_inited_ok:
}
#endif /* MYSQL_DYNAMIC_PLUGIN */
- /* Get the current high water mark format. */
- innobase_file_format_max = (char*) trx_sys_file_format_max_get();
-
/* Currently, monitor counter information are not persistent. */
memset(monitor_set_tbl, 0, sizeof monitor_set_tbl);
@@ -4430,7 +4307,6 @@ innobase_change_buffering_inited_ok:
/* Turn on monitor counters that are default on */
srv_mon_default_on();
-
/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
unit_test_os_file_get_parent_dir();
@@ -4451,9 +4327,6 @@ innobase_change_buffering_inited_ok:
#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */
DBUG_RETURN(0);
-
-error:
- DBUG_RETURN(1);
}
/** Shut down the InnoDB storage engine.
@@ -4469,7 +4342,7 @@ innobase_end(handlerton*, ha_panic_function)
if (thd) { // may be UNINSTALL PLUGIN statement
trx_t* trx = thd_to_trx(thd);
if (trx) {
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
}
@@ -4568,7 +4441,7 @@ innobase_start_trx_and_assign_read_view(
thd_get_trx_isolation(thd));
if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
- trx_assign_read_view(trx);
+ trx->read_view.open(trx);
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_UNSUPPORTED,
@@ -4630,11 +4503,8 @@ innobase_commit_ordered_2(
If the binary log is not enabled, or the transaction
is not written to the binary log, the file name will
be a NULL pointer. */
- ulonglong pos;
-
- thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
-
- trx->mysql_log_offset = static_cast<int64_t>(pos);
+ thd_binlog_pos(thd, &trx->mysql_log_file_name,
+ &trx->mysql_log_offset);
/* Don't do write + flush right now. For group commit
to work we want to do the flush later. */
@@ -4962,7 +4832,6 @@ UNIV_INTERN
void
innobase_mysql_log_notify(
/*======================*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
{
struct pending_checkpoint * pending;
@@ -5052,7 +4921,7 @@ innobase_rollback_to_savepoint(
char name[64];
- longlong2str((ulint) savepoint, name, 36);
+ longlong2str(longlong(savepoint), name, 36);
int64_t mysql_binlog_cache_pos;
@@ -5121,7 +4990,7 @@ innobase_release_savepoint(
/* TODO: use provided savepoint data area to store savepoint data */
- longlong2str((ulint) savepoint, name, 36);
+ longlong2str(longlong(savepoint), name, 36);
error = trx_release_savepoint_for_mysql(trx, name);
@@ -5160,12 +5029,12 @@ innobase_savepoint(
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
- longlong2str((ulint) savepoint,name,36);
+ longlong2str(longlong(savepoint), name, 36);
dberr_t error = trx_savepoint_for_mysql(trx, name, 0);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
- fts_savepoint_take(trx, trx->fts_trx, name);
+ fts_savepoint_take(trx->fts_trx, name);
}
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
@@ -5234,7 +5103,7 @@ innobase_close_connection(
} else {
rollback_and_free:
innobase_rollback_trx(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
}
@@ -5313,9 +5182,7 @@ ha_innobase::table_flags() const
/* Need to use tx_isolation here since table flags is (also)
called before prebuilt is inited. */
- ulong const tx_isolation = thd_tx_isolation(thd);
-
- if (tx_isolation <= ISO_READ_COMMITTED) {
+ if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
return(flags);
}
@@ -5434,7 +5301,7 @@ ha_innobase::max_supported_key_length() const
Note: Handle 16k and 32k pages the same here since the limits
are higher than imposed by MySQL. */
- switch (UNIV_PAGE_SIZE) {
+ switch (srv_page_size) {
case 4096:
/* Hack: allow mysql.innodb_index_stats to be created. */
/* FIXME: rewrite this API, and in sql_table.cc consider
@@ -5831,7 +5698,7 @@ innobase_vcol_build_templ(
mysql_row_templ_t* templ,
ulint col_no)
{
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
templ->is_virtual = true;
templ->col_no = col_no;
templ->clust_rec_field_no = ULINT_UNDEFINED;
@@ -5888,7 +5755,7 @@ innobase_build_v_templ(
const dict_add_v_col_t* add_v,
bool locked)
{
- ulint ncol = ib_table->n_cols - DATA_N_SYS_COLS;
+ ulint ncol = unsigned(ib_table->n_cols) - DATA_N_SYS_COLS;
ulint n_v_col = ib_table->n_v_cols;
bool marker[REC_MAX_N_FIELDS];
@@ -5963,7 +5830,7 @@ innobase_build_v_templ(
}
ut_ad(!my_strcasecmp(system_charset_info, name,
- field->field_name));
+ field->field_name.str));
#endif
const dict_v_col_t* vcol;
@@ -5997,7 +5864,7 @@ innobase_build_v_templ(
ut_ad(!my_strcasecmp(system_charset_info,
dict_table_get_col_name(
ib_table, j),
- field->field_name));
+ field->field_name.str));
s_templ->vtempl[j] = static_cast<
mysql_row_templ_t*>(
@@ -6043,14 +5910,13 @@ check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
corresponding InnoDB index pointer into index_mapping
array. */
for (ulint count = 0; count < mysql_num_index; count++) {
-
const dict_index_t* index = dict_table_get_index_on_name(
- ib_table, table->key_info[count].name);
+ ib_table, table->key_info[count].name.str);
if (index == NULL) {
sql_print_error("Cannot find index %s in InnoDB"
" index dictionary.",
- table->key_info[count].name);
+ table->key_info[count].name.str);
ret = false;
goto func_exit;
}
@@ -6061,7 +5927,7 @@ check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
index)) {
sql_print_error("Found index %s whose column info"
" does not match that of MariaDB.",
- table->key_info[count].name);
+ table->key_info[count].name.str);
ret = false;
goto func_exit;
}
@@ -6146,7 +6012,7 @@ static
void
initialize_auto_increment(dict_table_t* table, const Field* field)
{
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
const unsigned col_no = innodb_col_no(field);
@@ -6228,8 +6094,8 @@ no_such_table:
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
- uint n_fields = mysql_fields(table);
- uint n_cols = dict_table_get_n_user_cols(ib_table)
+ size_t n_fields = mysql_fields(table);
+ size_t n_cols = dict_table_get_n_user_cols(ib_table)
+ dict_table_get_n_v_cols(ib_table)
- !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
@@ -6261,11 +6127,7 @@ no_such_table:
MONITOR_INC(MONITOR_TABLE_OPEN);
- bool no_tablespace = false;
- bool encrypted = false;
- FilSpace space;
-
- if (dict_table_is_discarded(ib_table)) {
+ if ((ib_table->flags2 & DICT_TF2_DISCARDED)) {
ib_senderrf(thd,
IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
@@ -6275,77 +6137,37 @@ no_such_table:
all the flags and index root page numbers to FIL_NULL that
should prevent any DML from running but it should allow DDL
operations. */
-
- no_tablespace = false;
-
} else if (!ib_table->is_readable()) {
- space = fil_space_acquire_silent(ib_table->space);
-
- if (space()) {
- if (space()->crypt_data && space()->crypt_data->is_encrypted()) {
- /* This means that tablespace was found but we could not
- decrypt encrypted page. */
- no_tablespace = true;
- encrypted = true;
- } else {
- no_tablespace = true;
- }
- } else {
+ const fil_space_t* space = ib_table->space;
+ if (!space) {
ib_senderrf(
thd, IB_LOG_LEVEL_WARN,
ER_TABLESPACE_MISSING, norm_name);
-
- /* This means we have no idea what happened to the tablespace
- file, best to play it safe. */
-
- no_tablespace = true;
}
- } else {
- no_tablespace = false;
- }
-
- if (!thd_tablespace_op(thd) && no_tablespace) {
- set_my_errno(ENOENT);
- int ret_err = HA_ERR_NO_SUCH_TABLE;
- /* If table has no talespace but it has crypt data, check
- is tablespace made unaccessible because encryption service
- or used key_id is not available. */
- if (encrypted) {
- bool warning_pushed = false;
+ if (!thd_tablespace_op(thd)) {
+ set_my_errno(ENOENT);
+ int ret_err = HA_ERR_NO_SUCH_TABLE;
- if (!encryption_key_id_exists(space()->crypt_data->key_id)) {
+ if (space && space->crypt_data
+ && space->crypt_data->is_encrypted()) {
push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
+ thd,
+ Sql_condition::WARN_LEVEL_WARN,
HA_ERR_DECRYPTION_FAILED,
- "Table %s in file %s is encrypted but encryption service or"
+ "Table %s in file %s is encrypted"
+ " but encryption service or"
" used key_id %u is not available. "
" Can't continue reading table.",
table_share->table_name.str,
- space()->chain.start->name,
- space()->crypt_data->key_id);
+ space->chain.start->name,
+ space->crypt_data->key_id);
ret_err = HA_ERR_DECRYPTION_FAILED;
- warning_pushed = true;
}
- /* If table is marked as encrypted then we push
- warning if it has not been already done as used
- key_id might be found but it is incorrect. */
- if (!warning_pushed) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_DECRYPTION_FAILED,
- "Table %s in file %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- table_share->table_name.str,
- space()->chain.start->name);
- ret_err = HA_ERR_DECRYPTION_FAILED;
- }
+ dict_table_close(ib_table, FALSE, FALSE);
+ DBUG_RETURN(ret_err);
}
-
- dict_table_close(ib_table, FALSE, FALSE);
- DBUG_RETURN(ret_err);
}
m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
@@ -6477,19 +6299,10 @@ no_such_table:
}
/* Index block size in InnoDB: used by MySQL in query optimization */
- stats.block_size = UNIV_PAGE_SIZE;
-
- if (m_prebuilt->table != NULL) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_max,
- dict_table_get_format(m_prebuilt->table));
- }
+ stats.block_size = srv_page_size;
if (m_prebuilt->table == NULL
- || dict_table_is_temporary(m_prebuilt->table)
+ || m_prebuilt->table->is_temporary()
|| m_prebuilt->table->persistent_autoinc
|| !m_prebuilt->table->is_readable()) {
} else if (const Field* ai = table->found_next_number_field) {
@@ -6512,7 +6325,11 @@ no_such_table:
}
}
- info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (table && m_prebuilt->table) {
+ ut_ad(table->versioned() == m_prebuilt->table->versioned());
+ }
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN);
DBUG_RETURN(0);
}
@@ -6544,7 +6361,11 @@ platforms.
@return dictionary table object or NULL if not found */
dict_table_t*
ha_innobase::open_dict_table(
- const char* table_name,
+ const char*
+#ifdef _WIN32
+ table_name
+#endif
+ ,
const char* norm_name,
bool is_partition,
dict_err_ignore_t ignore_err)
@@ -6650,9 +6471,7 @@ ha_innobase::max_supported_key_part_length() const
{
/* A table format specific index column length check will be performed
at ha_innobase::add_index() and row_create_index_for_mysql() */
- return(innobase_large_prefix
- ? REC_VERSION_56_MAX_INDEX_COL_LEN
- : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1);
+ return(REC_VERSION_56_MAX_INDEX_COL_LEN);
}
/******************************************************************//**
@@ -6688,7 +6507,7 @@ ha_innobase::close()
#ifdef WITH_WSREP
UNIV_INTERN
-int
+ulint
wsrep_innobase_mysql_sort(
/*======================*/
/* out: str contains sort string */
@@ -6702,7 +6521,7 @@ wsrep_innobase_mysql_sort(
{
CHARSET_INFO* charset;
enum_field_types mysql_tp;
- int ret_length = str_length;
+ ulint ret_length = str_length;
DBUG_ASSERT(str_length != UNIV_SQL_NULL);
@@ -6939,7 +6758,7 @@ innobase_mysql_fts_get_token(
for (;;) {
if (doc >= end) {
- return(doc - start);
+ return ulint(doc - start);
}
int ctype;
@@ -6981,7 +6800,7 @@ innobase_mysql_fts_get_token(
token->f_len = (uint) (doc - token->f_str) - mwc;
token->f_n_char = length;
- return(doc - start);
+ return ulint(doc - start);
}
/** Converts a MySQL type to an InnoDB type. Note that this function returns
@@ -7563,7 +7382,7 @@ build_template_field(
ib::info() << "MySQL table "
<< table->s->table_name.str
<< " field " << j << " name "
- << table->field[j]->field_name;
+ << table->field[j]->field_name.str;
}
ib::error() << "Clustered record field for column " << i
@@ -7690,10 +7509,11 @@ ha_innobase::build_template(
ibool fetch_primary_key_cols = FALSE;
ulint i;
- if (m_prebuilt->select_lock_type == LOCK_X) {
+ if (m_prebuilt->select_lock_type == LOCK_X || m_prebuilt->table->no_rollback()) {
/* We always retrieve the whole clustered index record if we
use exclusive row level locks, for example, if the read is
- done in an UPDATE statement. */
+ done in an UPDATE statement or if we are using a no rollback
+ table */
whole_row = true;
} else if (!whole_row) {
@@ -7732,6 +7552,7 @@ ha_innobase::build_template(
index = whole_row ? clust_index : m_prebuilt->index;
+ m_prebuilt->versioned_write = table->versioned_write(VERS_TRX_ID);
m_prebuilt->need_to_access_clustered = (index == clust_index);
/* Either m_prebuilt->index should be a secondary index, or it
@@ -8157,23 +7978,15 @@ ha_innobase::write_row(
if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else if (m_prebuilt->trx != trx) {
-
- ib::error() << "The transaction object for the table handle is"
- " at " << static_cast<const void*>(m_prebuilt->trx)
- << ", but for the current thread it is at "
- << static_cast<const void*>(trx);
-
- fputs("InnoDB: Dump of 200 bytes around m_prebuilt: ", stderr);
- ut_print_buf(stderr, ((const byte*) m_prebuilt) - 100, 200);
- fputs("\nInnoDB: Dump of 200 bytes around ha_data: ", stderr);
- ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
- putc('\n', stderr);
- ut_error;
- } else if (!trx_is_started(trx)) {
+ }
+
+ ut_a(m_prebuilt->trx == trx);
+
+ if (!trx_is_started(trx)) {
++trx->will_lock;
}
+ ins_mode_t vers_set_fields;
/* Handling of Auto-Increment Columns. */
if (table->next_number_field && record == table->record[0]) {
@@ -8220,8 +8033,11 @@ ha_innobase::write_row(
innobase_srv_conc_enter_innodb(m_prebuilt);
+ vers_set_fields = table->versioned_write(VERS_TRX_ID) ?
+ ROW_INS_VERSIONED : ROW_INS_NORMAL;
+
/* Execute insert graph that will result in actual insert. */
- error = row_insert_for_mysql((byte*) record, m_prebuilt);
+ error = row_insert_for_mysql((byte*) record, m_prebuilt, vers_set_fields);
DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
@@ -8440,7 +8256,7 @@ dberr_t
calc_row_difference(
upd_t* uvect,
const uchar* old_row,
- uchar* new_row,
+ const uchar* new_row,
TABLE* table,
uchar* upd_buff,
ulint buff_len,
@@ -8545,7 +8361,7 @@ calc_row_difference(
if (field_mysql_type == MYSQL_TYPE_LONGLONG
&& prebuilt->table->fts
&& innobase_strcasecmp(
- field->field_name, FTS_DOC_ID_COL_NAME) == 0) {
+ field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
doc_id = (doc_id_t) mach_read_from_n_little_endian(
n_ptr, 8);
if (doc_id == 0) {
@@ -8842,8 +8658,7 @@ wsrep_calc_row_hash(
const uchar* row, /*!< in: row in MySQL format */
TABLE* table, /*!< in: table in MySQL data
dictionary */
- row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
- THD* thd) /*!< in: user thread */
+ row_prebuilt_t* prebuilt) /*!< in: InnoDB prebuilt struct */
{
Field* field;
enum_field_types field_mysql_type;
@@ -8930,7 +8745,7 @@ if its index columns are updated!
int
ha_innobase::update_row(
const uchar* old_row,
- uchar* new_row)
+ const uchar* new_row)
{
int err;
@@ -8990,14 +8805,33 @@ ha_innobase::update_row(
should not increase the count of updated rows.
This is fix for http://bugs.mysql.com/29157 */
DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
- }
+ } else {
+ const bool vers_set_fields = m_prebuilt->versioned_write
+ && m_prebuilt->upd_node->update->affects_versioned();
+ const bool vers_ins_row = vers_set_fields
+ && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE;
+
+ /* This is not a delete */
+ m_prebuilt->upd_node->is_delete =
+ (vers_set_fields && !vers_ins_row) ||
+ (thd_sql_command(m_user_thd) == SQLCOM_DELETE &&
+ table->versioned(VERS_TIMESTAMP))
+ ? VERSIONED_DELETE
+ : NO_DELETE;
- /* This is not a delete */
- m_prebuilt->upd_node->is_delete = FALSE;
+ innobase_srv_conc_enter_innodb(m_prebuilt);
- innobase_srv_conc_enter_innodb(m_prebuilt);
+ error = row_update_for_mysql(m_prebuilt);
- error = row_update_for_mysql(m_prebuilt);
+ if (error == DB_SUCCESS && vers_ins_row
+ /* Multiple UPDATE of same rows in single transaction create
+ historical rows only once. */
+ && trx->id != table->vers_start_id()) {
+ error = row_insert_for_mysql((byte*) old_row,
+ m_prebuilt,
+ ROW_INS_HISTORICAL);
+ }
+ }
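
The new else-branch above implements history generation for system-versioned tables: vers_set_fields means the UPDATE touches versioned columns, vers_ins_row additionally excludes the ALTER TABLE copy phase, and when both hold the old row is re-inserted with ROW_INS_HISTORICAL unless this transaction already owns the current row version (trx->id equals the row's row_start). Below is a condensed, self-contained sketch of that decision; update_ctx, update_plan and plan_update are illustrative stand-ins rather than names from the patch, and the timestamp-versioned DELETE-as-UPDATE case folded into the is_delete assignment is left out.

#include <cstdio>

struct update_ctx {
	bool	versioned_write;	/* system-versioned, trx-id columns */
	bool	affects_versioned;	/* UPDATE touches versioned columns */
	bool	is_alter_copy;		/* ALTER TABLE is copying rows */
	bool	row_started_by_us;	/* trx->id already equals row_start */
};

struct update_plan {
	bool	versioned_delete;	/* close the current row version */
	bool	insert_history_row;	/* re-insert the old row as history */
};

static update_plan plan_update(const update_ctx& c)
{
	const bool vers_set_fields = c.versioned_write && c.affects_versioned;
	const bool vers_ins_row = vers_set_fields && !c.is_alter_copy;
	update_plan p;
	p.versioned_delete = vers_set_fields && !vers_ins_row;
	/* a history row is written at most once per row and transaction */
	p.insert_history_row = vers_ins_row && !c.row_started_by_us;
	return p;
}

int main()
{
	update_plan p = plan_update({true, true, false, false});
	std::printf("versioned_delete=%d insert_history=%d\n",
		    p.versioned_delete, p.insert_history_row);
	return 0;
}
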
if (error == DB_SUCCESS && autoinc) {
/* A value for an AUTO_INCREMENT column
@@ -9090,8 +8924,11 @@ ha_innobase::delete_row(
}
/* This is a delete */
-
- m_prebuilt->upd_node->is_delete = TRUE;
+ m_prebuilt->upd_node->is_delete = table->versioned_write(VERS_TRX_ID)
+ && table->vers_end_field()->is_max()
+ && trx->id != table->vers_start_id()
+ ? VERSIONED_DELETE
+ : PLAIN_DELETE;
innobase_srv_conc_enter_innodb(m_prebuilt);
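
delete_row() gets the matching treatment: on a transaction-ID versioned table whose current row version is still open (row_end at its maximum) and was not created by this same transaction, the delete becomes VERSIONED_DELETE, i.e. row_end is stamped and the row is kept as history; otherwise it stays a PLAIN_DELETE. A minimal stand-alone sketch of the condition, with simplified stand-in names:

#include <cassert>

enum class delete_mode { plain, versioned };

/* 'row_end_is_max' means the row is the current version;
'row_started_by_us' means trx->id == row_start, so this transaction
created the version and no history row needs to be preserved. */
static delete_mode pick_delete_mode(bool versioned_by_trx_id,
				    bool row_end_is_max,
				    bool row_started_by_us)
{
	return versioned_by_trx_id && row_end_is_max && !row_started_by_us
		? delete_mode::versioned
		: delete_mode::plain;
}

int main()
{
	assert(pick_delete_mode(true, true, false) == delete_mode::versioned);
	assert(pick_delete_mode(true, true, true) == delete_mode::plain);
	assert(pick_delete_mode(false, true, false) == delete_mode::plain);
	return 0;
}
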
@@ -9209,8 +9046,7 @@ int
ha_innobase::index_init(
/*====================*/
uint keynr, /*!< in: key (index) number */
- bool sorted) /*!< in: 1 if result MUST be sorted
- according to index */
+ bool)
{
DBUG_ENTER("index_init");
@@ -9396,8 +9232,7 @@ ha_innobase::index_read(
m_prebuilt->srch_key_val_len,
index,
(byte*) key_ptr,
- (ulint) key_len,
- m_prebuilt->trx);
+ (ulint) key_len);
DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
} else {
@@ -9533,7 +9368,7 @@ ha_innobase::innobase_get_index(
if (keynr != MAX_KEY && table->s->keys > 0) {
key = &table->key_info[keynr];
- index = dict_table_get_index_on_name(ib_table, key->name);
+ index = dict_table_get_index_on_name(ib_table, key->name.str);
ut_ad(index);
} else {
index = dict_table_get_first_index(ib_table);
@@ -9543,7 +9378,7 @@ ha_innobase::innobase_get_index(
sql_print_error(
"InnoDB could not find key no %u with name %s"
" from dict cache for table %s",
- keynr, key ? key->name : "NULL",
+ keynr, key ? key->name.str : "NULL",
ib_table->name.m_name);
}
@@ -9688,9 +9523,7 @@ ha_innobase::general_fetch(
} else if (m_prebuilt->table->corrupted) {
DBUG_RETURN(HA_ERR_CRASHED);
} else {
- FilSpace space(m_prebuilt->table->space, true);
-
- DBUG_RETURN(space()
+ DBUG_RETURN(m_prebuilt->table->space
? HA_ERR_DECRYPTION_FAILED
: HA_ERR_NO_SUCH_TABLE);
}
@@ -9776,8 +9609,7 @@ int
ha_innobase::index_next_same(
/*=========================*/
uchar* buf, /*!< in/out: buffer for the row */
- const uchar* key, /*!< in: key value */
- uint keylen) /*!< in: key value length */
+ const uchar*, uint)
{
return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
}
@@ -9935,7 +9767,7 @@ ha_innobase::rnd_pos(
/* Note that we assume the length of the row reference is fixed
for the table, and it is == ref_length */
- int error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
+ int error = index_read(buf, pos, (uint)ref_length, HA_READ_KEY_EXACT);
if (error != 0) {
DBUG_PRINT("error", ("Got error: %d", error));
@@ -10040,7 +9872,7 @@ ha_innobase::ft_init_ext(
}
/* If tablespace is discarded, we should return here */
- if (dict_table_is_discarded(ft_table)) {
+ if (!ft_table->space) {
my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str,
table->s->table_name.str);
return(NULL);
@@ -10396,7 +10228,7 @@ wsrep_append_foreign_key(
}
ut_a(idx);
- key[0] = (char)i;
+ key[0] = byte(i);
rcode = wsrep_rec_get_foreign_key(
&key[1], &len, rec, index, idx,
@@ -10484,7 +10316,6 @@ wsrep_append_key(
THD *thd,
trx_t *trx,
TABLE_SHARE *table_share,
- TABLE *table,
const char* key,
uint16_t key_len,
bool shared
@@ -10597,7 +10428,7 @@ ha_innobase::wsrep_append_keys(
if (!is_null) {
rcode = wsrep_append_key(
- thd, trx, table_share, table, keyval,
+ thd, trx, table_share, keyval,
len, shared);
if (rcode) {
@@ -10637,7 +10468,7 @@ ha_innobase::wsrep_append_keys(
if (!tab) {
WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
table->s->table_name.str,
- key_info->name);
+ key_info->name.str);
}
/* !hasPK == table with no PK, must append all non-unique keys */
if (!hasPK || key_info->flags & HA_NOSAME ||
@@ -10651,7 +10482,7 @@ ha_innobase::wsrep_append_keys(
record0, &is_null);
if (!is_null) {
rcode = wsrep_append_key(
- thd, trx, table_share, table,
+ thd, trx, table_share,
keyval0, len+1, shared);
if (rcode) {
@@ -10674,7 +10505,6 @@ ha_innobase::wsrep_append_keys(
if (!is_null && memcmp(key0, key1, len)) {
rcode = wsrep_append_key(
thd, trx, table_share,
- table,
keyval1, len+1, shared);
if (rcode) DBUG_RETURN(rcode);
}
@@ -10688,9 +10518,9 @@ ha_innobase::wsrep_append_keys(
uchar digest[16];
int rcode;
- wsrep_calc_row_hash(digest, record0, table, m_prebuilt, thd);
+ wsrep_calc_row_hash(digest, record0, table, m_prebuilt);
- if ((rcode = wsrep_append_key(thd, trx, table_share, table,
+ if ((rcode = wsrep_append_key(thd, trx, table_share,
(const char*) digest, 16,
shared))) {
DBUG_RETURN(rcode);
@@ -10698,9 +10528,8 @@ ha_innobase::wsrep_append_keys(
if (record1) {
wsrep_calc_row_hash(
- digest, record1, table, m_prebuilt, thd);
+ digest, record1, table, m_prebuilt);
if ((rcode = wsrep_append_key(thd, trx, table_share,
- table,
(const char*) digest,
16, shared))) {
DBUG_RETURN(rcode);
@@ -10785,7 +10614,7 @@ create_table_check_doc_id_col(
col_len = field->pack_length();
- if (innobase_strcasecmp(field->field_name,
+ if (innobase_strcasecmp(field->field_name.str,
FTS_DOC_ID_COL_NAME) == 0) {
/* Note the name is case sensitive due to
@@ -10793,7 +10622,7 @@ create_table_check_doc_id_col(
if (col_type == DATA_INT
&& !field->real_maybe_null()
&& col_len == sizeof(doc_id_t)
- && (strcmp(field->field_name,
+ && (strcmp(field->field_name.str,
FTS_DOC_ID_COL_NAME) == 0)) {
*doc_id_col = i;
} else {
@@ -10805,7 +10634,7 @@ create_table_check_doc_id_col(
" of BIGINT NOT NULL type, and named"
" in all capitalized characters");
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name);
+ field->field_name.str);
*doc_id_col = ULINT_UNDEFINED;
}
@@ -10863,7 +10692,7 @@ innodb_base_col_setup(
const Field* field,
dict_v_col_t* v_col)
{
- int n = 0;
+ ulint n = 0;
prepare_vcol_for_base_setup(table, field, v_col);
@@ -10876,7 +10705,7 @@ innodb_base_col_setup(
for (z = 0; z < table->n_cols; z++) {
const char* name = dict_table_get_col_name(table, z);
if (!innobase_strcasecmp(name,
- base_field->field_name)) {
+ base_field->field_name.str)) {
break;
}
}
@@ -10917,7 +10746,7 @@ innodb_base_col_setup_for_stored(
const char* name = dict_table_get_col_name(
table, z);
if (!innobase_strcasecmp(
- name, base_field->field_name)) {
+ name, base_field->field_name.str)) {
break;
}
}
@@ -10956,7 +10785,6 @@ create_table_info_t::create_table_def()
ibool has_doc_id_col = FALSE;
mem_heap_t* heap;
ulint num_v = 0;
- ulint space_id = 0;
ulint actual_n_cols;
ha_table_option_struct *options= m_form->s->option_struct;
dberr_t err = DB_SUCCESS;
@@ -11004,24 +10832,19 @@ create_table_info_t::create_table_def()
/* Raise error if the Doc ID column is of wrong type or name */
if (doc_id_col == ULINT_UNDEFINED) {
-
- err = DB_ERROR;
- goto error_ret;
+ DBUG_RETURN(HA_ERR_GENERIC);
} else {
has_doc_id_col = TRUE;
}
}
- /* For single-table tablespaces, we pass 0 as the space id, and then
- determine the actual space id when the tablespace is created. */
-
/* Adjust the number of columns for the FTS hidden field */
actual_n_cols = n_cols;
if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
actual_n_cols += 1;
}
- table = dict_mem_table_create(m_table_name, space_id,
+ table = dict_mem_table_create(m_table_name, NULL,
actual_n_cols, num_v, m_flags, m_flags2);
/* Set the hidden doc_id column. */
@@ -11047,6 +10870,18 @@ create_table_info_t::create_table_def()
bool is_stored = false;
Field* field = m_form->field[i];
+ ulint vers_row = 0;
+
+ if (m_form->versioned()) {
+ if (i == m_form->s->row_start_field) {
+ vers_row = DATA_VERS_START;
+ } else if (i == m_form->s->row_end_field) {
+ vers_row = DATA_VERS_END;
+ } else if (!(field->flags
+ & VERS_UPDATE_UNVERSIONED_FLAG)) {
+ vers_row = DATA_VERSIONED;
+ }
+ }
col_type = get_innobase_type_from_mysql_type(
&unsigned_type, field);
@@ -11060,7 +10895,7 @@ create_table_info_t::create_table_def()
" column type and try to re-create"
" the table with an appropriate"
" column type.",
- table->name.m_name, field->field_name);
+ table->name.m_name, field->field_name.str);
goto err_col;
}
@@ -11116,33 +10951,34 @@ create_table_info_t::create_table_def()
/* First check whether the column to be added has a
system reserved name. */
- if (dict_col_name_is_reserved(field->field_name)){
+ if (dict_col_name_is_reserved(field->field_name.str)){
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name);
+ field->field_name.str);
err_col:
dict_mem_table_free(table);
mem_heap_free(heap);
-
- err = DB_ERROR;
- goto error_ret;
+ ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED));
+ DBUG_RETURN(HA_ERR_GENERIC);
}
if (!is_virtual) {
dict_mem_table_add_col(table, heap,
- field->field_name, col_type,
+ field->field_name.str, col_type,
dtype_form_prtype(
(ulint) field->type()
| nulls_allowed | unsigned_type
- | binary_type | long_true_varchar,
+ | binary_type | long_true_varchar
+ | vers_row,
charset_no),
col_len);
} else {
dict_mem_table_add_v_col(table, heap,
- field->field_name, col_type,
+ field->field_name.str, col_type,
dtype_form_prtype(
(ulint) field->type()
| nulls_allowed | unsigned_type
| binary_type | long_true_varchar
+ | vers_row
| is_virtual,
charset_no),
col_len, i, 0);
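
Together with the vers_row flag chosen a few lines earlier (DATA_VERS_START for row_start, DATA_VERS_END for row_end, DATA_VERSIONED for any other column not marked VERS_UPDATE_UNVERSIONED_FLAG), the column's precise type is built by OR-ing that flag into the same bit set as NULLability, signedness, binary collation and the long-VARCHAR marker. A small sketch of the flag selection and the OR-composition; the F_* constants are illustrative bits only, not InnoDB's real DATA_* masks:

#include <cassert>

enum : unsigned {
	F_NOT_NULL	= 1U << 0,
	F_UNSIGNED	= 1U << 1,
	F_VERS_START	= 1U << 2,
	F_VERS_END	= 1U << 3,
	F_VERSIONED	= 1U << 4
};

static unsigned vers_flag(bool table_versioned, bool is_row_start,
			  bool is_row_end, bool update_unversioned)
{
	if (!table_versioned)		return 0;
	if (is_row_start)		return F_VERS_START;
	if (is_row_end)			return F_VERS_END;
	if (!update_unversioned)	return F_VERSIONED;
	return 0;
}

int main()
{
	/* an ordinary NOT NULL column of a system-versioned table */
	unsigned prtype = F_NOT_NULL
		| vers_flag(true, false, false, false);
	assert(prtype == (F_NOT_NULL | F_VERSIONED));
	return 0;
}
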
@@ -11203,25 +11039,17 @@ err_col:
fts_add_doc_id_column(table, heap);
}
- /* If temp table, then we avoid creation of entries in SYSTEM TABLES.
- Given that temp table lifetime is limited to connection/server lifetime
- on re-start we don't need to restore temp-table and so no entry is
- needed in SYSTEM tables. */
- if (dict_table_is_temporary(table)) {
- /* Get a new table ID */
- dict_table_assign_new_id(table, m_trx);
- table->space = SRV_TMP_SPACE_ID;
-
- /* Temp-table are maintained in memory and so
- can_be_evicted is FALSE. */
- mem_heap_t* temp_table_heap = mem_heap_create(256);
+ dict_table_add_system_columns(table, heap);
- dict_table_add_to_cache(table, FALSE, temp_table_heap);
-
- DBUG_EXECUTE_IF("ib_ddl_crash_during_create2",
- DBUG_SUICIDE(););
-
- mem_heap_free(temp_table_heap);
+ if (table->is_temporary()) {
+ /* Get a new table ID. FIXME: Make this a private
+ sequence, not shared with persistent tables! */
+ dict_table_assign_new_id(table, m_trx);
+ ut_ad(dict_tf_get_rec_format(table->flags)
+ != REC_FORMAT_COMPRESSED);
+ table->space_id = SRV_TMP_SPACE_ID;
+ table->space = fil_system.temp_space;
+ table->add_to_cache();
} else {
if (err == DB_SUCCESS) {
err = row_create_table_for_mysql(
@@ -11240,7 +11068,18 @@ err_col:
DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
err = DB_TABLESPACE_EXISTS;);
- if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) {
+ switch (err) {
+ case DB_SUCCESS:
+ ut_ad(table);
+ m_table = table;
+ if (m_flags2 & DICT_TF2_FTS) {
+ fts_optimize_add_table(table);
+ }
+ DBUG_RETURN(0);
+ default:
+ break;
+ case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
char display_name[FN_REFLEN];
char* buf_end = innobase_convert_identifier(
display_name, sizeof(display_name) - 1,
@@ -11254,13 +11093,7 @@ err_col:
: ER_TABLESPACE_EXISTS, MYF(0), display_name);
}
- if (err == DB_SUCCESS && (m_flags2 & DICT_TF2_FTS)) {
- fts_optimize_add_table(table);
- }
-
-error_ret:
- DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
-}
+	DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
+}
/*****************************************************************//**
Creates an index in an InnoDB database. */
@@ -11271,14 +11104,12 @@ create_index(
trx_t* trx, /*!< in: InnoDB transaction handle */
const TABLE* form, /*!< in: information on table
columns and indexes */
- ulint flags, /*!< in: InnoDB table flags */
- const char* table_name, /*!< in: table name */
+ dict_table_t* table, /*!< in,out: table */
uint key_num) /*!< in: index number */
{
dict_index_t* index;
int error;
const KEY* key;
- ulint ind_type;
ulint* field_lengths;
DBUG_ENTER("create_index");
@@ -11286,19 +11117,15 @@ create_index(
key = form->key_info + key_num;
/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
- ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
-
- ind_type = 0;
- if (key->flags & HA_SPATIAL) {
- ind_type = DICT_SPATIAL;
- } else if (key->flags & HA_FULLTEXT) {
- ind_type = DICT_FTS;
- }
-
- if (ind_type != 0)
- {
- index = dict_mem_index_create(table_name, key->name, 0,
- ind_type,
+ ut_a(innobase_strcasecmp(key->name.str, innobase_index_reserve_name) != 0);
+
+ if (key->flags & (HA_SPATIAL | HA_FULLTEXT)) {
+ /* Only one of these can be specified at a time. */
+ ut_ad(~key->flags & (HA_SPATIAL | HA_FULLTEXT));
+ ut_ad(!(key->flags & HA_NOSAME));
+ index = dict_mem_index_create(table, key->name.str,
+ (key->flags & HA_SPATIAL)
+ ? DICT_SPATIAL : DICT_FTS,
key->user_defined_key_parts);
for (ulint i = 0; i < key->user_defined_key_parts; i++) {
@@ -11312,17 +11139,16 @@ create_index(
}
dict_mem_index_add_field(
- index, key_part->field->field_name, 0);
+ index, key_part->field->field_name.str, 0);
}
DBUG_RETURN(convert_error_code_to_mysql(
row_create_index_for_mysql(
index, trx, NULL),
- flags, NULL));
-
+ table->flags, NULL));
}
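
The branch above folds the former DICT_SPATIAL / DICT_FTS special cases into one: SPATIAL and FULLTEXT keys are mutually exclusive, never unique, and their fields are added with a zero prefix length. A condensed sketch of the same dispatch; the constants and the function are simplified stand-ins, not the dict_mem_* API:

#include <cassert>

enum key_flag	{ KF_SPATIAL = 1, KF_FULLTEXT = 2, KF_UNIQUE = 4 };
enum index_type	{ IDX_BTREE, IDX_SPATIAL, IDX_FTS };

static index_type pick_index_type(unsigned flags)
{
	if (flags & (KF_SPATIAL | KF_FULLTEXT)) {
		/* only one of the two may be set, and neither is unique */
		assert(!((flags & KF_SPATIAL) && (flags & KF_FULLTEXT)));
		assert(!(flags & KF_UNIQUE));
		return (flags & KF_SPATIAL) ? IDX_SPATIAL : IDX_FTS;
	}
	return IDX_BTREE;
}

int main()
{
	assert(pick_index_type(KF_SPATIAL) == IDX_SPATIAL);
	assert(pick_index_type(KF_FULLTEXT) == IDX_FTS);
	assert(pick_index_type(KF_UNIQUE) == IDX_BTREE);
	return 0;
}
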
- ind_type = 0;
+ ulint ind_type = 0;
if (key_num == form->s->primary_key) {
ind_type |= DICT_CLUSTERED;
@@ -11339,7 +11165,7 @@ create_index(
/* We pass 0 as the space id, and determine at a lower level the space
id where to store the table */
- index = dict_mem_index_create(table_name, key->name, 0,
+ index = dict_mem_index_create(table, key->name.str,
ind_type, key->user_defined_key_parts);
for (ulint i = 0; i < key->user_defined_key_parts; i++) {
@@ -11364,7 +11190,7 @@ create_index(
if (field == NULL)
ut_error;
- const char* field_name = key_part->field->field_name;
+ const char* field_name = key_part->field->field_name.str;
col_type = get_innobase_type_from_mysql_type(
&is_unsigned, key_part->field);
@@ -11389,8 +11215,8 @@ create_index(
" prefix index field, on an"
" inappropriate data type. Table"
" name %s, column name %s.",
- table_name,
- key_part->field->field_name);
+ form->s->table_name.str,
+ key_part->field->field_name.str);
prefix_len = 0;
}
@@ -11412,6 +11238,7 @@ create_index(
/* Even though we've defined max_supported_key_part_length, we
still do our own checking using field_lengths to be absolutely
sure we don't create too long indexes. */
+ ulint flags = table->flags;
error = convert_error_code_to_mysql(
row_create_index_for_mysql(index, trx, field_lengths),
@@ -11422,31 +11249,6 @@ create_index(
DBUG_RETURN(error);
}
-/*****************************************************************//**
-Creates an index to an InnoDB table when the user has defined no
-primary index. */
-inline
-int
-create_clustered_index_when_no_primary(
-/*===================================*/
- trx_t* trx, /*!< in: InnoDB transaction handle */
- ulint flags, /*!< in: InnoDB table flags */
- const char* table_name) /*!< in: table name */
-{
- dict_index_t* index;
- dberr_t error;
-
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
- index = dict_mem_index_create(table_name,
- innobase_index_reserve_name,
- 0, DICT_CLUSTERED, 0);
-
- error = row_create_index_for_mysql(index, trx, NULL);
-
- return(convert_error_code_to_mysql(error, flags, NULL));
-}
-
/** Return a display name for the row format
@param[in] row_format Row Format
@return row format name */
@@ -11551,12 +11353,12 @@ create_table_info_t::create_options_are_invalid()
case 8:
case 16:
/* The maximum KEY_BLOCK_SIZE (KBS) is
- UNIV_PAGE_SIZE_MAX. But if UNIV_PAGE_SIZE is
+ UNIV_PAGE_SIZE_MAX. But if srv_page_size is
smaller than UNIV_PAGE_SIZE_MAX, the maximum
KBS is also smaller. */
kbs_max = ut_min(
- 1 << (UNIV_PAGE_SSIZE_MAX - 1),
- 1 << (PAGE_ZIP_SSIZE_MAX - 1));
+ 1U << (UNIV_PAGE_SSIZE_MAX - 1),
+ 1U << (PAGE_ZIP_SSIZE_MAX - 1));
if (m_create_info->key_block_size > kbs_max) {
push_warning_printf(
m_thd, Sql_condition::WARN_LEVEL_WARN,
@@ -11577,14 +11379,6 @@ create_table_info_t::create_options_are_invalid()
" innodb_file_per_table.");
ret = "KEY_BLOCK_SIZE";
}
- if (srv_file_format < UNIV_FORMAT_B) {
- push_warning(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE requires"
- " innodb_file_format > Antelope.");
- ret = "KEY_BLOCK_SIZE";
- }
break;
default:
push_warning_printf(
@@ -11616,28 +11410,8 @@ create_table_info_t::create_options_are_invalid()
get_row_format_name(row_format));
ret = "ROW_FORMAT";
}
- if (srv_file_format < UNIV_FORMAT_B) {
- push_warning_printf(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s requires"
- " innodb_file_format > Antelope.",
- get_row_format_name(row_format));
- ret = "ROW_FORMAT";
- }
break;
case ROW_TYPE_DYNAMIC:
- if (!is_temp && srv_file_format < UNIV_FORMAT_B) {
- push_warning_printf(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s requires"
- " innodb_file_format > Antelope.",
- get_row_format_name(row_format));
- ret = "ROW_FORMAT";
- }
- /* ROW_FORMAT=DYNAMIC also shuns KEY_BLOCK_SIZE */
- /* fall through */
case ROW_TYPE_COMPACT:
case ROW_TYPE_REDUNDANT:
if (has_key_block_size) {
@@ -11680,7 +11454,7 @@ create_table_info_t::create_options_are_invalid()
/* Don't support compressed table when page size > 16k. */
if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
- && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) {
+ && srv_page_size > UNIV_PAGE_SIZE_DEF) {
push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: Cannot create a COMPRESSED table"
@@ -11767,15 +11541,6 @@ create_table_info_t::check_table_options()
return "PAGE_COMPRESSED";
}
- if (srv_file_format < UNIV_FORMAT_B) {
- push_warning(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED requires"
- " innodb_file_format > Antelope.");
- return "PAGE_COMPRESSED";
- }
-
if (m_create_info->key_block_size) {
push_warning(
m_thd, Sql_condition::WARN_LEVEL_WARN,
@@ -11865,7 +11630,7 @@ ha_innobase::update_create_info(
create_info->auto_increment_value = stats.auto_increment_value;
}
- if (dict_table_is_temporary(m_prebuilt->table)) {
+ if (m_prebuilt->table->is_temporary()) {
return;
}
@@ -11898,7 +11663,11 @@ innobase_fts_load_stopword(
@return 0 if successful, otherwise, error number */
int
create_table_info_t::parse_table_name(
- const char* name)
+ const char*
+#ifdef _WIN32
+ name
+#endif
+ )
{
DBUG_ENTER("parse_table_name");
@@ -11980,10 +11749,6 @@ create_table_info_t::innobase_table_flags()
ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
- /* Cache the value of innodb_file_format, in case it is
- modified by another thread while the table is being created. */
- const ulint file_format_allowed = srv_file_format;
-
/* Cache the value of innobase_compression_level, in case it is
modified by another thread while the table is being created. */
const ulint default_compression_level = page_zip_level;
@@ -12012,16 +11777,16 @@ create_table_info_t::innobase_table_flags()
}
}
- if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
+ if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
continue;
}
/* Do a pre-check on FTS DOC ID index */
if (!(key->flags & HA_NOSAME)
- || strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
- || strcmp(key->key_part[0].field->field_name,
+ || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
+ || strcmp(key->key_part[0].field->field_name.str,
FTS_DOC_ID_COL_NAME)) {
- fts_doc_id_index_bad = key->name;
+ fts_doc_id_index_bad = key->name.str;
}
if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
@@ -12065,15 +11830,6 @@ index_bad:
zip_allowed = false;
}
- if (file_format_allowed < UNIV_FORMAT_B) {
- push_warning(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE requires"
- " innodb_file_format > Antelope.");
- zip_allowed = false;
- }
-
if (!zip_allowed
|| zssize > zip_ssize_max) {
push_warning_printf(
@@ -12111,7 +11867,7 @@ index_bad:
if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
/* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
implies half the maximum KEY_BLOCK_SIZE(*1k) or
- UNIV_PAGE_SIZE, whichever is less. */
+ srv_page_size, whichever is less. */
zip_ssize = zip_ssize_max - 1;
}
}
@@ -12139,13 +11895,6 @@ index_bad:
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=COMPRESSED requires"
" innodb_file_per_table.");
-
- } else if (file_format_allowed == UNIV_FORMAT_A) {
- push_warning_printf(
- m_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=COMPRESSED requires"
- " innodb_file_format > Antelope.");
} else {
innodb_row_format = REC_FORMAT_COMPRESSED;
break;
@@ -12169,7 +11918,7 @@ index_bad:
}
/* Don't support compressed table when page size > 16k. */
- if (zip_allowed && zip_ssize && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) {
+ if (zip_allowed && zip_ssize && srv_page_size > UNIV_PAGE_SIZE_DEF) {
push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: Cannot create a COMPRESSED table"
@@ -12197,8 +11946,11 @@ index_bad:
m_use_data_dir,
options->page_compressed,
options->page_compression_level == 0 ?
- default_compression_level : static_cast<ulint>(options->page_compression_level),
- 0);
+ default_compression_level : ulint(options->page_compression_level));
+
+ if (m_form->s->table_type == TABLE_TYPE_SEQUENCE) {
+ m_flags |= DICT_TF_MASK_NO_ROLLBACK;
+ }
/* Set the flags2 when create table or alter tables */
m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
@@ -12293,7 +12045,7 @@ innobase_parse_hint_from_comment(
}
/* update SYS_INDEX table */
- if (!dict_table_is_temporary(table)) {
+ if (!table->is_temporary()) {
for (uint i = 0; i < table_share->keys; i++) {
is_found[i] = false;
}
@@ -12319,7 +12071,7 @@ innobase_parse_hint_from_comment(
KEY* key_info = &table_share->key_info[i];
if (innobase_strcasecmp(
- index->name, key_info->name) == 0) {
+ index->name, key_info->name.str) == 0) {
dict_index_set_merge_threshold(
index,
@@ -12361,7 +12113,7 @@ innobase_parse_hint_from_comment(
KEY* key_info = &table_share->key_info[i];
if (innobase_strcasecmp(
- index->name, key_info->name) == 0) {
+ index->name, key_info->name.str) == 0) {
/* x-lock index is needed to exclude concurrent
pessimistic tree operations */
@@ -12512,7 +12264,6 @@ int create_table_info_t::create_table(bool create_fk)
int error;
int primary_key_no;
uint i;
- dict_table_t* innobase_table = NULL;
DBUG_ENTER("create_table");
@@ -12539,9 +12290,13 @@ int create_table_info_t::create_table(bool create_fk)
/* Create an index which is used as the clustered index;
order the rows by their row id which is internally generated
by InnoDB */
-
- error = create_clustered_index_when_no_primary(
- m_trx, m_flags, m_table_name);
+ ulint flags = m_table->flags;
+ dict_index_t* index = dict_mem_index_create(
+ m_table, innobase_index_reserve_name,
+ DICT_CLUSTERED, 0);
+ error = convert_error_code_to_mysql(
+ row_create_index_for_mysql(index, m_trx, NULL),
+ flags, m_thd);
if (error) {
DBUG_RETURN(error);
}
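
The removed helper create_clustered_index_when_no_primary() (deleted further down in this patch) is now inlined here: when the table declares no PRIMARY KEY, a hidden clustered index named by innobase_index_reserve_name ("GEN_CLUST_INDEX") with zero user-defined fields is created and rows are ordered by the internally generated row id. A minimal sketch of that fallback, using hypothetical types rather than the dict_mem_index_create() signature:

#include <cstdio>
#include <string>

struct index_def {
	std::string	name;
	bool		clustered;
	unsigned	n_user_fields;
};

/* With no PRIMARY KEY, fall back to the hidden GEN_CLUST_INDEX
ordered by the internal row id. */
static index_def clustered_index_for(int primary_key_no,
				     const std::string& pk_name)
{
	if (primary_key_no == -1) {
		return { "GEN_CLUST_INDEX", true, 0 };
	}
	return { pk_name, true, 1 /* user-defined key parts, simplified */ };
}

int main()
{
	index_def d = clustered_index_for(-1, "");
	std::printf("%s clustered=%d fields=%u\n",
		    d.name.c_str(), d.clustered, d.n_user_fields);
	return 0;
}
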
@@ -12550,7 +12305,7 @@ int create_table_info_t::create_table(bool create_fk)
if (primary_key_no != -1) {
/* In InnoDB the clustered index must always be created
first */
- if ((error = create_index(m_trx, m_form, m_flags, m_table_name,
+ if ((error = create_index(m_trx, m_form, m_table,
(uint) primary_key_no))) {
DBUG_RETURN(error);
}
@@ -12561,11 +12316,6 @@ int create_table_info_t::create_table(bool create_fk)
if (m_flags2 & DICT_TF2_FTS) {
fts_doc_id_index_enum ret;
- innobase_table = dict_table_open_on_name(
- m_table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
-
- ut_a(innobase_table);
-
/* Check whether there already exists FTS_DOC_ID_INDEX */
ret = innobase_fts_check_doc_id_index_in_def(
m_form->s->keys, m_form->key_info);
@@ -12584,13 +12334,12 @@ int create_table_info_t::create_table(bool create_fk)
" make sure it is of correct"
" type\n",
FTS_DOC_ID_INDEX_NAME,
- innobase_table->name.m_name);
+ m_table->name.m_name);
- if (innobase_table->fts) {
- fts_free(innobase_table);
+ if (m_table->fts) {
+ fts_free(m_table);
}
- dict_table_close(innobase_table, TRUE, FALSE);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
FTS_DOC_ID_INDEX_NAME);
DBUG_RETURN(-1);
@@ -12600,37 +12349,31 @@ int create_table_info_t::create_table(bool create_fk)
}
dberr_t err = fts_create_common_tables(
- m_trx, innobase_table, m_table_name,
+ m_trx, m_table,
(ret == FTS_EXIST_DOC_ID_INDEX));
error = convert_error_code_to_mysql(err, 0, NULL);
- dict_table_close(innobase_table, TRUE, FALSE);
-
if (error) {
DBUG_RETURN(error);
}
}
for (i = 0; i < m_form->s->keys; i++) {
-
- if (i != static_cast<uint>(primary_key_no)) {
-
- if ((error = create_index(m_trx, m_form, m_flags,
- m_table_name, i))) {
- DBUG_RETURN(error);
- }
+ if (i != uint(primary_key_no)
+ && (error = create_index(m_trx, m_form, m_table, i))) {
+ DBUG_RETURN(error);
}
}
/* Cache all the FTS indexes on this table in the FTS specific
structure. They are used for FTS indexed column update handling. */
if (m_flags2 & DICT_TF2_FTS) {
- fts_t* fts = innobase_table->fts;
+ fts_t* fts = m_table->fts;
ut_a(fts != NULL);
- dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
+ dict_table_get_all_fts_indexes(m_table, fts->indexes);
}
size_t stmt_len;
@@ -12693,13 +12436,6 @@ int create_table_info_t::create_table(bool create_fk)
}
}
- innobase_table = dict_table_open_on_name(
- m_table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
-
- if (innobase_table != NULL) {
- dict_table_close(innobase_table, TRUE, FALSE);
- }
-
DBUG_RETURN(0);
}
@@ -12737,21 +12473,12 @@ create_table_info_t::create_table_update_dict()
dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
- if (innobase_table) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_max,
- dict_table_get_format(innobase_table));
- }
-
/* Load server stopword into FTS cache */
if (m_flags2 & DICT_TF2_FTS) {
if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
dict_table_close(innobase_table, FALSE, FALSE);
srv_active_wake_master_thread();
- trx_free_for_mysql(m_trx);
+ trx_free(m_trx);
DBUG_RETURN(-1);
}
}
@@ -12768,7 +12495,7 @@ create_table_info_t::create_table_update_dict()
dict_table_autoinc_lock(innobase_table);
dict_table_autoinc_initialize(innobase_table, autoinc);
- if (dict_table_is_temporary(innobase_table)) {
+ if (innobase_table->is_temporary()) {
/* AUTO_INCREMENT is not persistent for
TEMPORARY TABLE. Temporary tables are never
evicted. Keep the counter in memory only. */
@@ -12830,6 +12557,10 @@ ha_innobase::create(
DBUG_ENTER("ha_innobase::create");
+ DBUG_ASSERT(form->s == table_share);
+ DBUG_ASSERT(table_share->table_type == TABLE_TYPE_SEQUENCE
+ || table_share->table_type == TABLE_TYPE_NORMAL);
+
create_table_info_t info(ha_thd(),
form,
create_info,
@@ -12870,7 +12601,7 @@ ha_innobase::create(
trx_rollback_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
if (own_trx) {
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
DBUG_RETURN(error);
}
@@ -12879,7 +12610,7 @@ ha_innobase::create(
row_mysql_unlock_data_dictionary(trx);
if (own_trx) {
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
/* Flush the log to reduce probability that the .frm files and
@@ -12935,7 +12666,7 @@ ha_innobase::discard_or_import_tablespace(
dict_table_t* dict_table = m_prebuilt->table;
- if (dict_table_is_temporary(dict_table)) {
+ if (dict_table->is_temporary()) {
ib_senderrf(
m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
@@ -12944,7 +12675,7 @@ ha_innobase::discard_or_import_tablespace(
DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
}
- if (dict_table->space == srv_sys_space.space_id()) {
+ if (dict_table->space == fil_system.sys_space) {
ib_senderrf(
m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLE_IN_SYSTEM_TABLESPACE,
@@ -13082,7 +12813,7 @@ inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
iter != parent_trx->mod_tables.end();
++iter) {
- dict_table_t* table_to_drop = *iter;
+ dict_table_t* table_to_drop = iter->first;
if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
parent_trx->mod_tables.erase(table_to_drop);
@@ -13206,7 +12937,7 @@ inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
innobase_commit_low(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
}
@@ -13306,7 +13037,7 @@ innobase_drop_database(
innobase_commit_low(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
/** Rename an InnoDB table.
@@ -13376,11 +13107,6 @@ innobase_rename_table(
goto func_exit;
}
- /* Transaction must be flagged as a locking transaction or it hasn't
- been started yet. */
-
- ut_a(trx->will_lock > 0);
-
error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit);
if (error != DB_SUCCESS) {
@@ -13457,37 +13183,6 @@ int ha_innobase::truncate()
update_thd();
- if (!srv_safe_truncate) {
- if (!trx_is_started(m_prebuilt->trx)) {
- ++m_prebuilt->trx->will_lock;
- }
-
- dberr_t err = row_truncate_table_for_mysql(
- m_prebuilt->table, m_prebuilt->trx);
-
- int error;
-
- switch (err) {
- case DB_TABLESPACE_DELETED:
- case DB_TABLESPACE_NOT_FOUND:
- ib_senderrf(
- m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- err == DB_TABLESPACE_DELETED
- ? ER_TABLESPACE_DISCARDED
- : ER_TABLESPACE_MISSING,
- table->s->table_name.str);
- error = HA_ERR_TABLESPACE_MISSING;
- break;
- default:
- error = convert_error_code_to_mysql(
- err, m_prebuilt->table->flags,
- m_prebuilt->trx->mysql_thd);
- break;
- }
- table->status = STATUS_NOT_FOUND;
- DBUG_RETURN(error);
- }
-
HA_CREATE_INFO info;
mem_heap_t* heap = mem_heap_create(1000);
dict_table_t* ib_table = m_prebuilt->table;
@@ -13496,7 +13191,7 @@ int ha_innobase::truncate()
info.init();
update_create_info_from_table(&info, table);
- if (dict_table_is_temporary(ib_table)) {
+ if (ib_table->is_temporary()) {
info.options|= HA_LEX_CREATE_TMP_TABLE;
} else {
dict_get_and_save_data_dir_path(ib_table, false);
@@ -13528,7 +13223,7 @@ int ha_innobase::truncate()
dict_table_is_file_per_table(ib_table), trx);
}
- trx_free_for_mysql(trx);
+ trx_free(trx);
if (!err) {
/* Reopen the newly created table, and drop the
@@ -13577,7 +13272,7 @@ ha_innobase::rename_table(
innobase_commit_low(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
if (error == DB_SUCCESS) {
char norm_from[MAX_FULL_NAME_LEN];
@@ -13641,7 +13336,7 @@ ha_innobase::records_in_range(
dict_index_t* index;
dtuple_t* range_start;
dtuple_t* range_end;
- int64_t n_rows;
+ ha_rows n_rows;
page_cur_mode_t mode1;
page_cur_mode_t mode2;
mem_heap_t* heap;
@@ -13661,7 +13356,7 @@ ha_innobase::records_in_range(
/* There is a possibility of not being able to find the requested
index due to an inconsistency between the MySQL and InnoDB dictionary
info. The necessary message should have been printed in innobase_get_index() */
- if (dict_table_is_discarded(m_prebuilt->table)) {
+ if (!m_prebuilt->table->space) {
n_rows = HA_POS_ERROR;
goto func_exit;
}
@@ -13693,8 +13388,7 @@ ha_innobase::records_in_range(
m_prebuilt->srch_key_val_len,
index,
(byte*) (min_key ? min_key->key : (const uchar*) 0),
- (ulint) (min_key ? min_key->length : 0),
- m_prebuilt->trx);
+ (ulint) (min_key ? min_key->length : 0));
DBUG_ASSERT(min_key
? range_start->n_fields > 0
@@ -13706,8 +13400,7 @@ ha_innobase::records_in_range(
m_prebuilt->srch_key_val_len,
index,
(byte*) (max_key ? max_key->key : (const uchar*) 0),
- (ulint) (max_key ? max_key->length : 0),
- m_prebuilt->trx);
+ (ulint) (max_key ? max_key->length : 0));
DBUG_ASSERT(max_key
? range_end->n_fields > 0
@@ -13791,8 +13484,8 @@ ha_innobase::estimate_rows_upper_bound()
ut_a(stat_n_leaf_pages > 0);
- local_data_file_length =
- ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;
+ local_data_file_length = ulonglong(stat_n_leaf_pages)
+ << srv_page_size_shift;
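
The byte count is now computed with a shift instead of a multiplication by UNIV_PAGE_SIZE; the two are equivalent because the page size is a power of two. For example, with the default 16 KiB pages srv_page_size_shift is 14, so 1000 leaf pages amount to 1000 << 14 = 16,384,000 bytes. A two-line check of that arithmetic (the shift value is assumed here for illustration):

#include <cassert>

int main()
{
	const unsigned long long leaf_pages = 1000;
	const unsigned page_size_shift = 14;	/* 16 KiB pages */
	assert((leaf_pages << page_size_shift) == leaf_pages * 16384);
	assert((leaf_pages << page_size_shift) == 16384000ULL);
	return 0;
}
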
/* Calculate a minimum length for a clustered index record and from
that an upper bound for the number of rows. Since we only calculate
@@ -13885,16 +13578,6 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
-/******************************************************************//**
-Return the size of the InnoDB memory buffer. */
-
-longlong
-ha_innobase::get_memory_buffer_size() const
-/*=======================================*/
-{
- return(innobase_buffer_pool_size);
-}
-
/** Update the system variable with the given value of the InnoDB
buffer pool size.
@param[in] buf_pool_size given value of buffer pool size.*/
@@ -13908,7 +13591,7 @@ innodb_set_buf_pool_size(ulonglong buf_pool_size)
Calculates the key number used inside MySQL for an Innobase index.
@return the key number used inside MySQL */
static
-int
+unsigned
innobase_get_mysql_key_number_for_index(
/*====================================*/
const TABLE* table, /*!< in: table in MySQL data
@@ -13932,8 +13615,7 @@ innobase_get_mysql_key_number_for_index(
i++;
}
- if (dict_index_is_clust(index)
- && dict_index_is_auto_gen_clust(index)) {
+ if (dict_index_is_auto_gen_clust(index)) {
ut_a(i > 0);
i--;
}
@@ -13945,7 +13627,7 @@ innobase_get_mysql_key_number_for_index(
structure and InnoDB dict_index_t list */
for (i = 0; i < table->s->keys; i++) {
ind = dict_table_get_index_on_name(
- ib_table, table->key_info[i].name);
+ ib_table, table->key_info[i].name.str);
if (index == ind) {
return(i);
@@ -13968,13 +13650,13 @@ innobase_get_mysql_key_number_for_index(
" index.",
index->name());
}
- return(-1);
+ return(~0U);
}
}
ut_error;
- return(-1);
+ return(~0U);
}
/*********************************************************************//**
@@ -14052,6 +13734,48 @@ innodb_rec_per_key(
return(rec_per_key);
}
+/** Calculate how many KiB of new data we will be able to insert to the
+tablespace without running out of space. The tablespace object must have
+been acquired by the caller (fil_space_acquire()) and be held for the
+duration of the calculation.
+@param[in] space tablespace object from fil_space_acquire()
+@return available space in KiB */
+static uintmax_t
+fsp_get_available_space_in_free_extents(const fil_space_t& space)
+{
+ ulint size_in_header = space.size_in_header;
+ if (size_in_header < FSP_EXTENT_SIZE) {
+ return 0; /* TODO: count free frag pages and
+ return a value based on that */
+ }
+
+ /* Below we play safe when counting free extents above the free limit:
+ some of them will contain extent descriptor pages, and therefore
+ will not be free extents */
+ ut_ad(size_in_header >= space.free_limit);
+ ulint n_free_up =
+ (size_in_header - space.free_limit) / FSP_EXTENT_SIZE;
+
+ const ulint size = page_size_t(space.flags).physical();
+ if (n_free_up > 0) {
+ n_free_up--;
+ n_free_up -= n_free_up / (size / FSP_EXTENT_SIZE);
+ }
+
+ /* We reserve 1 extent + 0.5 % of the space size to undo logs
+ and 1 extent + 0.5 % to cleaning operations; NOTE: this source
+ code is duplicated in the function above! */
+
+ ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
+ ulint n_free = space.free_len + n_free_up;
+
+ if (reserve > n_free) {
+ return(0);
+ }
+
+ return(static_cast<uintmax_t>(n_free - reserve)
+ * FSP_EXTENT_SIZE * (size / 1024));
+}
+
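
As a sanity check of the arithmetic above, here is a self-contained re-implementation with one worked data point. The constants assume 16 KiB pages, for which FSP_EXTENT_SIZE is 64 pages (one extent = 1 MiB); the tablespace figures are invented for illustration only.

#include <cstdint>
#include <cstdio>

static const unsigned long PAGE_SIZE_BYTES = 16384;	/* 16 KiB pages */
static const unsigned long EXTENT_PAGES    = 64;	/* FSP_EXTENT_SIZE */

/* Mirrors the reserve/free-extent calculation above. */
static std::uintmax_t available_kib(unsigned long size_in_header,
				    unsigned long free_limit,
				    unsigned long free_len)
{
	if (size_in_header < EXTENT_PAGES) {
		return 0;
	}
	unsigned long n_free_up = (size_in_header - free_limit) / EXTENT_PAGES;
	if (n_free_up > 0) {
		n_free_up--;
		n_free_up -= n_free_up / (PAGE_SIZE_BYTES / EXTENT_PAGES);
	}
	unsigned long reserve = 2 + ((size_in_header / EXTENT_PAGES) * 2) / 200;
	unsigned long n_free = free_len + n_free_up;
	if (reserve > n_free) {
		return 0;
	}
	return std::uintmax_t(n_free - reserve)
		* EXTENT_PAGES * (PAGE_SIZE_BYTES / 1024);
}

int main()
{
	/* 100 MiB tablespace (6400 pages), free limit at page 3200,
	10 extents on the free list: n_free_up = 49, reserve = 3,
	so 59 - 3 = 56 usable extents = 57344 KiB (56 MiB). */
	std::printf("%ju KiB\n", available_kib(6400, 3200, 10));
	return 0;
}
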
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
@@ -14157,7 +13881,7 @@ ha_innobase::info_low(
set. That way SHOW TABLE STATUS will show the best estimate,
while the optimizer never sees the table empty. */
- if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
+ if (n_rows == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) {
n_rows++;
}
@@ -14178,69 +13902,23 @@ ha_innobase::info_low(
m_prebuilt->autoinc_last_value = 0;
}
- const page_size_t& page_size
- = dict_table_page_size(ib_table);
-
stats.records = (ha_rows) n_rows;
stats.deleted = 0;
- stats.data_file_length
- = ((ulonglong) stat_clustered_index_size)
- * page_size.physical();
- stats.index_file_length
- = ((ulonglong) stat_sum_of_other_index_sizes)
- * page_size.physical();
-
- /* Since fsp_get_available_space_in_free_extents() is
- acquiring latches inside InnoDB, we do not call it if we
- are asked by MySQL to avoid locking. Another reason to
- avoid the call is that it uses quite a lot of CPU.
- See Bug#38185. */
- if (flag & HA_STATUS_NO_LOCK
- || !(flag & HA_STATUS_VARIABLE_EXTRA)) {
- /* We do not update delete_length if no
- locking is requested so the "old" value can
- remain. delete_length is initialized to 0 in
- the ha_statistics' constructor. Also we only
- need delete_length to be set when
- HA_STATUS_VARIABLE_EXTRA is set */
- } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
- /* Avoid accessing the tablespace if
- innodb_crash_recovery is set to a high value. */
- stats.delete_length = 0;
- } else {
- uintmax_t avail_space;
-
- avail_space = fsp_get_available_space_in_free_extents(
- ib_table->space);
-
- if (avail_space == UINTMAX_MAX) {
- THD* thd;
- char errbuf[MYSYS_STRERROR_SIZE];
-
- thd = ha_thd();
-
- push_warning_printf(
- thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_GET_STAT,
- "InnoDB: Trying to get the free"
- " space for table %s but its"
- " tablespace has been discarded or"
- " the .ibd file is missing. Setting"
- " the free space to zero."
- " (errno: %d - %s)",
- ib_table->name.m_name, errno,
- my_strerror(errbuf, sizeof(errbuf),
- errno));
-
- stats.delete_length = 0;
- } else {
- stats.delete_length = avail_space * 1024;
- }
+ if (fil_space_t* space = ib_table->space) {
+ const ulint size = page_size_t(space->flags)
+ .physical();
+ stats.data_file_length
+ = ulonglong(stat_clustered_index_size)
+ * size;
+ stats.index_file_length
+ = ulonglong(stat_sum_of_other_index_sizes)
+ * size;
+ stats.delete_length = 1024
+ * fsp_get_available_space_in_free_extents(
+ *space);
}
-
stats.check_time = 0;
- stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
+ stats.mrr_length_per_rec= (uint)ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -14415,7 +14093,7 @@ ha_innobase::info_low(
errkey = (unsigned int) (
(m_prebuilt->trx->error_key_num
== ULINT_UNDEFINED)
- ? ~0
+ ? ~0U
: m_prebuilt->trx->error_key_num);
}
}
@@ -14449,10 +14127,7 @@ each index tree. This does NOT calculate exact statistics on the table.
@return HA_ADMIN_* error code or HA_ADMIN_OK */
int
-ha_innobase::analyze(
-/*=================*/
- THD* thd, /*!< in: connection thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
+ha_innobase::analyze(THD*, HA_CHECK_OPT*)
{
/* Simply call info_low() with all the flags
and request recalculation of the statistics */
@@ -14589,7 +14264,7 @@ int
ha_innobase::optimize(
/*==================*/
THD* thd, /*!< in: connection thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
+ HA_CHECK_OPT*)
{
/* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
@@ -14602,17 +14277,15 @@ ha_innobase::optimize(
calls to OPTIMIZE, which is undesirable. */
bool try_alter = true;
- /* TODO: Defragment is disabled for now */
if (srv_defragment) {
- int err;
-
- err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
+ int err= defragment_table(
+ m_prebuilt->table->name.m_name, NULL, false);
if (err == 0) {
try_alter = false;
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- err,
+ uint(err),
"InnoDB: Cannot defragment table %s: returned error code %d\n",
m_prebuilt->table->name, err);
@@ -14624,7 +14297,7 @@ ha_innobase::optimize(
if (innodb_optimize_fulltext_only) {
if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
- && !dict_table_is_discarded(m_prebuilt->table)) {
+ && m_prebuilt->table->space) {
fts_sync_table(m_prebuilt->table, false, true, false);
fts_optimize_table(m_prebuilt->table);
}
@@ -14665,7 +14338,7 @@ ha_innobase::check(
build_template(true);
}
- if (dict_table_is_discarded(m_prebuilt->table)) {
+ if (!m_prebuilt->table->space) {
ib_senderrf(
thd,
@@ -14676,7 +14349,7 @@ ha_innobase::check(
DBUG_RETURN(HA_ADMIN_CORRUPT);
} else if (!m_prebuilt->table->is_readable() &&
- !fil_space_get(m_prebuilt->table->space)) {
+ !m_prebuilt->table->space) {
ib_senderrf(
thd, IB_LOG_LEVEL_ERROR,
@@ -14923,9 +14596,11 @@ ha_innobase::update_table_comment(
#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
- fk_str.append("InnoDB free: ");
- fk_str.append(SSTR(fsp_get_available_space_in_free_extents(
- m_prebuilt->table->space)));
+ if (m_prebuilt->table->space) {
+ fk_str.append("InnoDB free: ");
+ fk_str.append(SSTR(fsp_get_available_space_in_free_extents(
+ *m_prebuilt->table->space)));
+ }
fk_str.append(dict_print_info_on_foreign_keys(
FALSE, m_prebuilt->trx,
@@ -15017,8 +14692,8 @@ get_foreign_key_info(
char tmp_buff[NAME_LEN+1];
char name_buff[NAME_LEN+1];
const char* ptr;
- LEX_STRING* referenced_key_name;
- LEX_STRING* name = NULL;
+ LEX_CSTRING* referenced_key_name;
+ LEX_CSTRING* name = NULL;
ptr = dict_remove_db_name(foreign->id);
f_key_info.foreign_id = thd_make_lex_string(
@@ -15494,6 +15169,16 @@ ha_innobase::extra(
break;
case HA_EXTRA_BEGIN_ALTER_COPY:
m_prebuilt->table->skip_alter_undo = 1;
+ if (m_prebuilt->table->is_temporary()
+ || !m_prebuilt->table->versioned_by_id()) {
+ break;
+ }
+ trx_start_if_not_started(m_prebuilt->trx, true);
+ m_prebuilt->trx->mod_tables.insert(
+ trx_mod_tables_t::value_type(
+ const_cast<dict_table_t*>(m_prebuilt->table),
+ 0))
+ .first->second.set_versioned(0);
break;
case HA_EXTRA_END_ALTER_COPY:
m_prebuilt->table->skip_alter_undo = 0;
@@ -15581,7 +15266,7 @@ ha_innobase::start_stmt(
m_prebuilt->hint_need_to_fetch_extra_cols = 0;
reset_template();
- if (dict_table_is_temporary(m_prebuilt->table)
+ if (m_prebuilt->table->is_temporary()
&& m_mysql_has_locked
&& m_prebuilt->select_lock_type == LOCK_NONE) {
dberr_t error;
@@ -15655,6 +15340,10 @@ innobase_map_isolation_level(
/*=========================*/
enum_tx_isolation iso) /*!< in: MySQL isolation level code */
{
+ if (UNIV_UNLIKELY(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN)
+ || UNIV_UNLIKELY(srv_read_only_mode)) {
+ return TRX_ISO_READ_UNCOMMITTED;
+ }
switch (iso) {
case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED);
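
The early return added above means that under innodb_read_only, or when innodb_force_recovery is at least SRV_FORCE_NO_UNDO_LOG_SCAN, every session is mapped to READ UNCOMMITTED regardless of the requested tx_isolation: no undo logs are usable in those modes, so consistent read views cannot be built anyway. A stripped-down sketch of the mapping (the enums here are illustrative, not the server's):

#include <cassert>

enum iso_req  { REQ_READ_UNCOMMITTED, REQ_READ_COMMITTED,
		REQ_REPEATABLE_READ, REQ_SERIALIZABLE };
enum iso_used { USE_READ_UNCOMMITTED, USE_READ_COMMITTED,
		USE_REPEATABLE_READ, USE_SERIALIZABLE };

static iso_used map_isolation(iso_req req, bool read_only_or_no_undo)
{
	if (read_only_or_no_undo) {
		return USE_READ_UNCOMMITTED;
	}
	switch (req) {
	case REQ_READ_UNCOMMITTED:	return USE_READ_UNCOMMITTED;
	case REQ_READ_COMMITTED:	return USE_READ_COMMITTED;
	case REQ_REPEATABLE_READ:	return USE_REPEATABLE_READ;
	case REQ_SERIALIZABLE:		return USE_SERIALIZABLE;
	}
	return USE_REPEATABLE_READ;
}

int main()
{
	assert(map_isolation(REQ_SERIALIZABLE, true) == USE_READ_UNCOMMITTED);
	assert(map_isolation(REQ_SERIALIZABLE, false) == USE_SERIALIZABLE);
	return 0;
}
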
@@ -15728,24 +15417,24 @@ ha_innobase::external_lock(
}
/* Check for UPDATEs in read-only mode. */
- if (srv_read_only_mode
- && (thd_sql_command(thd) == SQLCOM_UPDATE
- || thd_sql_command(thd) == SQLCOM_INSERT
- || thd_sql_command(thd) == SQLCOM_REPLACE
- || thd_sql_command(thd) == SQLCOM_DROP_TABLE
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
- || thd_sql_command(thd) == SQLCOM_OPTIMIZE
- || (thd_sql_command(thd) == SQLCOM_CREATE_TABLE
- && lock_type == F_WRLCK)
- || thd_sql_command(thd) == SQLCOM_CREATE_INDEX
- || thd_sql_command(thd) == SQLCOM_DROP_INDEX
- || thd_sql_command(thd) == SQLCOM_DELETE)) {
-
- if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) {
- ib_senderrf(thd, IB_LOG_LEVEL_WARN,
- ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else {
+ if (srv_read_only_mode) {
+ switch (thd_sql_command(thd)) {
+ case SQLCOM_CREATE_TABLE:
+ if (lock_type != F_WRLCK) {
+ break;
+ }
+ /* fall through */
+ case SQLCOM_UPDATE:
+ case SQLCOM_INSERT:
+ case SQLCOM_REPLACE:
+ case SQLCOM_DROP_TABLE:
+ case SQLCOM_ALTER_TABLE:
+ case SQLCOM_OPTIMIZE:
+ case SQLCOM_CREATE_INDEX:
+ case SQLCOM_DROP_INDEX:
+ case SQLCOM_CREATE_SEQUENCE:
+ case SQLCOM_DROP_SEQUENCE:
+ case SQLCOM_DELETE:
ib_senderrf(thd, IB_LOG_LEVEL_WARN,
ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
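
The old if-chain becomes a switch and is extended with CREATE SEQUENCE and DROP SEQUENCE; the behaviour is otherwise unchanged: in read-only mode each of these statements is answered with ER_READ_ONLY_MODE and HA_ERR_TABLE_READONLY, except that CREATE TABLE is only rejected when the handler is actually being write-locked. A compact stand-alone sketch; the command strings are simplified stand-ins for the SQLCOM_* constants:

#include <cassert>
#include <set>
#include <string>

static bool rejected_in_read_only(const std::string& cmd, bool write_lock)
{
	if (cmd == "CREATE_TABLE") {
		/* rejected only when the table is locked for write */
		return write_lock;
	}
	static const std::set<std::string> blocked = {
		"UPDATE", "INSERT", "REPLACE", "DROP_TABLE", "ALTER_TABLE",
		"OPTIMIZE", "CREATE_INDEX", "DROP_INDEX",
		"CREATE_SEQUENCE", "DROP_SEQUENCE", "DELETE"
	};
	return blocked.count(cmd) != 0;
}

int main()
{
	assert(rejected_in_read_only("DROP_SEQUENCE", false));
	assert(!rejected_in_read_only("CREATE_TABLE", false));
	assert(rejected_in_read_only("CREATE_TABLE", true));
	assert(!rejected_in_read_only("SELECT", false));
	return 0;
}
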
@@ -15764,7 +15453,7 @@ ha_innobase::external_lock(
&& thd_sql_command(thd) == SQLCOM_FLUSH
&& lock_type == F_RDLCK) {
- if (dict_table_is_discarded(m_prebuilt->table)) {
+ if (!m_prebuilt->table->space) {
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLESPACE_DISCARDED,
table->s->table_name.str);
@@ -15901,14 +15590,8 @@ ha_innobase::external_lock(
innobase_commit(ht, thd, TRUE);
}
- } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && MVCC::is_view_active(trx->read_view)) {
-
- mutex_enter(&trx_sys->mutex);
-
- trx_sys->mvcc->view_close(trx->read_view, true);
-
- mutex_exit(&trx_sys->mutex);
+ } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
+ trx->read_view.close();
}
}
@@ -15973,7 +15656,7 @@ innodb_show_status(
bytes of text. */
char* str;
- ssize_t flen;
+ size_t flen;
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
@@ -15983,11 +15666,12 @@ innodb_show_status(
os_file_set_eof(srv_monitor_file);
- if ((flen = ftell(srv_monitor_file)) < 0) {
+ flen = size_t(ftell(srv_monitor_file));
+ if (ssize_t(flen) < 0) {
flen = 0;
}
- ssize_t usable_len;
+ size_t usable_len;
if (flen > MAX_STATUS_SIZE) {
usable_len = MAX_STATUS_SIZE;
@@ -16000,7 +15684,7 @@ innodb_show_status(
read the contents of the temporary file */
if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
- usable_len + 1, MYF(0)))) {
+ usable_len + 1, MYF(0)))) {
mutex_exit(&srv_monitor_file_mutex);
DBUG_RETURN(1);
}
@@ -16010,19 +15694,18 @@ innodb_show_status(
if (flen < MAX_STATUS_SIZE) {
/* Display the entire output. */
flen = fread(str, 1, flen, srv_monitor_file);
- } else if (trx_list_end < (ulint) flen
+ } else if (trx_list_end < flen
&& trx_list_start < trx_list_end
- && trx_list_start + (flen - trx_list_end)
+ && trx_list_start + flen - trx_list_end
< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
/* Omit the beginning of the list of active transactions. */
- ssize_t len = fread(str, 1, trx_list_start, srv_monitor_file);
+ size_t len = fread(str, 1, trx_list_start, srv_monitor_file);
memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
len += sizeof truncated_msg - 1;
usable_len = (MAX_STATUS_SIZE - 1) - len;
- fseek(srv_monitor_file,
- static_cast<long>(flen - usable_len), SEEK_SET);
+ fseek(srv_monitor_file, long(flen - usable_len), SEEK_SET);
len += fread(str + len, 1, usable_len, srv_monitor_file);
flen = len;
} else {
@@ -16162,12 +15845,10 @@ struct ShowStatus {
spins=N,waits=N,calls=N"
The user has to parse the data, unfortunately
- @param[in,out] hton the innodb handlerton
@param[in,out] thd the MySQL query thread of the caller
@param[in,out] stat_print function for printing statistics
@return true on success. */
bool to_string(
- handlerton* hton,
THD* thd,
stat_print_fn* stat_print)
UNIV_NOTHROW;
@@ -16183,13 +15864,11 @@ We store the metrics in the "Status" column as:
spins=N,waits=N,calls=N"
The user has to parse the data, unfortunately
-@param[in,out] hton the innodb handlerton
@param[in,out] thd the MySQL query thread of the caller
@param[in,out] stat_print function for printing statistics
@return true on success. */
bool
ShowStatus::to_string(
- handlerton* hton,
THD* thd,
stat_print_fn* stat_print)
UNIV_NOTHROW
@@ -16238,7 +15917,11 @@ ShowStatus::to_string(
static
int
innodb_show_mutex_status(
- handlerton* hton,
+ handlerton*
+#ifdef DBUG_ASSERT_EXISTS
+ hton
+#endif
+ ,
THD* thd,
stat_print_fn* stat_print)
{
@@ -16250,7 +15933,7 @@ innodb_show_mutex_status(
mutex_monitor.iterate(collector);
- if (!collector.to_string(hton, thd, stat_print)) {
+ if (!collector.to_string(thd, stat_print)) {
DBUG_RETURN(1);
}
@@ -16265,7 +15948,11 @@ innodb_show_mutex_status(
static
int
innodb_show_rwlock_status(
- handlerton* hton,
+ handlerton*
+#ifdef DBUG_ASSERT_EXISTS
+ hton
+#endif
+ ,
THD* thd,
stat_print_fn* stat_print)
{
@@ -16467,23 +16154,17 @@ ha_innobase::store_lock(
trx->isolation_level = innobase_map_isolation_level(
(enum_tx_isolation) thd_tx_isolation(thd));
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && MVCC::is_view_active(trx->read_view)) {
+ if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
/* At low transaction isolation levels we let
each consistent read set its own snapshot */
-
- mutex_enter(&trx_sys->mutex);
-
- trx_sys->mvcc->view_close(trx->read_view, true);
-
- mutex_exit(&trx_sys->mutex);
+ trx->read_view.close();
}
}
DBUG_ASSERT(EQ_CURRENT_THD(thd));
const bool in_lock_tables = thd_in_lock_tables(thd);
- const uint sql_command = thd_sql_command(thd);
+ const int sql_command = thd_sql_command(thd);
if (srv_read_only_mode
&& (sql_command == SQLCOM_UPDATE
@@ -16497,6 +16178,8 @@ ha_innobase::store_lock(
&& lock_type <= TL_WRITE))
|| sql_command == SQLCOM_CREATE_INDEX
|| sql_command == SQLCOM_DROP_INDEX
+ || sql_command == SQLCOM_CREATE_SEQUENCE
+ || sql_command == SQLCOM_DROP_SEQUENCE
|| sql_command == SQLCOM_DELETE)) {
ib_senderrf(trx->mysql_thd,
@@ -16526,7 +16209,8 @@ ha_innobase::store_lock(
}
/* Check for DROP TABLE */
- } else if (sql_command == SQLCOM_DROP_TABLE) {
+ } else if (sql_command == SQLCOM_DROP_TABLE ||
+ sql_command == SQLCOM_DROP_SEQUENCE) {
/* MySQL calls this function in DROP TABLE though this table
handle may belong to another thd that is running a query. Let
@@ -16561,7 +16245,8 @@ ha_innobase::store_lock(
/* Use consistent read for checksum table */
if (sql_command == SQLCOM_CHECKSUM
- || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
+ || sql_command == SQLCOM_CREATE_SEQUENCE
+ || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
|| ((srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
&& trx->isolation_level != TRX_ISO_SERIALIZABLE
@@ -16570,6 +16255,7 @@ ha_innobase::store_lock(
&& (sql_command == SQLCOM_INSERT_SELECT
|| sql_command == SQLCOM_REPLACE_SELECT
|| sql_command == SQLCOM_UPDATE
+ || sql_command == SQLCOM_CREATE_SEQUENCE
|| sql_command == SQLCOM_CREATE_TABLE))) {
/* If we either have innobase_locks_unsafe_for_binlog
@@ -16996,7 +16682,7 @@ my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
THD* thd, /*!< in: user thread handle */
- char* table_key, /*!< in: normalized path to the
+ const char* table_key, /*!< in: normalized path to the
table */
uint key_length, /*!< in: length of the normalized
path to the table */
@@ -17068,8 +16754,7 @@ innobase_get_at_most_n_mbchars(
characters, and we can store in the column prefix index the
whole string. */
- char_length = my_charpos(charset, str,
- str + data_len, (int) n_chars);
+ char_length= my_charpos(charset, str, str + data_len, n_chars);
if (char_length > data_len) {
char_length = data_len;
}
@@ -17204,7 +16889,7 @@ innobase_commit_by_xid(
ut_ad(trx->mysql_thd == NULL);
trx_deregister_from_2pc(trx);
ut_ad(!trx->will_lock); /* trx cache requirement */
- trx_free_for_background(trx);
+ trx_free(trx);
return(XA_OK);
} else {
@@ -17234,7 +16919,7 @@ innobase_rollback_by_xid(
int ret = innobase_rollback_trx(trx);
trx_deregister_from_2pc(trx);
ut_ad(!trx->will_lock);
- trx_free_for_background(trx);
+ trx_free(trx);
return(ret);
} else {
@@ -17298,10 +16983,7 @@ void
innodb_io_capacity_max_update(
/*===========================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -17333,10 +17015,7 @@ void
innodb_io_capacity_update(
/*======================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -17368,10 +17047,7 @@ void
innodb_max_dirty_pages_pct_update(
/*==============================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -17402,10 +17078,7 @@ void
innodb_max_dirty_pages_pct_lwm_update(
/*==================================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -17427,117 +17100,6 @@ innodb_max_dirty_pages_pct_lwm_update(
srv_max_dirty_pages_pct_lwm = in_val;
}
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name) /*!< in: pointer to file format name */
-{
- char* endp;
- uint format_id;
-
- ut_a(format_name != NULL);
-
- /* The format name can contain the format id itself instead of
- the name and we check for that. */
- format_id = (uint) strtoul(format_name, &endp, 10);
-
- /* Check for valid parse. */
- if (*endp == '\0' && *format_name != '\0') {
-
- if (format_id <= UNIV_FORMAT_MAX) {
-
- return(format_id);
- }
- } else {
-
- for (format_id = 0; format_id <= UNIV_FORMAT_MAX;
- format_id++) {
- const char* name;
-
- name = trx_sys_file_format_id_to_name(format_id);
-
- if (!innobase_strcasecmp(format_name, name)) {
-
- return(format_id);
- }
- }
- }
-
- return(UNIV_FORMAT_MAX + 1);
-}
-
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_max_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*==================================*/
- const char* format_max) /*!< in: parameter value */
-{
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_max);
-
- if (format_id < UNIV_FORMAT_MAX + 1) {
- srv_max_file_format_at_startup = format_id;
-
- return((int) format_id);
- } else {
- return(-1);
- }
-}
-
-/*************************************************************//**
-Check if it is a valid file format. This function is registered as
-a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_name_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(
- file_format_input);
-
- if (format_id <= UNIV_FORMAT_MAX) {
-
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(format_id);
-
- return(0);
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
/*************************************************************//**
Don't allow to set innodb_fast_shutdown=0 if purge threads are
already down.
@@ -17569,172 +17131,6 @@ fast_shutdown_validate(
return(0);
}
-/****************************************************************//**
-Update the system variable innodb_file_format using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_name_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name;
-
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
-
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, deprecated_file_format);
-
- format_name = *static_cast<const char*const*>(save);
-
- if (format_name) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_name);
-
- if (format_id <= UNIV_FORMAT_MAX) {
- srv_file_format = format_id;
- }
- }
-
- *static_cast<const char**>(var_ptr)
- = trx_sys_file_format_id_to_name(srv_file_format);
-}
-
-/*************************************************************//**
-Check if valid argument to innodb_file_format_max. This function
-is registered as a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_max_validate(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- int format_id;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
-
- format_id = innobase_file_format_validate_and_set(
- file_format_input);
-
- if (format_id >= 0) {
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(
- (uint) format_id);
-
- return(0);
-
- } else {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: invalid innodb_file_format_max"
- " value; can be any format up to %s"
- " or equivalent id of %d",
- trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX),
- UNIV_FORMAT_MAX);
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_file_format_max using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_max_update(
-/*==========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name_in;
- const char** format_name_out;
- uint format_id;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, deprecated_file_format_max);
-
- format_name_in = *static_cast<const char*const*>(save);
-
- if (!format_name_in) {
-
- return;
- }
-
- format_id = innobase_file_format_name_lookup(format_name_in);
-
- if (format_id > UNIV_FORMAT_MAX) {
- /* DEFAULT is "on", which is invalid at runtime. */
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Ignoring SET innodb_file_format=%s",
- format_name_in);
- return;
- }
-
- format_name_out = static_cast<const char**>(var_ptr);
-
- /* Update the max format id in the system tablespace. */
- if (trx_sys_file_format_max_set(format_id, format_name_out)) {
- ib::info() << "The file format in the system tablespace is now"
- " set to " << *format_name_out << ".";
- }
-}
-
-/** Update innodb_large_prefix.
-@param[in,out] thd MySQL client connection
-@param[out] var_ptr current value
-@param[in] save to-be-assigned value */
-static
-void
-innodb_large_prefix_update(
- THD* thd,
- st_mysql_sys_var*,
- void* var_ptr,
- const void* save)
-{
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, deprecated_large_prefix);
-
- *static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save);
-}
-
/*************************************************************//**
Check whether valid argument given to innobase_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -17744,8 +17140,7 @@ int
innodb_stopword_table_validate(
/*===========================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
+ st_mysql_sys_var*,
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
@@ -17780,17 +17175,10 @@ innodb_stopword_table_validate(
/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
static
void
-innodb_buffer_pool_size_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
{
longlong in_val = *static_cast<const longlong*>(save);
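
Most of the callback rewrites in this diff follow the same pattern as innodb_buffer_pool_size_update() above: the check/update hooks all share one fixed signature, and parameters that the body never reads are left unnamed. A minimal standalone sketch of that shape, using stand-in types rather than the real MySQL headers:

// Standalone sketch of the shared update-callback shape. THD and
// st_mysql_sys_var are opaque stand-ins here, not the MySQL definitions.
#include <cstdio>

struct THD;               // connection handle (never dereferenced here)
struct st_mysql_sys_var;  // system-variable descriptor (never dereferenced here)

static unsigned long srv_example_setting = 0;

// Parameters that the body ignores are left unnamed; this documents the
// intent and avoids per-parameter "unused" attributes.
static void example_update(THD*, st_mysql_sys_var*, void*, const void* save)
{
  srv_example_setting = *static_cast<const unsigned long*>(save);
}

int main()
{
  const unsigned long new_value = 42;
  example_update(nullptr, nullptr, nullptr, &new_value);
  std::printf("srv_example_setting=%lu\n", srv_example_setting);
}

The removed variants reached the same effect with MY_ATTRIBUTE((unused)) or __attribute__((unused)) on each named-but-ignored parameter, as the wait_background_drop_list_empty() and innodb_status_output_update() hunks below show.
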
@@ -17812,9 +17200,7 @@ static
int
innodb_internal_table_validate(
/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
+ THD*, st_mysql_sys_var*,
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
@@ -17864,9 +17250,7 @@ static
void
innodb_internal_table_update(
/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
+ THD*, st_mysql_sys_var*,
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
@@ -17897,15 +17281,8 @@ Update the system variable innodb_adaptive_hash_index using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_adaptive_hash_index_update(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_adaptive_hash_index_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
if (*(my_bool*) save) {
btr_search_enable();
@@ -17920,15 +17297,7 @@ Update the system variable innodb_cmp_per_index using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_cmp_per_index_update(
-/*========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_cmp_per_index_update(THD*, st_mysql_sys_var*, void*, const void* save)
{
/* Reset the stats whenever we enable the table
INFORMATION_SCHEMA.innodb_cmp_per_index. */
@@ -17944,15 +17313,7 @@ Update the system variable innodb_old_blocks_pct using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_old_blocks_pct_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
{
innobase_old_blocks_pct = static_cast<uint>(
buf_LRU_old_ratio_update(
@@ -17964,15 +17325,8 @@ Update the system variable innodb_old_blocks_pct using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_change_buffer_max_size_update(
-/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
srv_change_buffer_max_size =
(*static_cast<const uint*>(save));
@@ -17987,15 +17341,7 @@ static ulong srv_saved_page_number_debug = 0;
Save an InnoDB page number. */
static
void
-innodb_save_page_no(
-/*================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_save_page_no(THD*, st_mysql_sys_var*, void*, const void* save)
{
srv_saved_page_number_debug = *static_cast<const ulong*>(save);
@@ -18007,15 +17353,7 @@ innodb_save_page_no(
Make the first page of given user tablespace dirty. */
static
void
-innodb_make_page_dirty(
-/*===================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_make_page_dirty(THD*, st_mysql_sys_var*, void*, const void* save)
{
mtr_t mtr;
ulong space_id = *static_cast<const ulong*>(save);
@@ -18026,7 +17364,7 @@ innodb_make_page_dirty(
}
if (srv_saved_page_number_debug >= space->size) {
- fil_space_release(space);
+ space->release();
return;
}
@@ -18048,112 +17386,10 @@ innodb_make_page_dirty(
MLOG_2BYTES, &mtr);
}
mtr.commit();
- fil_space_release(space);
+ space->release();
}
#endif // UNIV_DEBUG
/*************************************************************//**
-Find the corresponding ibuf_use_t value that indexes into
-innobase_change_buffering_values[] array for the input
-change buffering option name.
-@return corresponding IBUF_USE_* value for the input variable
-name, or IBUF_USE_COUNT if not able to find a match */
-static
-ibuf_use_t
-innodb_find_change_buffering_value(
-/*===============================*/
- const char* input_name) /*!< in: input change buffering
- option name */
-{
- for (ulint i = 0;
- i < UT_ARR_SIZE(innobase_change_buffering_values);
- ++i) {
-
- /* found a match */
- if (!innobase_strcasecmp(
- input_name, innobase_change_buffering_values[i])) {
- return(static_cast<ibuf_use_t>(i));
- }
- }
-
- /* Did not find any match */
- return(IBUF_USE_COUNT);
-}
-
-/*************************************************************//**
-Check if it is a valid value of innodb_change_buffering. This function is
-registered as a callback with MySQL.
-@return 0 for valid innodb_change_buffering */
-static
-int
-innodb_change_buffering_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* change_buffering_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- change_buffering_input = value->val_str(value, buff, &len);
-
- if (change_buffering_input != NULL) {
- ibuf_use_t use;
-
- use = innodb_find_change_buffering_value(
- change_buffering_input);
-
- if (use != IBUF_USE_COUNT) {
- /* Find a matching change_buffering option value. */
- *static_cast<const char**>(save) =
- innobase_change_buffering_values[use];
-
- return(0);
- }
- }
-
- /* No corresponding change buffering option for user supplied
- "change_buffering_input" */
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_change_buffering using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_change_buffering_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ibuf_use_t use;
-
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
-
- use = innodb_find_change_buffering_value(
- *static_cast<const char*const*>(save));
-
- ut_a(use < IBUF_USE_COUNT);
-
- ibuf_use = use;
- *static_cast<const char**>(var_ptr) =
- *static_cast<const char*const*>(save);
-}
-
-/*************************************************************//**
Just emit a warning that the usage of the variable is deprecated.
@return 0 */
static
@@ -18161,10 +17397,7 @@ void
innodb_stats_sample_pages_update(
/*=============================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -18440,9 +17673,7 @@ static
int
innodb_monitor_validate(
/*====================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
+ THD*, st_mysql_sys_var*,
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
@@ -18617,9 +17848,7 @@ innodb_srv_buf_dump_filename_validate(
ut_a(save != NULL);
ut_a(value != NULL);
- const char* buf_name = value->val_str(value, buff, &len);
-
- if (buf_name != NULL) {
+ if (const char* buf_name = value->val_str(value, buff, &len)) {
if (is_filename_allowed(buf_name, len, FALSE)){
*static_cast<const char**>(save) = buf_name;
return(0);
@@ -18688,13 +17917,8 @@ SET GLOBAL innodb_buffer_pool_evict='uncompressed'
evicts all uncompressed page frames of compressed tablespaces. */
static
void
-innodb_buffer_pool_evict_update(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var*var, /*!< in: pointer to system variable */
- void* var_ptr,/*!< out: ignored */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_buffer_pool_evict_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
if (const char* op = *static_cast<const char*const*>(save)) {
if (!strcmp(op, "uncompressed")) {
@@ -18722,8 +17946,7 @@ void
innodb_enable_monitor_update(
/*=========================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
+ st_mysql_sys_var*,
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
@@ -18740,8 +17963,7 @@ void
innodb_disable_monitor_update(
/*==========================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
+ st_mysql_sys_var*,
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
@@ -18759,8 +17981,7 @@ void
innodb_reset_monitor_update(
/*========================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
+ st_mysql_sys_var*,
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
@@ -18778,8 +17999,7 @@ void
innodb_reset_all_monitor_update(
/*============================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
+ st_mysql_sys_var*,
void* var_ptr,/*!< out: where the
formal string goes */
const void* save) /*!< in: immediate result
@@ -18791,15 +18011,8 @@ innodb_reset_all_monitor_update(
static
void
-innodb_defragment_frequency_update(
-/*===============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
srv_defragment_frequency = (*static_cast<const uint*>(save));
srv_defragment_interval = ut_microseconds_to_timer(
@@ -18839,13 +18052,8 @@ innodb_enable_monitor_at_startup(
for (char* option = my_strtok_r(str, sep, &last);
option;
option = my_strtok_r(NULL, sep, &last)) {
- ulint ret;
char* option_name;
-
- ret = innodb_monitor_valid_byname(&option_name, option);
-
- /* The name is validated if ret == 0 */
- if (!ret) {
+ if (!innodb_monitor_valid_byname(&option_name, option)) {
innodb_monitor_update(NULL, NULL, &option,
MONITOR_TURN_ON, FALSE);
} else {
@@ -18858,13 +18066,7 @@ innodb_enable_monitor_at_startup(
/****************************************************************//**
Callback function for accessing the InnoDB variables from MySQL:
SHOW VARIABLES. */
-static
-int
-show_innodb_vars(
-/*=============*/
- THD* thd,
- SHOW_VAR* var,
- char* buff)
+static int show_innodb_vars(THD*, SHOW_VAR* var, char*)
{
innodb_export_status();
var->type = SHOW_ARRAY;
@@ -18894,7 +18096,7 @@ innobase_index_name_is_reserved(
for (key_num = 0; key_num < num_of_keys; key_num++) {
key = &key_info[key_num];
- if (innobase_strcasecmp(key->name,
+ if (innobase_strcasecmp(key->name.str,
innobase_index_reserve_name) == 0) {
/* Push warning to mysql */
push_warning_printf(thd,
@@ -18983,17 +18185,7 @@ static uint innodb_merge_threshold_set_all_debug
/** Wait for the background drop list to become empty. */
static
void
-wait_background_drop_list_empty(
- THD* thd /*!< in: thread handle */
- MY_ATTRIBUTE((unused)),
- struct st_mysql_sys_var* var /*!< in: pointer to system
- variable */
- MY_ATTRIBUTE((unused)),
- void* var_ptr /*!< out: where the formal
- string goes */
- MY_ATTRIBUTE((unused)),
- const void* save) /*!< in: immediate result from
- check function */
+wait_background_drop_list_empty(THD*, st_mysql_sys_var*, void*, const void*)
{
row_wait_for_background_drop_list_empty();
}
@@ -19002,30 +18194,19 @@ wait_background_drop_list_empty(
Force innodb to checkpoint. */
static
void
-checkpoint_now_set(
-/*===============*/
- THD* thd /*!< in: thread handle */
- MY_ATTRIBUTE((unused)),
- struct st_mysql_sys_var* var /*!< in: pointer to system
- variable */
- MY_ATTRIBUTE((unused)),
- void* var_ptr /*!< out: where the formal
- string goes */
- MY_ATTRIBUTE((unused)),
- const void* save) /*!< in: immediate result from
- check function */
+checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
{
if (*(my_bool*) save) {
- while (log_sys->last_checkpoint_lsn
+ while (log_sys.last_checkpoint_lsn
+ SIZE_OF_MLOG_CHECKPOINT
- + (log_sys->append_on_checkpoint != NULL
- ? log_sys->append_on_checkpoint->size() : 0)
- < log_sys->lsn) {
+ + (log_sys.append_on_checkpoint != NULL
+ ? log_sys.append_on_checkpoint->size() : 0)
+ < log_sys.lsn) {
log_make_checkpoint_at(LSN_MAX, TRUE);
fil_flush_file_spaces(FIL_TYPE_LOG);
}
- dberr_t err = fil_write_flushed_lsn(log_sys->lsn);
+ dberr_t err = fil_write_flushed_lsn(log_sys.lsn);
if (err != DB_SUCCESS) {
ib::warn() << "Checkpoint set failed " << err;
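
checkpoint_now_set() above keeps requesting checkpoints until the checkpoint LSN, plus the size of the checkpoint record itself (and any append_on_checkpoint payload), has caught up with the current log LSN; only the change from the log_sys pointer to a plain object is new in this hunk. A toy model of that catch-up loop, with plain integers standing in for LSNs and all names below being stand-ins:

// Toy model of the catch-up loop in checkpoint_now_set(): keep taking
// checkpoints until the checkpoint LSN (plus the fixed record overhead)
// has reached the current end of the log.
#include <cstdint>
#include <cstdio>

static uint64_t current_lsn         = 1000;
static uint64_t last_checkpoint_lsn = 100;
static const uint64_t CHECKPOINT_OVERHEAD = 9;  // stand-in for SIZE_OF_MLOG_CHECKPOINT

static void make_checkpoint()
{
  // Writing the checkpoint record itself advances the log a little,
  // which is why the loop condition includes the overhead term.
  last_checkpoint_lsn = current_lsn;
  current_lsn += CHECKPOINT_OVERHEAD;
}

int main()
{
  int rounds = 0;
  while (last_checkpoint_lsn + CHECKPOINT_OVERHEAD < current_lsn) {
    make_checkpoint();
    ++rounds;
  }
  std::printf("caught up after %d checkpoint(s), lsn=%llu\n",
              rounds, static_cast<unsigned long long>(current_lsn));
}
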
@@ -19037,18 +18218,7 @@ checkpoint_now_set(
Force a dirty pages flush now. */
static
void
-buf_flush_list_now_set(
-/*===================*/
- THD* thd /*!< in: thread handle */
- MY_ATTRIBUTE((unused)),
- struct st_mysql_sys_var* var /*!< in: pointer to system
- variable */
- MY_ATTRIBUTE((unused)),
- void* var_ptr /*!< out: where the formal
- string goes */
- MY_ATTRIBUTE((unused)),
- const void* save) /*!< in: immediate result from
- check function */
+buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
{
if (*(my_bool*) save) {
buf_flush_sync_all_buf_pools();
@@ -19057,17 +18227,11 @@ buf_flush_list_now_set(
/** Override current MERGE_THRESHOLD setting for all indexes at dictionary
now.
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
static
void
-innodb_merge_threshold_set_all_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+innodb_merge_threshold_set_all_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
innodb_merge_threshold_set_all_debug
= (*static_cast<const uint*>(save));
@@ -19194,10 +18358,7 @@ void
innodb_log_write_ahead_size_update(
/*===============================*/
THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
+ st_mysql_sys_var*, void*,
const void* save) /*!< in: immediate result
from check function */
{
@@ -19208,8 +18369,8 @@ innodb_log_write_ahead_size_update(
val = val * 2;
}
- if (val > UNIV_PAGE_SIZE) {
- val = UNIV_PAGE_SIZE;
+ if (val > srv_page_size) {
+ val = srv_page_size;
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
"innodb_log_write_ahead_size cannot"
@@ -19218,7 +18379,7 @@ innodb_log_write_ahead_size_update(
ER_WRONG_ARGUMENTS,
"Setting innodb_log_write_ahead_size"
" to %lu",
- UNIV_PAGE_SIZE);
+ srv_page_size);
} else if (val != in_val) {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
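
innodb_log_write_ahead_size_update() above now caps the value at the run-time srv_page_size instead of the compile-time UNIV_PAGE_SIZE; the surrounding, unchanged logic first rounds the requested value up to a power of two. A standalone sketch of that arithmetic, with the page size and the starting granularity as local assumptions:

// Standalone sketch of the rounding in innodb_log_write_ahead_size_update():
// round the requested value up to the next power of two, then cap it at the
// page size. Both constants below are local stand-ins.
#include <cstdio>

int main()
{
  const unsigned long srv_page_size = 16384;   // assumed 16KiB page size
  const unsigned long in_val = 3000;           // requested innodb_log_write_ahead_size
  unsigned long val = 512;                     // assumed minimum granularity

  while (val < in_val) {
    val *= 2;                                  // next power of two
  }
  if (val > srv_page_size) {
    val = srv_page_size;                       // never write ahead more than a page
  }
  std::printf("requested %lu -> effective %lu\n", in_val, val);
}
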
@@ -19240,12 +18401,8 @@ which control InnoDB "status monitor" output to the error log.
@param[in] save to-be-assigned value */
static
void
-innodb_status_output_update(
-/*========================*/
- THD* thd __attribute__((unused)),
- struct st_mysql_sys_var* var __attribute__((unused)),
- void* var_ptr __attribute__((unused)),
- const void* save __attribute__((unused)))
+innodb_status_output_update(THD*, st_mysql_sys_var*, void* var_ptr,
+ const void* save)
{
*static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save);
/* Wakeup server monitor thread. */
@@ -19256,15 +18413,8 @@ innodb_status_output_update(
Update the system variable innodb_encryption_threads */
static
void
-innodb_encryption_threads_update(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_encryption_threads_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
}
@@ -19273,15 +18423,8 @@ innodb_encryption_threads_update(
Update the system variable innodb_encryption_rotate_key_age */
static
void
-innodb_encryption_rotate_key_age_update(
-/*====================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_encryption_rotate_key_age_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
}
@@ -19290,15 +18433,8 @@ innodb_encryption_rotate_key_age_update(
Update the system variable innodb_encryption_rotation_iops */
static
void
-innodb_encryption_rotation_iops_update(
-/*===================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_encryption_rotation_iops_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
}
@@ -19307,31 +18443,19 @@ innodb_encryption_rotation_iops_update(
Update the system variable innodb_encrypt_tables*/
static
void
-innodb_encrypt_tables_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
+innodb_encrypt_tables_update(THD*, st_mysql_sys_var*, void*, const void* save)
{
fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
}
/** Update the innodb_log_checksums parameter.
-@param[in] thd thread handle
-@param[in] var system variable
+@param[in,out] thd client connection
@param[out] var_ptr current value
@param[in] save immediate result from check function */
static
void
-innodb_log_checksums_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+innodb_log_checksums_update(THD* thd, st_mysql_sys_var*, void* var_ptr,
+ const void* save)
{
*static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
thd, *static_cast<const my_bool*>(save));
@@ -19588,13 +18712,13 @@ static
int
wsrep_abort_transaction(
/*====================*/
- handlerton* hton,
+ handlerton*,
THD *bf_thd,
THD *victim_thd,
my_bool signal)
{
DBUG_ENTER("wsrep_innobase_abort_thd");
-
+
trx_t* victim_trx = thd_to_trx(victim_thd);
trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
@@ -19632,17 +18756,14 @@ innobase_wsrep_set_checkpoint(
{
DBUG_ASSERT(hton == innodb_hton_ptr);
- if (wsrep_is_wsrep_xid(xid)) {
- mtr_t mtr;
- mtr_start(&mtr);
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
- trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
- mtr_commit(&mtr);
- innobase_flush_logs(hton, false);
- return 0;
- } else {
- return 1;
- }
+ if (wsrep_is_wsrep_xid(xid)) {
+
+ trx_rseg_update_wsrep_checkpoint(xid);
+ innobase_flush_logs(hton, false);
+ return 0;
+ } else {
+ return 1;
+ }
}
static
@@ -19653,20 +18774,13 @@ innobase_wsrep_get_checkpoint(
XID* xid)
{
DBUG_ASSERT(hton == innodb_hton_ptr);
- trx_sys_read_wsrep_checkpoint(xid);
- return 0;
+ trx_rseg_read_wsrep_checkpoint(*xid);
+ return 0;
}
-static
-void
-wsrep_fake_trx_id(
-/*==============*/
- handlerton *hton,
- THD *thd) /*!< in: user thread handle */
+static void wsrep_fake_trx_id(handlerton *, THD *thd)
{
- mutex_enter(&trx_sys->mutex);
- trx_id_t trx_id = trx_sys_get_new_trx_id();
- mutex_exit(&trx_sys->mutex);
+ trx_id_t trx_id = trx_sys.get_new_trx_id();
WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s",
trx_id, wsrep_thd_query(thd));
wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), trx_id);
@@ -19674,44 +18788,6 @@ wsrep_fake_trx_id(
#endif /* WITH_WSREP */
-/** Update the innodb_use_trim parameter.
-@param[in] thd thread handle
-@param[in] var system variable
-@param[out] var_ptr current value
-@param[in] save immediate result from check function */
-static
-void
-innodb_use_trim_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
-{
- srv_use_trim = *static_cast<const my_bool*>(save);
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, deprecated_use_trim);
-}
-
-/** Update the innodb_instrument_sempahores parameter.
-@param[in] thd thread handle
-@param[in] var system variable
-@param[out] var_ptr current value
-@param[in] save immediate result from check function */
-static
-void
-innodb_instrument_semaphores_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
-{
- innodb_instrument_semaphores = *static_cast<const my_bool*>(save);
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, deprecated_instrument_semaphores);
-}
-
/* plugin options */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
@@ -19756,7 +18832,7 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
"The common part for InnoDB table spaces.",
NULL, NULL, NULL);
-static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
+static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Enable InnoDB doublewrite buffer (enabled by default)."
" Disable with --skip-innodb-doublewrite.",
@@ -19767,16 +18843,11 @@ static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
"Enable atomic writes, instead of using the doublewrite buffer, for files "
"on devices that supports atomic writes. "
"To use this option one must use "
- "file_per_table=1, flush_method=O_DIRECT and use_fallocate=1. "
+ "innodb_file_per_table=1, innodb_flush_method=O_DIRECT. "
"This option only works on Linux with either FusionIO cards using "
"the directFS filesystem or with Shannon cards using any file system.",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use posix_fallocate() to allocate files. DEPRECATED, has no effect.",
- NULL, NULL, FALSE);
-
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
srv_stats_include_delete_marked,
PLUGIN_VAR_OPCMDARG,
@@ -19855,41 +18926,14 @@ static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
"Speeds up the shutdown process of the InnoDB storage engine. Possible"
- " values are 0, 1 (faster) or 2 (fastest - crash-like).",
- fast_shutdown_validate, NULL, 1, 0, 2, 0);
+ " values are 0, 1 (faster), 2 (crash-like), 3 (fastest clean).",
+ fast_shutdown_validate, NULL, 1, 0, 3, 0);
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
PLUGIN_VAR_NOCMDARG,
"Stores each InnoDB table to an .ibd file in the database dir.",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
- PLUGIN_VAR_RQCMDARG,
- "File format to use for new tables in .ibd files.",
- innodb_file_format_name_validate,
- innodb_file_format_name_update, innodb_file_format_default);
-
-/* "innobase_file_format_check" decides whether we would continue
-booting the server if the file format stamped on the system
-table space exceeds the maximum file format supported
-by the server. Can be set during server startup at command
-line or configure file, and a read only variable after
-server startup */
-static MYSQL_SYSVAR_BOOL(file_format_check, innobase_file_format_check,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Whether to perform system file format check.",
- NULL, NULL, TRUE);
-
-/* If a new file format is introduced, the file format
-name needs to be updated accordingly. Please refer to
-file_format_name_map[] defined in trx0sys.cc for the next
-file format name. */
-static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
- PLUGIN_VAR_OPCMDARG,
- "The highest file format in the tablespace.",
- innodb_file_format_max_validate,
- innodb_file_format_max_update, innodb_file_format_max_default);
-
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
"The user supplied stopword table name.",
@@ -19916,14 +18960,11 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
" guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
NULL, NULL, 1, 0, 3, 0);
-static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
+static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "With which method to flush data.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
- PLUGIN_VAR_NOCMDARG,
- "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
- NULL, innodb_large_prefix_update, TRUE);
+ "With which method to flush data.",
+ NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
+ &innodb_flush_method_typelib);
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -19941,10 +18982,20 @@ static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to InnoDB log files.", NULL, NULL, NULL);
+/** Update innodb_page_cleaners.
+@param[in] save the new value of innodb_page_cleaners */
+static
+void
+innodb_page_cleaners_threads_update(THD*, struct st_mysql_sys_var*, void*, const void *save)
+{
+ buf_flush_set_page_cleaner_thread_cnt(*static_cast<const ulong*>(save));
+}
+
static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"Page cleaner threads can be from 1 to 64. Default is 4.",
- NULL, NULL, 4, 1, 64, 0);
+ NULL,
+ innodb_page_cleaners_threads_update, 4, 1, 64, 0);
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
PLUGIN_VAR_RQCMDARG,
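
innodb_page_cleaners changes from PLUGIN_VAR_READONLY to a dynamic PLUGIN_VAR_RQCMDARG variable whose update hook calls buf_flush_set_page_cleaner_thread_cnt(). A toy model of steering a background-thread count through a shared target that the workers observe; the actual adjustment is done in buf0flu.cc, and every name below is a stand-in:

// Toy model of changing a background-thread count at run time. Workers
// simply retire once their slot number exceeds a shared atomic target.
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>
#include <vector>

static std::atomic<unsigned> page_cleaner_target{4};

static void page_cleaner(unsigned id)
{
  while (id < page_cleaner_target.load()) {
    std::this_thread::sleep_for(std::chrono::milliseconds(10));  // "flush work"
  }
  std::printf("page cleaner %u retired\n", id);
}

static void set_page_cleaner_thread_cnt(unsigned n)  // stand-in for the update hook
{
  page_cleaner_target.store(n);
}

int main()
{
  std::vector<std::thread> cleaners;
  for (unsigned i = 0; i < 4; i++) {
    cleaners.emplace_back(page_cleaner, i);
  }
  set_page_cleaner_thread_cnt(2);   // e.g. SET GLOBAL innodb_page_cleaners = 2;
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  set_page_cleaner_thread_cnt(0);   // let every worker exit so main() can join
  for (std::thread& t : cleaners) {
    t.join();
  }
}
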
@@ -20122,13 +19173,13 @@ BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
can be removed and 8 used instead. The problem with the current setup is that
with 128MiB default buffer pool size and 8 instances by default we would emit
a warning when no options are specified. */
-static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
+static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
PLUGIN_VAR_RQCMDARG,
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
innodb_buffer_pool_size_validate,
innodb_buffer_pool_size_update,
- static_cast<longlong>(srv_buf_pool_def_size),
- static_cast<longlong>(srv_buf_pool_min_size),
+ srv_buf_pool_def_size,
+ srv_buf_pool_min_size,
LLONG_MAX, 1024*1024L);
static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
@@ -20189,6 +19240,12 @@ static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
NULL, NULL, 25, 1, 100, 0);
#ifdef UNIV_DEBUG
+/* Added to test the innodb_buffer_pool_load_incomplete status variable. */
+static MYSQL_SYSVAR_ULONG(buffer_pool_load_pages_abort, srv_buf_pool_load_pages_abort,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of pages during a buffer pool load to process before signaling innodb_buffer_pool_load_abort=1",
+ NULL, NULL, LONG_MAX, 1, LONG_MAX, 0);
+
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
PLUGIN_VAR_RQCMDARG,
"Evict pages from the buffer pool",
@@ -20295,7 +19352,7 @@ static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
" and we rely on innodb_lock_wait_timeout in case of deadlock.",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_LONG(fill_factor, innobase_fill_factor,
+static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor,
PLUGIN_VAR_RQCMDARG,
"Percentage of B-tree page filled during bulk insert",
NULL, NULL, 100, 10, 100, 0);
@@ -20366,12 +19423,12 @@ static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
"Only optimize the Fulltext index of the table",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
+static MYSQL_SYSVAR_ULONG(read_io_threads, srv_n_read_io_threads,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of background read I/O threads in InnoDB.",
NULL, NULL, 4, 1, 64, 0);
-static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
+static MYSQL_SYSVAR_ULONG(write_io_threads, srv_n_write_io_threads,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of background write I/O threads in InnoDB.",
NULL, NULL, 4, 1, 64, 0);
@@ -20387,10 +19444,10 @@ static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
NULL, NULL, UNIV_PAGE_SIZE_DEF,
UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0);
-static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
+static MYSQL_SYSVAR_ULONG(log_buffer_size, srv_log_buffer_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"The size of the buffer which InnoDB uses to write log to the log files on disk.",
- NULL, NULL, 16*1024*1024L, 256*1024L, LONG_MAX, 1024);
+ NULL, NULL, 16L << 20, 256L << 10, LONG_MAX, 1024);
static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -20423,10 +19480,10 @@ static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
" The timeout is disabled if 0.",
NULL, NULL, 1000, 0, UINT_MAX32, 0);
-static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
+static MYSQL_SYSVAR_ULONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
- NULL, NULL, 0L, 0L, LONG_MAX, 0);
+ NULL, NULL, 0, 0, LONG_MAX, 0);
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
PLUGIN_VAR_RQCMDARG,
@@ -20435,8 +19492,8 @@ static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
PLUGIN_VAR_OPCMDARG,
- "Maximum delay between polling for a spin lock (6 by default)",
- NULL, NULL, 6, 0, 6000, 0);
+ "Maximum delay between polling for a spin lock (4 by default)",
+ NULL, NULL, 4, 0, 6000, 0);
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
PLUGIN_VAR_RQCMDARG,
@@ -20470,12 +19527,12 @@ static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to individual files and their sizes.",
- NULL, NULL, NULL);
+ NULL, NULL, "ibdata1:12M:autoextend");
static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to files and their sizes making temp-tablespace.",
- NULL, NULL, NULL);
+ NULL, NULL, "ibtmp1:12M:autoextend");
static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -20553,12 +19610,10 @@ static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
-static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
+static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
PLUGIN_VAR_RQCMDARG,
- "Buffer changes to reduce random access:"
- " OFF, ON, inserting, deleting, changing, or purging.",
- innodb_change_buffering_validate,
- innodb_change_buffering_update, "all");
+ "Buffer changes to secondary indexes.",
+ NULL, NULL, IBUF_USE_ALL, &innodb_change_buffering_typelib);
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
srv_change_buffer_max_size,
@@ -20608,14 +19663,8 @@ static my_bool innobase_disallow_writes = FALSE;
An "update" method for innobase_disallow_writes variable. */
static
void
-innobase_disallow_writes_update(
-/*============================*/
- THD* thd, /* in: thread handle */
- st_mysql_sys_var* var, /* in: pointer to system
- variable */
- void* var_ptr, /* out: pointer to dynamic
- variable */
- const void* save) /* in: temporary storage */
+innobase_disallow_writes_update(THD*, st_mysql_sys_var*,
+ void* var_ptr, const void* save)
{
*(my_bool*)var_ptr = *(my_bool*)save;
ut_a(srv_allow_writes_event);
@@ -20699,11 +19748,6 @@ static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
"Start InnoDB in read only mode (off by default)",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_BOOL(safe_truncate, srv_safe_truncate,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Use backup-safe TRUNCATE TABLE and crash-safe RENAME (incompatible with older MariaDB 10.2; ON by default)",
- NULL, NULL, TRUE);
-
static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
PLUGIN_VAR_OPCMDARG,
"Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
@@ -20792,11 +19836,6 @@ static MYSQL_SYSVAR_BOOL(force_primary_key,
"Do not allow to create table without primary key (off by default)",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
- PLUGIN_VAR_OPCMDARG,
- "Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)",
- NULL, innodb_use_trim_update, TRUE);
-
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
static TYPELIB page_compression_algorithms_typelib=
{
@@ -20813,20 +19852,6 @@ static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
PAGE_ZLIB_ALGORITHM,
&page_compression_algorithms_typelib);
-static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. Number of multi-threaded flush threads",
- NULL, NULL,
- MTFLUSH_DEFAULT_WORKER, /* Default setting */
- 1, /* Minimum setting */
- MTFLUSH_MAX_WORKER, /* Max setting */
- 0);
-
-static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. Use multi-threaded flush. Default FALSE.",
- NULL, NULL, FALSE);
-
static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Maximum number of seconds that semaphore times out in InnoDB.",
@@ -20943,13 +19968,6 @@ static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
NULL, NULL, FALSE);
#endif /* UNIV_DEBUG */
-static MYSQL_SYSVAR_BOOL(instrument_semaphores, innodb_instrument_semaphores,
- PLUGIN_VAR_OPCMDARG,
- "DEPRECATED. This setting has no effect.",
- NULL, innodb_instrument_semaphores_update, FALSE);
-
-#include "ha_xtradb.h"
-
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
@@ -20964,6 +19982,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#endif /* UNIV_DEBUG */
MYSQL_SYSVAR(buffer_pool_load_now),
MYSQL_SYSVAR(buffer_pool_load_abort),
+#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(buffer_pool_load_pages_abort),
+#endif /* UNIV_DEBUG */
MYSQL_SYSVAR(buffer_pool_load_at_startup),
MYSQL_SYSVAR(defragment),
MYSQL_SYSVAR(defragment_n_pages),
@@ -20985,14 +20006,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(doublewrite),
MYSQL_SYSVAR(stats_include_delete_marked),
MYSQL_SYSVAR(use_atomic_writes),
- MYSQL_SYSVAR(use_fallocate),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(read_io_threads),
MYSQL_SYSVAR(write_io_threads),
MYSQL_SYSVAR(file_per_table),
- MYSQL_SYSVAR(file_format),
- MYSQL_SYSVAR(file_format_check),
- MYSQL_SYSVAR(file_format_max),
MYSQL_SYSVAR(flush_log_at_timeout),
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
@@ -21006,7 +20023,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_min_token_size),
MYSQL_SYSVAR(ft_num_word_optimize),
MYSQL_SYSVAR(ft_sort_pll_degree),
- MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
@@ -21054,7 +20070,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(replication_delay),
MYSQL_SYSVAR(status_file),
MYSQL_SYSVAR(strict_mode),
- MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sort_buffer_size),
MYSQL_SYSVAR(online_alter_log_max_size),
MYSQL_SYSVAR(sync_spin_loops),
@@ -21083,7 +20098,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(read_only),
- MYSQL_SYSVAR(safe_truncate),
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(io_capacity_max),
MYSQL_SYSVAR(page_cleaners),
@@ -21136,11 +20150,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(force_primary_key),
MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
/* Table page compression feature */
- MYSQL_SYSVAR(use_trim),
MYSQL_SYSVAR(compression_default),
MYSQL_SYSVAR(compression_algorithm),
- MYSQL_SYSVAR(mtflush_threads),
- MYSQL_SYSVAR(use_mtflush),
/* Encryption feature */
MYSQL_SYSVAR(encrypt_tables),
MYSQL_SYSVAR(encryption_threads),
@@ -21159,14 +20170,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#ifdef UNIV_DEBUG
MYSQL_SYSVAR(debug_force_scrubbing),
#endif
- MYSQL_SYSVAR(instrument_semaphores),
MYSQL_SYSVAR(buf_dump_status_frequency),
MYSQL_SYSVAR(background_thread),
- /* XtraDB compatibility system variables */
-#define HA_XTRADB_SYSVARS
-#include "ha_xtradb.h"
-
NULL
};
@@ -21178,7 +20184,7 @@ maria_declare_plugin(innobase)
plugin_author,
"Supports transactions, row-level locking, foreign keys and encryption for tables",
PLUGIN_LICENSE_GPL,
- innobase_init, /* Plugin Init */
+ innodb_init, /* Plugin Init */
NULL, /* Plugin Deinit */
INNODB_VERSION_SHORT,
innodb_status_variables_export,/* status variables */
@@ -21251,13 +20257,13 @@ innodb_params_adjust()
= MYSQL_SYSVAR_NAME(undo_logs).def_val
= srv_available_undo_logs;
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
- = 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT);
+ = 1ULL << (32U + srv_page_size_shift);
MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
= MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
= ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
- * srv_page_size;
+ << srv_page_size_shift;
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
- = 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT);
+ = 1ULL << (32U + srv_page_size_shift);
}
/****************************************************************************
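
innodb_params_adjust() above replaces the compile-time UNIV_PAGE_SIZE_SHIFT with the run-time srv_page_size_shift and turns a multiplication by the page size into a left shift; for a power-of-two page size the two are interchangeable. A quick standalone check, with local stand-ins for the server variables:

// Check that "n * page_size" and "n << page_size_shift" agree when the
// page size is the power of two described by the shift.
#include <cassert>
#include <cstdio>

int main()
{
  const unsigned srv_page_size_shift = 14;                    // 16KiB pages (assumed)
  const unsigned long long srv_page_size = 1ULL << srv_page_size_shift;
  const unsigned long long pages = 128;                       // some size expressed in pages

  assert((pages * srv_page_size) == (pages << srv_page_size_shift));
  std::printf("1ULL << (32 + shift) = %llu\n", 1ULL << (32U + srv_page_size_shift));
}
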
@@ -21550,7 +20556,7 @@ innobase_rename_vc_templ(
if (is_part != NULL) {
*is_part = '\0';
- tbnamelen = is_part - tbname;
+ tbnamelen = ulint(is_part - tbname);
}
dbnamelen = filename_to_tablename(dbname, t_dbname,
@@ -21730,7 +20736,7 @@ innobase_get_computed_value(
if (!heap || index->table->vc_templ->rec_len
>= REC_VERSION_56_MAX_INDEX_COL_LEN) {
if (*local_heap == NULL) {
- *local_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ *local_heap = mem_heap_create(srv_page_size);
}
buf = static_cast<byte*>(mem_heap_alloc(
@@ -21763,7 +20769,7 @@ innobase_get_computed_value(
if (row_field->ext) {
if (*local_heap == NULL) {
- *local_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ *local_heap = mem_heap_create(srv_page_size);
}
data = btr_copy_externally_stored_field(
@@ -21901,7 +20907,7 @@ ib_senderrf(
{
va_list args;
char* str = NULL;
- const char* format = innobase_get_err_msg(code);
+ const char* format = my_get_err_msg(code);
/* If the caller wants to push a message to the client then
the caller must pass a valid session handle. */
@@ -22059,7 +21065,7 @@ innobase_convert_to_filename_charset(
CHARSET_INFO* cs_from = system_charset_info;
return(static_cast<uint>(strconvert(
- cs_from, from, strlen(from),
+ cs_from, from, uint(strlen(from)),
cs_to, to, static_cast<uint>(len), &errors)));
}
@@ -22078,7 +21084,7 @@ innobase_convert_to_system_charset(
CHARSET_INFO* cs2 = system_charset_info;
return(static_cast<uint>(strconvert(
- cs1, from, strlen(from),
+ cs1, from, static_cast<uint>(strlen(from)),
cs2, to, static_cast<uint>(len), errors)));
}
@@ -22088,9 +21094,8 @@ void
ib_warn_row_too_big(const dict_table_t* table)
{
/* If prefix is true then a 768-byte prefix is stored
- locally for BLOB fields. Refer to dict_table_get_format() */
- const bool prefix = (dict_tf_get_format(table->flags)
- == UNIV_FORMAT_A);
+ locally for BLOB fields. */
+ const bool prefix = !dict_table_has_atomic_blobs(table);
const ulint free_space = page_get_free_space_of_empty(
table->flags & DICT_TF_COMPACT) / 2;
@@ -22111,7 +21116,6 @@ ib_warn_row_too_big(const dict_table_t* table)
/** Validate the requested buffer pool size. Also, reserve the necessary
memory needed for buffer pool resize.
@param[in] thd thread handle
-@param[in] var pointer to system variable
@param[out] save immediate result for update function
@param[in] value incoming string
@return 0 on success, 1 on failure.
@@ -22120,13 +21124,11 @@ static
int
innodb_buffer_pool_size_validate(
THD* thd,
- struct st_mysql_sys_var* var,
+ st_mysql_sys_var*,
void* save,
struct st_mysql_value* value)
{
longlong intbuf;
-
-
value->val_int(value, &intbuf);
if (!srv_was_started) {
@@ -22172,12 +21174,11 @@ innodb_buffer_pool_size_validate(
return(1);
}
- ulint requested_buf_pool_size
- = buf_pool_size_align(static_cast<ulint>(intbuf));
+ ulint requested_buf_pool_size = buf_pool_size_align(ulint(intbuf));
- *static_cast<longlong*>(save) = requested_buf_pool_size;
+ *static_cast<ulonglong*>(save) = requested_buf_pool_size;
- if (srv_buf_pool_size == static_cast<ulint>(intbuf)) {
+ if (srv_buf_pool_size == ulint(intbuf)) {
buf_pool_mutex_exit_all();
/* nothing to do */
return(0);
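
innodb_buffer_pool_size_validate() above stores the buf_pool_size_align()-adjusted value into *save, so the size that is later applied can be larger than the one requested. A standalone sketch of rounding a request up to a multiple of the chunk unit, in the spirit of that alignment; the 128MiB chunk value is an assumption, not read from the server:

// Round a requested buffer pool size up to a multiple of the chunk unit.
#include <cstdio>

static unsigned long long align_up(unsigned long long size, unsigned long long unit)
{
  return ((size + unit - 1) / unit) * unit;
}

int main()
{
  const unsigned long long chunk = 128ULL << 20;        // assumed chunk unit: 128MiB
  const unsigned long long requested = 200ULL << 20;    // e.g. SET GLOBAL innodb_buffer_pool_size = 200M
  std::printf("requested %lluM -> aligned %lluM\n",
              requested >> 20, align_up(requested, chunk) >> 20);
}
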
@@ -22225,7 +21226,7 @@ innodb_compression_algorithm_validate(
for update function */
struct st_mysql_value* value) /*!< in: incoming string */
{
- long compression_algorithm;
+ ulong compression_algorithm;
DBUG_ENTER("innobase_compression_algorithm_validate");
if (check_sysvar_enum(thd, var, save, value)) {
@@ -22347,7 +21348,7 @@ UNIV_INTERN
void
ib_push_warning(
trx_t* trx, /*!< in: trx */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...)
{
@@ -22361,9 +21362,9 @@ ib_push_warning(
buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
vsprintf(buf,format, args);
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- convert_error_code_to_mysql((dberr_t)error, 0, thd),
- buf);
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ uint(convert_error_code_to_mysql(error, 0, thd)), buf);
my_free(buf);
va_end(args);
}
@@ -22375,7 +21376,7 @@ UNIV_INTERN
void
ib_push_warning(
void* ithd, /*!< in: thd */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...)
{
@@ -22393,9 +21394,9 @@ ib_push_warning(
buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
vsprintf(buf,format, args);
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- convert_error_code_to_mysql((dberr_t)error, 0, thd),
- buf);
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ uint(convert_error_code_to_mysql(error, 0, thd)), buf);
my_free(buf);
va_end(args);
}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index b62045fd963..ed7d380db8b 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -106,13 +106,11 @@ public:
double read_time(uint index, uint ranges, ha_rows rows);
- longlong get_memory_buffer_size() const;
-
int delete_all_rows();
int write_row(uchar * buf);
- int update_row(const uchar * old_data, uchar * new_data);
+ int update_row(const uchar * old_data, const uchar * new_data);
int delete_row(const uchar * buf);
@@ -263,7 +261,7 @@ public:
*/
my_bool register_query_cache_table(
THD* thd,
- char* table_key,
+ const char* table_key,
uint key_length,
qc_engine_callback* call_back,
ulonglong* engine_data);
@@ -280,12 +278,24 @@ public:
by ALTER TABLE and holding data used during in-place alter.
@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
- @retval HA_ALTER_INPLACE_NO_LOCK Supported
- @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
- Supported, but requires lock during main phase and
- exclusive lock during prepare phase.
- @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
- Supported, prepare phase requires exclusive lock. */
+ @retval HA_ALTER_INPLACE_INSTANT
+ MDL_EXCLUSIVE is needed for executing prepare_inplace_alter_table()
+ and commit_inplace_alter_table(). inplace_alter_table()
+ will not be called.
+ @retval HA_ALTER_INPLACE_COPY_NO_LOCK
+ MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded
+ to LOCK=NONE for rebuilding the table in inplace_alter_table()
+ @retval HA_ALTER_INPLACE_COPY_LOCK
+ MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded
+ to LOCK=SHARED for rebuilding the table in inplace_alter_table()
+ @retval HA_ALTER_INPLACE_NOCOPY_NO_LOCK
+ MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded
+ to LOCK=NONE for inplace_alter_table() which will not rebuild the table
+ @retval HA_ALTER_INPLACE_NOCOPY_LOCK
+ MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded
+ to LOCK=SHARED for inplace_alter_table() which will not rebuild
+ the table. */
+
enum_alter_inplace_result check_if_supported_inplace_alter(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info);
@@ -646,7 +656,7 @@ public:
m_trx(trx),
m_form(form),
m_create_info(create_info),
- m_table_name(table_name), m_drop_before_rollback(false),
+ m_table_name(table_name), m_table(NULL), m_drop_before_rollback(false),
m_remote_path(remote_path),
m_innodb_file_per_table(file_per_table)
{}
@@ -763,6 +773,8 @@ private:
/** Table name */
char* m_table_name;
+ /** Table */
+ dict_table_t* m_table;
/** Whether the table needs to be dropped before rollback */
bool m_drop_before_rollback;
@@ -878,19 +890,6 @@ innodb_base_col_setup_for_stored(
create_table_info_t::normalize_table_name_low(norm_name, name, FALSE)
#endif /* _WIN32 */
-/** Converts an InnoDB error code to a MySQL error code.
-Also tells to MySQL about a possible transaction rollback inside InnoDB caused
-by a lock wait timeout or a deadlock.
-@param[in] error InnoDB error code.
-@param[in] flags InnoDB table flags or 0.
-@param[in] thd MySQL thread or NULL.
-@return MySQL error code */
-int
-convert_error_code_to_mysql(
- dberr_t error,
- ulint flags,
- THD* thd);
-
/** Converts a search mode flag understood by MySQL to a flag understood
by InnoDB.
@param[in] find_flag MySQL search mode flag.
diff --git a/storage/innobase/handler/ha_xtradb.h b/storage/innobase/handler/ha_xtradb.h
deleted file mode 100644
index d2d1f361382..00000000000
--- a/storage/innobase/handler/ha_xtradb.h
+++ /dev/null
@@ -1,1009 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/** @file ha_xtradb.h */
-
-#ifndef HA_XTRADB_H
-#define HA_XTRADB_H
-
-static
-void
-innodb_print_deprecation(const char* param);
-
-/* XtraDB compatibility system variables. Note that the default and
-minimum values may differ from the originals so that we can detect
-whether the user has actually set the parameter. */
-
-static my_bool innodb_buffer_pool_populate;
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static ulong srv_cleaner_max_lru_time;
-static ulong srv_cleaner_max_flush_time;
-static ulong srv_cleaner_flush_chunk_size;
-static ulong srv_cleaner_lru_chunk_size;
-static ulong srv_cleaner_free_list_lwm;
-static my_bool srv_cleaner_eviction_factor;
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
-static ulong srv_pass_corrupt_table;
-static ulong srv_empty_free_list_algorithm;
-static ulong innobase_file_io_threads;
-static ulong srv_foreground_preflush;
-static longlong srv_kill_idle_transaction;
-static my_bool srv_fake_changes_locks;
-static my_bool innobase_log_archive;
-static char* innobase_log_arch_dir = NULL;
-static ulong srv_log_arch_expire_sec;
-static ulong innobase_log_block_size;
-static ulong srv_log_checksum_algorithm;
-static ulonglong srv_max_bitmap_file_size;
-static ulonglong srv_max_changed_pages;
-static ulong innobase_mirrored_log_groups;
-#ifdef UNIV_LINUX
-static ulong srv_sched_priority_cleaner;
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static my_bool srv_cleaner_thread_priority;
-static my_bool srv_io_thread_priority;
-static my_bool srv_master_thread_priority;
-static my_bool srv_purge_thread_priority;
-static ulong srv_sched_priority_io;
-static ulong srv_sched_priority_master;
-static ulong srv_sched_priority_purge;
-#endif /* UNIV_DEBUG || UNIV_PERF_DEBUG */
-#endif /* UNIV_LINUX */
-static ulong srv_cleaner_lsn_age_factor;
-static ulong srv_show_locks_held;
-static ulong srv_show_verbose_locks;
-static my_bool srv_track_changed_pages;
-static my_bool innodb_track_redo_log_now;
-static my_bool srv_use_global_flush_log_at_trx_commit;
-static my_bool srv_use_stacktrace;
-
-
-static const char innodb_deprecated_msg[]= "Using %s is deprecated and the"
- " parameter may be removed in future releases."
-	" Ignoring the parameter.";
-
-
-#ifdef BTR_CUR_HASH_ADAPT
-/* it is just an alias for innodb_adaptive_hash_index_parts */
-/** Number of distinct partitions of AHI.
-Each partition is protected by its own latch and so we have parts number
-of latches protecting complete search system. */
-static MYSQL_SYSVAR_ULONG(adaptive_hash_index_partitions, btr_ahi_parts,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "It is an alias for innodb_adaptive_hash_index_parts; "
- "only exists to allow easier upgrade from earlier XtraDB versions.",
- NULL, NULL, 8, 1, 512, 0);
-#endif /* BTR_CUR_HASH_ADAPT */
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_populate, innodb_buffer_pool_populate,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static
-void
-set_cleaner_max_lru_time(THD*thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_max_lru_time");
-}
-/* Original default 1000 */
-static MYSQL_SYSVAR_ULONG(cleaner_max_lru_time, srv_cleaner_max_lru_time,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, set_cleaner_max_lru_time, 0, 0, ~0UL, 0);
-
-static
-void
-set_cleaner_max_flush_time(THD*thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_max_flush_time");
-}
-/* Original default 1000 */
-static MYSQL_SYSVAR_ULONG(cleaner_max_flush_time, srv_cleaner_max_flush_time,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_max_flush_time, 0, 0, ~0UL, 0);
-
-static
-void
-set_cleaner_flush_chunk_size(THD*thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_flush_chunk_size");
-}
-/* Original default 100 */
-static MYSQL_SYSVAR_ULONG(cleaner_flush_chunk_size,
- srv_cleaner_flush_chunk_size,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_flush_chunk_size, 0, 0, ~0UL, 0);
-
-static
-void
-set_cleaner_lru_chunk_size(THD*thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_lru_chunk_size");
-}
-/* Original default 100 */
-static MYSQL_SYSVAR_ULONG(cleaner_lru_chunk_size,
- srv_cleaner_lru_chunk_size,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_lru_chunk_size, 0, 0, ~0UL, 0);
-
-static
-void
-set_cleaner_free_list_lwm(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_free_list_lwm");
-}
-/* Original default 10 */
-static MYSQL_SYSVAR_ULONG(cleaner_free_list_lwm, srv_cleaner_free_list_lwm,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_free_list_lwm, 0, 0, 100, 0);
-
-static
-void
-set_cleaner_eviction_factor(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_eviction_factor");
-}
-static MYSQL_SYSVAR_BOOL(cleaner_eviction_factor, srv_cleaner_eviction_factor,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_eviction_factor, FALSE);
-
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
-
-/* Added new default DEPRECATED */
-/** Possible values for system variable "innodb_cleaner_lsn_age_factor". */
-static const char* innodb_cleaner_lsn_age_factor_names[] = {
- "LEGACY",
- "HIGH_CHECKPOINT",
- "DEPRECATED",
- NullS
-};
-
-/** Enumeration for innodb_cleaner_lsn_age_factor. */
-static TYPELIB innodb_cleaner_lsn_age_factor_typelib = {
- array_elements(innodb_cleaner_lsn_age_factor_names) - 1,
- "innodb_cleaner_lsn_age_factor_typelib",
- innodb_cleaner_lsn_age_factor_names,
- NULL
-};
-
-/** Alternatives for srv_cleaner_lsn_age_factor, set through
-innodb_cleaner_lsn_age_factor variable */
-enum srv_cleaner_lsn_age_factor_t {
- SRV_CLEANER_LSN_AGE_FACTOR_LEGACY, /*!< Original Oracle MySQL 5.6
- formula */
- SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT,
- /*!< Percona Server 5.6 formula
- that returns lower values than
- legacy option for low
- checkpoint ages, and higher
- values for high ages. This has
- the effect of stabilizing the
- checkpoint age higher. */
- SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED /*!< Deprecated, do not use */
-};
-
-/** Alternatives for srv_foreground_preflush, set through
-innodb_foreground_preflush variable */
-enum srv_foreground_preflush_t {
- SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, /*!< Original Oracle MySQL 5.6
- behavior of performing a sync
- flush list flush */
- SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF, /*!< Exponential backoff wait
- for the page cleaner to flush
- for us */
- SRV_FOREGROUND_PREFLUSH_DEPRECATED /*!< Deprecated, do not use */
-};
-
-/** Alternatives for srv_empty_free_list_algorithm, set through
-innodb_empty_free_list_algorithm variable */
-enum srv_empty_free_list_t {
- SRV_EMPTY_FREE_LIST_LEGACY, /*!< Original Oracle MySQL 5.6
- algorithm */
- SRV_EMPTY_FREE_LIST_BACKOFF, /*!< Percona Server 5.6 algorithm that
- loops in a progressive backoff until a
- free page is produced by the cleaner
- thread */
- SRV_EMPTY_FREE_LIST_DEPRECATED /*!< Deprecated, do not use */
-};
-
-#define SRV_CHECKSUM_ALGORITHM_DEPRECATED 6
-
-static
-void
-set_cleaner_lsn_age_factor(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_cleaner_lsn_age_factor");
-}
-static MYSQL_SYSVAR_ENUM(cleaner_lsn_age_factor,
- srv_cleaner_lsn_age_factor,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_cleaner_lsn_age_factor, SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED,
- &innodb_cleaner_lsn_age_factor_typelib);
-
-/* Added new default deprecated, 3 */
-const char *corrupt_table_action_names[]=
-{
- "assert", /* 0 */
- "warn", /* 1 */
- "salvage", /* 2 */
- "deprecated", /* 3 */
- NullS
-};
-
-TYPELIB corrupt_table_action_typelib=
-{
- array_elements(corrupt_table_action_names) - 1, "corrupt_table_action_typelib",
- corrupt_table_action_names, NULL
-};
-
-static
-void
-set_corrupt_table_action(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_corrupt_table_action");
-}
-static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_corrupt_table_action, 3, &corrupt_table_action_typelib);
-
-/* Added new default DEPRECATED */
-/** Possible values for system variable "innodb_empty_free_list_algorithm". */
-static const char* innodb_empty_free_list_algorithm_names[] = {
- "LEGACY",
- "BACKOFF",
- "DEPRECATED",
- NullS
-};
-
-/** Enumeration for innodb_empty_free_list_algorithm. */
-static TYPELIB innodb_empty_free_list_algorithm_typelib = {
- array_elements(innodb_empty_free_list_algorithm_names) - 1,
- "innodb_empty_free_list_algorithm_typelib",
- innodb_empty_free_list_algorithm_names,
- NULL
-};
-
-static
-void
-set_empty_free_list_algorithm(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_empty_free_list_algorithm");
-}
-static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
- srv_empty_free_list_algorithm,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_empty_free_list_algorithm, SRV_EMPTY_FREE_LIST_DEPRECATED,
- &innodb_empty_free_list_algorithm_typelib);
-
-static
-void
-set_fake_changes(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_fake_changes");
-}
-static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_fake_changes, FALSE);
-
-/* Original default, min 4. */
-static MYSQL_SYSVAR_ULONG(file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, 0, 0, 64, 0);
-
-/** Possible values for system variable "innodb_foreground_preflush". */
-static const char* innodb_foreground_preflush_names[] = {
- "SYNC_PREFLUSH",
- "EXPONENTIAL_BACKOFF",
- "DEPRECATED",
- NullS
-};
-
-/* Enumeration for innodb_foreground_preflush. */
-static TYPELIB innodb_foreground_preflush_typelib = {
- array_elements(innodb_foreground_preflush_names) - 1,
- "innodb_foreground_preflush_typelib",
- innodb_foreground_preflush_names,
- NULL
-};
-
-static
-void
-set_foreground_preflush(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_foreground_preflush");
-}
-static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_foreground_preflush, SRV_FOREGROUND_PREFLUSH_DEPRECATED,
- &innodb_foreground_preflush_typelib);
-
-#ifdef EXTENDED_FOR_KILLIDLE
-#define kill_idle_help_text "If non-zero value, the idle session with transaction which is idle over the value in seconds is killed by InnoDB."
-#else
-#define kill_idle_help_text "No effect for this build."
-#endif
-static
-void
-set_kill_idle_transaction(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_kill_idle_transaction");
-}
-static MYSQL_SYSVAR_LONGLONG(kill_idle_transaction, srv_kill_idle_transaction,
- PLUGIN_VAR_RQCMDARG, kill_idle_help_text,
- NULL, &set_kill_idle_transaction, 0, 0, LONG_MAX, 0);
-
-static
-void
-set_locking_fake_changes(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_locking_fake_changes");
-}
-/* Original default: TRUE */
-static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
- PLUGIN_VAR_NOCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_locking_fake_changes, FALSE);
-
-static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, NULL);
-
-static
-void
-set_log_archive(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_log_archive");
-}
-static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_log_archive, FALSE);
-
-static
-void
-set_log_arch_expire_sec(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_log_arch_expire_sec");
-}
-static MYSQL_SYSVAR_ULONG(log_arch_expire_sec,
- srv_log_arch_expire_sec, PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_log_arch_expire_sec, 0, 0, ~0UL, 0);
-
-/* Original default, min 512 */
-static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, 0, 0,
- (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
-
-/* Added new default deprecated */
-/** Possible values for system variables "innodb_checksum_algorithm" and
-"innodb_log_checksum_algorithm". */
-static const char* innodb_checksum_algorithm_names2[] = {
- "CRC32",
- "STRICT_CRC32",
- "INNODB",
- "STRICT_INNODB",
- "NONE",
- "STRICT_NONE",
- "DEPRECATED",
- NullS
-};
-
-/** Used to define an enumerate type of the system variables
-innodb_checksum_algorithm and innodb_log_checksum_algorithm. */
-static TYPELIB innodb_checksum_algorithm_typelib2 = {
- array_elements(innodb_checksum_algorithm_names2) - 1,
- "innodb_checksum_algorithm_typelib2",
- innodb_checksum_algorithm_names2,
- NULL
-};
-static
-void
-set_log_checksum_algorithm(THD* thd, st_mysql_sys_var*, void*, const void* save)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_log_checksum_algorithm");
- log_mutex_enter();
- srv_log_checksum_algorithm = *static_cast<const ulong*>(save);
- if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
- ib::info() << "Setting innodb_log_checksums = false";
- innodb_log_checksums = false;
- log_checksum_algorithm_ptr = log_block_calc_checksum_none;
- } else {
- ib::info() << "Setting innodb_log_checksums = true";
- innodb_log_checksums = true;
- log_checksum_algorithm_ptr = log_block_calc_checksum_crc32;
- }
- log_mutex_exit();
-}
-static MYSQL_SYSVAR_ENUM(log_checksum_algorithm, srv_log_checksum_algorithm,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and translated to innodb_log_checksums (NONE to OFF, "
- "everything else to ON); only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_log_checksum_algorithm, SRV_CHECKSUM_ALGORITHM_DEPRECATED,
- &innodb_checksum_algorithm_typelib2);
-
-static
-void
-set_max_bitmap_file_size(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_max_bitmap_file_size");
-}
-/* Original default 100M, min 4K */
-static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_max_bitmap_file_size, 0, 0, ULONGLONG_MAX, 0);
-
-static
-void
-set_max_changed_pages(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_max_changed_pages");
-}
-/* Original default 1000000 */
-static MYSQL_SYSVAR_ULONGLONG(max_changed_pages, srv_max_changed_pages,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_max_changed_pages, 0, 0, ~0ULL, 0);
-
-/* Note that the default and minimum values are set to 0 to
-detect if the option is passed and print deprecation message */
-static MYSQL_SYSVAR_ULONG(mirrored_log_groups, innobase_mirrored_log_groups,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, 0, 0, 10, 0);
-
-#ifdef UNIV_LINUX
-
-static
-void
-set_sched_priority_cleaner(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_sched_priority_cleaner");
-}
-/* Original default 19 */
-static MYSQL_SYSVAR_ULONG(sched_priority_cleaner, srv_sched_priority_cleaner,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_sched_priority_cleaner, 0, 0, 39, 0);
-
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static
-void
-set_priority_cleaner(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_priority_cleaner");
-}
-static MYSQL_SYSVAR_BOOL(priority_cleaner, srv_cleaner_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_priority_cleaner, FALSE);
-
-static
-void
-set_priority_io(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_priority_io");
-}
-static MYSQL_SYSVAR_BOOL(priority_io, srv_io_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_priority_io, FALSE);
-
-static
-void
-set_priority_master(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_priority_master");
-}
-static MYSQL_SYSVAR_BOOL(priority_master, srv_master_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_priority_master, FALSE);
-
-static
-void
-set_priority_purge(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_priority_purge");
-}
-static MYSQL_SYSVAR_BOOL(priority_purge, srv_purge_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_priority_purge, FALSE);
-
-static
-void
-set_sched_priority_io(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_sched_priority_io");
-}
-/* Original default 19 */
-static MYSQL_SYSVAR_ULONG(sched_priority_io, srv_sched_priority_io,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_sched_priority_io, 0, 0, 39, 0);
-
-static
-void
-set_sched_priority_master(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_sched_priority_master");
-}
-/* Original default 19 */
-static MYSQL_SYSVAR_ULONG(sched_priority_master, srv_sched_priority_master,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_sched_priority_master, 0, 0, 39, 0);
-
-static
-void
-set_sched_priority_purge(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_sched_priority_purge");
-}
-/* Original default 19 */
-static MYSQL_SYSVAR_ULONG(sched_priority_purge, srv_sched_priority_purge,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_sched_priority_purge, 0, 0, 39, 0);
-#endif /* UNIV_DEBUG || UNIV_PERF_DEBUG */
-#endif /* UNIV_LINUX */
-
-static
-void
-set_show_locks_held(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_show_locks_held");
-}
-/* TODO: Implement */
-static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_show_locks_held, 0, 0, 1000, 0);
-
-static
-void
-set_show_verbose_locks(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_show_verbose_locks");
-}
-/* TODO: Implement */
-static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_show_verbose_locks, 0, 0, 1, 0);
-
-static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, FALSE);
-
-static
-void
-set_track_redo_log_now(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_track_redo_log_now");
-}
-static MYSQL_SYSVAR_BOOL(track_redo_log_now,
- innodb_track_redo_log_now,
- PLUGIN_VAR_OPCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_track_redo_log_now, FALSE);
-
-static
-void
-set_use_global_flush_log_at_trx_commit(THD* thd, st_mysql_sys_var*, void*, const void*)
-{
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WARN_DEPRECATED_SYNTAX,
- innodb_deprecated_msg,
- "innodb_use_global_flush_log_at_trx_commit");
-}
-static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
- PLUGIN_VAR_NOCMDARG,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, &set_use_global_flush_log_at_trx_commit, FALSE);
-
-static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated and ignored; only exists to allow easier upgrade from "
- "earlier XtraDB versions.",
- NULL, NULL, FALSE);
-
-/** Print deprecation message for a given system variable.
-@param[in] param System parameter name */
-static
-void
-innodb_print_deprecation(const char* param)
-{
- ib::warn() << "Using " << param << " is deprecated and the"
- " parameter may be removed in future releases."
-		" Ignoring the parameter.";
-}
-
-/** Check whether the user has set an XtraDB extended system variable
-that is not currently supported by InnoDB or is marked as deprecated. */
-static
-void
-innodb_check_deprecated(void)
-{
- if (innodb_buffer_pool_populate) {
- innodb_print_deprecation("innodb-buffer-pool-populate");
- }
-
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
- if (srv_cleaner_max_lru_time) {
- innodb_print_deprecation("innodb-cleaner-max-lru-time");
- }
-
- if (srv_cleaner_max_flush_time) {
- innodb_print_deprecation("innodb-cleaner-max-flush-time");
- }
-
- if (srv_cleaner_flush_chunk_size) {
- innodb_print_deprecation("innodb-cleaner-flush-chunk-size");
- }
-
- if (srv_cleaner_lru_chunk_size) {
-		innodb_print_deprecation("innodb-cleaner-lru-chunk-size");
- }
- if (srv_cleaner_free_list_lwm) {
- innodb_print_deprecation("innodb-cleaner-free-list-lwm");
- }
-
- if (srv_cleaner_eviction_factor) {
- innodb_print_deprecation("innodb-cleaner-eviction-factor");
- }
-
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
-
- if (srv_cleaner_lsn_age_factor != SRV_CLEANER_LSN_AGE_FACTOR_DEPRECATED) {
- innodb_print_deprecation("innodb-cleaner-lsn-age-factor");
- }
-
- if (srv_pass_corrupt_table != 3) {
- innodb_print_deprecation("innodb-pass-corrupt-table");
- }
-
- if (srv_empty_free_list_algorithm != SRV_EMPTY_FREE_LIST_DEPRECATED) {
- innodb_print_deprecation("innodb-empty-free-list-algorithm");
- }
-
- if (THDVAR((THD*) NULL, fake_changes)) {
- innodb_print_deprecation("innodb-fake-changes");
- }
-
- if (innobase_file_io_threads) {
- innodb_print_deprecation("innodb-file-io-threads");
- }
-
- if (srv_foreground_preflush != SRV_FOREGROUND_PREFLUSH_DEPRECATED) {
- innodb_print_deprecation("innodb-foreground-preflush");
- }
-
- if (srv_kill_idle_transaction != 0) {
- innodb_print_deprecation("innodb-kill-idle-transaction");
- }
-
- if (srv_fake_changes_locks) {
- innodb_print_deprecation("innodb-fake-changes-locks");
- }
-
- if (innobase_log_arch_dir) {
- innodb_print_deprecation("innodb-log-arch-dir");
- }
-
- if (innobase_log_archive) {
- innodb_print_deprecation("innodb-log-archive");
- }
-
- if (srv_log_arch_expire_sec) {
- innodb_print_deprecation("innodb-log-arch-expire-sec");
- }
-
- if (innobase_log_block_size) {
- innodb_print_deprecation("innodb-log-block-size");
- }
-
- if (srv_log_checksum_algorithm != SRV_CHECKSUM_ALGORITHM_DEPRECATED) {
- innodb_print_deprecation("innodb-log-checksum-algorithm");
- if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
- ib::info() << "Setting innodb_log_checksums = false";
- innodb_log_checksums = false;
- log_checksum_algorithm_ptr = log_block_calc_checksum_none;
- } else {
- ib::info() << "Setting innodb_log_checksums = true";
- innodb_log_checksums = true;
- log_checksum_algorithm_ptr = log_block_calc_checksum_crc32;
- }
- }
-
- if (srv_max_changed_pages) {
- innodb_print_deprecation("innodb-max-changed-pages");
- }
-
- if (innobase_mirrored_log_groups) {
- innodb_print_deprecation("innodb-mirrored-log-groups");
- }
-
-#ifdef UNIV_LINUX
- if (srv_sched_priority_cleaner) {
- innodb_print_deprecation("innodb-sched-priority-cleaner");
- }
-
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
- if (srv_cleaner_thread_priority) {
- innodb_print_deprecation("innodb-cleaner-thread-priority");
- }
-
- if (srv_io_thread_priority) {
- innodb_print_deprecation("innodb-io-thread-priority");
- }
-
- if (srv_master_thread_priority) {
-		innodb_print_deprecation("innodb-master-thread-priority");
- }
-
- if (srv_purge_thread_priority) {
-		innodb_print_deprecation("innodb-purge-thread-priority");
- }
-
- if (srv_sched_priority_io) {
- innodb_print_deprecation("innodb-sched-priority-io");
- }
-
- if (srv_sched_priority_master) {
- innodb_print_deprecation("innodb-sched-priority-master");
- }
-
- if (srv_sched_priority_purge) {
- innodb_print_deprecation("innodb-sched-priority-purge");
- }
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
-#endif /* UNIV_LINUX */
-
- if (srv_track_changed_pages) {
- innodb_print_deprecation("innodb-track-changed-pages");
- }
-
- if (innodb_track_redo_log_now) {
- innodb_print_deprecation("innodb-track-redo-log-now");
- }
-
- if (srv_use_global_flush_log_at_trx_commit) {
- innodb_print_deprecation("innodb-use-global-flush-log-at-trx-commit");
- }
-
- if (srv_use_stacktrace) {
- innodb_print_deprecation("innodb-use-stacktrace");
- }
-
- if (srv_max_bitmap_file_size) {
- innodb_print_deprecation("innodb-max-bitmap-file-size");
- }
-
- if (srv_show_locks_held) {
- innodb_print_deprecation("innodb-show-locks-held");
- }
-
- if (srv_show_verbose_locks) {
- innodb_print_deprecation("innodb-show-verbose-locks");
- }
-}
-
-#endif /* HA_XTRADB_H */
-
-#ifdef HA_XTRADB_SYSVARS
- /* XtraDB compatibility system variables */
-#ifdef BTR_CUR_HASH_ADAPT
- MYSQL_SYSVAR(adaptive_hash_index_partitions),
-#endif /* BTR_CUR_HASH_ADAPT */
- MYSQL_SYSVAR(buffer_pool_populate),
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
- MYSQL_SYSVAR(cleaner_eviction_factor),
- MYSQL_SYSVAR(cleaner_flush_chunk_size),
- MYSQL_SYSVAR(cleaner_free_list_lwm),
- MYSQL_SYSVAR(cleaner_lru_chunk_size),
- MYSQL_SYSVAR(cleaner_max_lru_time),
- MYSQL_SYSVAR(cleaner_max_flush_time),
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
- MYSQL_SYSVAR(cleaner_lsn_age_factor),
- MYSQL_SYSVAR(corrupt_table_action),
- MYSQL_SYSVAR(empty_free_list_algorithm),
- MYSQL_SYSVAR(fake_changes),
- MYSQL_SYSVAR(file_io_threads),
- MYSQL_SYSVAR(foreground_preflush),
- MYSQL_SYSVAR(kill_idle_transaction),
- MYSQL_SYSVAR(locking_fake_changes),
- MYSQL_SYSVAR(log_arch_dir),
- MYSQL_SYSVAR(log_archive),
- MYSQL_SYSVAR(log_arch_expire_sec),
- MYSQL_SYSVAR(log_block_size),
- MYSQL_SYSVAR(log_checksum_algorithm),
- MYSQL_SYSVAR(max_bitmap_file_size),
- MYSQL_SYSVAR(max_changed_pages),
- MYSQL_SYSVAR(mirrored_log_groups),
-#ifdef UNIV_LINUX
- MYSQL_SYSVAR(sched_priority_cleaner),
-#endif
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-#ifdef UNIV_LINUX
- MYSQL_SYSVAR(priority_cleaner),
- MYSQL_SYSVAR(priority_io),
- MYSQL_SYSVAR(priority_master),
- MYSQL_SYSVAR(priority_purge),
- MYSQL_SYSVAR(sched_priority_io),
- MYSQL_SYSVAR(sched_priority_master),
- MYSQL_SYSVAR(sched_priority_purge),
-#endif /* UNIV_LINUX */
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
- MYSQL_SYSVAR(show_locks_held),
- MYSQL_SYSVAR(show_verbose_locks),
- MYSQL_SYSVAR(track_changed_pages),
- MYSQL_SYSVAR(track_redo_log_now),
- MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
- MYSQL_SYSVAR(use_stacktrace),
-
-#endif /* HA_XTRADB_SYSVARS */
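Every compatibility stub the deleted header declared followed the same shape: a throwaway variable, an update hook that only pushes a deprecation warning, and a sysvar whose value is otherwise ignored. A hedged sketch of that pattern (the option name innodb_example_stub is invented for illustration; innodb_deprecated_msg is the format string defined near the top of the removed file):

static my_bool innodb_example_stub;	/* hypothetical stub variable */

static
void
set_example_stub(THD* thd, st_mysql_sys_var*, void*, const void*)
{
	/* The update hook only warns; the value itself is never used. */
	push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
			    ER_WARN_DEPRECATED_SYNTAX,
			    innodb_deprecated_msg,
			    "innodb_example_stub");
}

static MYSQL_SYSVAR_BOOL(example_stub, innodb_example_stub,
  PLUGIN_VAR_OPCMDARG,
  "Deprecated and ignored; only exists to allow easier upgrade from "
  "earlier XtraDB versions.",
  NULL, &set_example_stub, FALSE);

The defaults were deliberately chosen to differ from the original XtraDB defaults, so a non-default value observed in innodb_check_deprecated() means the user really set the option.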
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 632b5dd5a5a..bc483ffa130 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -42,10 +42,14 @@ Smart ALTER TABLE
#include "rem0types.h"
#include "row0log.h"
#include "row0merge.h"
+#include "row0ins.h"
+#include "row0row.h"
+#include "row0upd.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "handler0alter.h"
#include "srv0mon.h"
+#include "srv0srv.h"
#include "fts0priv.h"
#include "fts0plugin.h"
#include "pars0pars.h"
@@ -58,60 +62,79 @@ static const char *MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN=
"combined with other ALTER TABLE actions";
/** Operations for creating secondary indexes (no rebuild needed) */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
- = Alter_inplace_info::ADD_INDEX
- | Alter_inplace_info::ADD_UNIQUE_INDEX;
+static const alter_table_operations INNOBASE_ONLINE_CREATE
+ = ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX
+ | ALTER_ADD_UNIQUE_INDEX;
+
+/** Operations that require filling in default values for columns */
+static const alter_table_operations INNOBASE_DEFAULTS
+ = ALTER_COLUMN_NOT_NULLABLE
+ | ALTER_ADD_STORED_BASE_COLUMN;
+
+
+/** Operations that require knowledge about row_start, row_end values */
+static const alter_table_operations INNOBASE_ALTER_VERSIONED_REBUILD
+ = ALTER_ADD_SYSTEM_VERSIONING
+ | ALTER_DROP_SYSTEM_VERSIONING;
/** Operations for rebuilding a table in place */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD
- = Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::DROP_PK_INDEX
- | Alter_inplace_info::CHANGE_CREATE_OPTION
- /* CHANGE_CREATE_OPTION needs to check innobase_need_rebuild() */
- | Alter_inplace_info::ALTER_COLUMN_NULLABLE
- | Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
- | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
- | Alter_inplace_info::DROP_STORED_COLUMN
- | Alter_inplace_info::ADD_STORED_BASE_COLUMN
- | Alter_inplace_info::RECREATE_TABLE
+static const alter_table_operations INNOBASE_ALTER_REBUILD
+ = ALTER_ADD_PK_INDEX
+ | ALTER_DROP_PK_INDEX
+ | ALTER_OPTIONS
+ /* ALTER_OPTIONS needs to check alter_options_need_rebuild() */
+ | ALTER_COLUMN_NULLABLE
+ | INNOBASE_DEFAULTS
+ | ALTER_STORED_COLUMN_ORDER
+ | ALTER_DROP_STORED_COLUMN
+ | ALTER_RECREATE_TABLE
/*
- | Alter_inplace_info::ALTER_STORED_COLUMN_TYPE
+ | ALTER_STORED_COLUMN_TYPE
*/
+ | INNOBASE_ALTER_VERSIONED_REBUILD
;
/** Operations that require changes to data */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_DATA
+static const alter_table_operations INNOBASE_ALTER_DATA
= INNOBASE_ONLINE_CREATE | INNOBASE_ALTER_REBUILD;
/** Operations for altering a table that InnoDB does not care about */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
- = Alter_inplace_info::ALTER_COLUMN_DEFAULT
- | Alter_inplace_info::ALTER_PARTITIONED
- | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
- | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
- | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR
- | Alter_inplace_info::ALTER_RENAME;
+static const alter_table_operations INNOBASE_INPLACE_IGNORE
+ = ALTER_COLUMN_DEFAULT
+ | ALTER_PARTITIONED
+ | ALTER_COLUMN_COLUMN_FORMAT
+ | ALTER_COLUMN_STORAGE_TYPE
+ | ALTER_VIRTUAL_GCOL_EXPR
+ | ALTER_DROP_CHECK_CONSTRAINT
+ | ALTER_RENAME;
/** Operations on foreign key definitions (changing the schema only) */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_FOREIGN_OPERATIONS
- = Alter_inplace_info::DROP_FOREIGN_KEY
- | Alter_inplace_info::ADD_FOREIGN_KEY;
+static const alter_table_operations INNOBASE_FOREIGN_OPERATIONS
+ = ALTER_DROP_FOREIGN_KEY
+ | ALTER_ADD_FOREIGN_KEY;
+
+/** Operations that InnoDB cares about and can perform without creating data */
+static const alter_table_operations INNOBASE_ALTER_NOCREATE
+ = ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX
+ | ALTER_DROP_UNIQUE_INDEX;
/** Operations that InnoDB cares about and can perform without rebuild */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD
+static const alter_table_operations INNOBASE_ALTER_NOREBUILD
= INNOBASE_ONLINE_CREATE
- | INNOBASE_FOREIGN_OPERATIONS
- | Alter_inplace_info::DROP_INDEX
- | Alter_inplace_info::DROP_UNIQUE_INDEX
+ | INNOBASE_ALTER_NOCREATE;
+
+/** Operations that can be performed instantly, without inplace_alter_table() */
+static const alter_table_operations INNOBASE_ALTER_INSTANT
+ = ALTER_VIRTUAL_COLUMN_ORDER
+ | ALTER_COLUMN_NAME
#ifdef MYSQL_RENAME_INDEX
- | Alter_inplace_info::RENAME_INDEX
+ | ALTER_RENAME_INDEX
#endif
- | Alter_inplace_info::ALTER_COLUMN_NAME
- | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
- //| Alter_inplace_info::ALTER_INDEX_COMMENT
- | Alter_inplace_info::ADD_VIRTUAL_COLUMN
- | Alter_inplace_info::DROP_VIRTUAL_COLUMN
- | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER;
+ | ALTER_ADD_VIRTUAL_COLUMN
+ | INNOBASE_FOREIGN_OPERATIONS
+ | ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | ALTER_COLUMN_UNVERSIONED
+ | ALTER_DROP_VIRTUAL_COLUMN;
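Taken together, these constants partition ha_alter_info->handler_flags into ignore / instant / no-rebuild / rebuild buckets. A hedged restatement of how the buckets are meant to be combined, mirroring the checks made later in check_if_supported_inplace_alter() and innobase_need_rebuild() (assumes the constants defined above):

/* Illustrative helpers only; not part of the actual patch. */
static bool innodb_ignores_everything(alter_table_operations f)
{
	return !(f & ~INNOBASE_INPLACE_IGNORE);
}

static bool innodb_can_do_inplace(alter_table_operations f)
{
	/* Any flag outside the known buckets forces ALGORITHM=COPY. */
	return !(f & ~(INNOBASE_INPLACE_IGNORE
		       | INNOBASE_ALTER_INSTANT
		       | INNOBASE_ALTER_NOREBUILD
		       | INNOBASE_ALTER_REBUILD));
}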
struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
{
@@ -151,14 +174,16 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
dict_table_t* old_table;
/** table where the indexes are being created or dropped */
dict_table_t* new_table;
+ /** table definition for instant ADD COLUMN */
+ dict_table_t* instant_table;
/** mapping of old column numbers to new ones, or NULL */
const ulint* col_map;
/** new column names, or NULL if nothing was renamed */
const char** col_names;
/** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
const ulint add_autoinc;
- /** default values of ADD COLUMN, or NULL */
- const dtuple_t* add_cols;
+ /** default values of ADD and CHANGE COLUMN, or NULL */
+ const dtuple_t* defaults;
/** autoinc sequence to use */
ib_sequence_t sequence;
/** temporary table name to use for old table when renaming tables */
@@ -177,6 +202,22 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
const char** drop_vcol_name;
/** ALTER TABLE stage progress recorder */
ut_stage_alter_t* m_stage;
+ /** original number of user columns in the table */
+ const unsigned old_n_cols;
+ /** original columns of the table */
+ dict_col_t* const old_cols;
+ /** original column names of the table */
+ const char* const old_col_names;
+
+ /** Allow non-null conversion.
+ (1) Alter ignore should allow the conversion
+ irrespective of sql mode.
+ (2) Don't allow the conversion in strict mode
+ (3) Allow the conversion only in non-strict mode. */
+ const bool allow_not_null;
+
+ /** The page_compression_level attribute, or 0 */
+ const uint page_compression_level;
ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
dict_index_t** drop_arg,
@@ -194,7 +235,9 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
ulint add_autoinc_arg,
ulonglong autoinc_col_min_value_arg,
ulonglong autoinc_col_max_value_arg,
- ulint num_to_drop_vcol_arg) :
+ bool allow_not_null_flag,
+ bool page_compressed,
+ ulonglong page_compression_level_arg) :
inplace_alter_handler_ctx(),
prebuilt (prebuilt_arg),
add_index (0), add_key_numbers (0), num_to_add_index (0),
@@ -204,10 +247,10 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
online (online_arg), heap (heap_arg), trx (0),
old_table (prebuilt_arg->table),
- new_table (new_table_arg),
+ new_table (new_table_arg), instant_table (0),
col_map (0), col_names (col_names_arg),
add_autoinc (add_autoinc_arg),
- add_cols (0),
+ defaults (0),
sequence(prebuilt->trx->mysql_thd,
autoinc_col_min_value_arg, autoinc_col_max_value_arg),
tmp_name (0),
@@ -218,8 +261,19 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
num_to_drop_vcol(0),
drop_vcol(0),
drop_vcol_name(0),
- m_stage(NULL)
+ m_stage(NULL),
+ old_n_cols(prebuilt_arg->table->n_cols),
+ old_cols(prebuilt_arg->table->cols),
+ old_col_names(prebuilt_arg->table->col_names),
+ allow_not_null(allow_not_null_flag),
+ page_compression_level(page_compressed
+ ? (page_compression_level_arg
+ ? uint(page_compression_level_arg)
+ : page_zip_level)
+ : 0)
{
+ ut_ad(old_n_cols >= DATA_N_SYS_COLS);
+ ut_ad(page_compression_level <= 9);
#ifdef UNIV_DEBUG
for (ulint i = 0; i < num_to_add_index; i++) {
ut_ad(!add_index[i]->to_be_dropped);
@@ -236,6 +290,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
~ha_innobase_inplace_ctx()
{
UT_DELETE(m_stage);
+ if (instant_table) {
+ while (dict_index_t* index
+ = UT_LIST_GET_LAST(instant_table->indexes)) {
+ UT_LIST_REMOVE(instant_table->indexes, index);
+ rw_lock_free(&index->lock);
+ dict_mem_index_free(index);
+ }
+ dict_mem_table_free(instant_table);
+ }
mem_heap_free(heap);
}
@@ -253,6 +316,36 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
}
}
+ /** Convert table-rebuilding ALTER to instant ALTER. */
+ void prepare_instant()
+ {
+ DBUG_ASSERT(need_rebuild());
+ DBUG_ASSERT(!is_instant());
+ DBUG_ASSERT(old_table->n_cols == old_table->n_def);
+ DBUG_ASSERT(new_table->n_cols == new_table->n_def);
+ DBUG_ASSERT(old_table->n_cols == old_n_cols);
+ DBUG_ASSERT(new_table->n_cols > old_table->n_cols);
+ instant_table = new_table;
+
+ new_table = old_table;
+ export_vars.innodb_instant_alter_column++;
+ }
+
+ /** Revert prepare_instant() if the transaction is rolled back. */
+ void rollback_instant()
+ {
+ if (!is_instant()) return;
+ old_table->rollback_instant(old_n_cols,
+ old_cols, old_col_names);
+ }
+
+ /** @return whether this is instant ALTER TABLE */
+ bool is_instant() const
+ {
+ DBUG_ASSERT(!instant_table || !instant_table->can_be_evicted);
+ return instant_table;
+ }
+
private:
// Disable copying
ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
@@ -414,44 +507,62 @@ innobase_spatial_exist(
return(false);
}
-/** Determine if ALTER TABLE needs to rebuild the table.
-@param ha_alter_info the DDL operation
-@param table metadata before ALTER TABLE
-@return whether it is necessary to rebuild the table */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_need_rebuild(
+/** Determine if ALTER_OPTIONS requires rebuilding the table.
+@param[in] ha_alter_info the ALTER TABLE operation
+@param[in] table metadata before ALTER TABLE
+@return whether it is mandatory to rebuild the table */
+static bool alter_options_need_rebuild(
const Alter_inplace_info* ha_alter_info,
const TABLE* table)
{
- Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
- ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE;
+ DBUG_ASSERT(ha_alter_info->handler_flags & ALTER_OPTIONS);
- if (alter_inplace_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) {
- const ha_table_option_struct& alt_opt=
+ if (ha_alter_info->create_info->used_fields
+ & (HA_CREATE_USED_ROW_FORMAT
+ | HA_CREATE_USED_KEY_BLOCK_SIZE)) {
+ /* Specifying ROW_FORMAT or KEY_BLOCK_SIZE requires
+ rebuilding the table. (These attributes in the .frm
+ file may disagree with the InnoDB data dictionary, and
+	the interpretation of these attributes depends on
+ InnoDB parameters. That is why we for now always
+ require a rebuild when these attributes are specified.) */
+ return true;
+ }
+
+ const ha_table_option_struct& alt_opt=
*ha_alter_info->create_info->option_struct;
- const ha_table_option_struct& opt= *table->s->option_struct;
+ const ha_table_option_struct& opt= *table->s->option_struct;
- if (alt_opt.page_compressed != opt.page_compressed
- || alt_opt.page_compression_level
- != opt.page_compression_level
- || alt_opt.encryption != opt.encryption
- || alt_opt.encryption_key_id != opt.encryption_key_id) {
- return(true);
- }
+ /* Allow an instant change to enable page_compressed,
+ and any change of page_compression_level. */
+ if ((!alt_opt.page_compressed && opt.page_compressed)
+ || alt_opt.encryption != opt.encryption
+ || alt_opt.encryption_key_id != opt.encryption_key_id) {
+ return(true);
}
- if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION
- && !(ha_alter_info->create_info->used_fields
- & (HA_CREATE_USED_ROW_FORMAT
- | HA_CREATE_USED_KEY_BLOCK_SIZE))) {
- /* Any other CHANGE_CREATE_OPTION than changing
- ROW_FORMAT or KEY_BLOCK_SIZE can be done without
- rebuilding the table. */
- return(false);
+ return false;
+}
+
+/** Determine if ALTER TABLE needs to rebuild the table
+(or perform instant operation).
+@param[in] ha_alter_info the ALTER TABLE operation
+@param[in] table metadata before ALTER TABLE
+@return whether it is necessary to rebuild the table or to alter columns */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+bool
+innobase_need_rebuild(
+ const Alter_inplace_info* ha_alter_info,
+ const TABLE* table)
+{
+ if ((ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_NOREBUILD
+ | INNOBASE_ALTER_INSTANT))
+ == ALTER_OPTIONS) {
+ return alter_options_need_rebuild(ha_alter_info, table);
}
- return(!!(alter_inplace_flags & INNOBASE_ALTER_REBUILD));
+ return !!(ha_alter_info->handler_flags & INNOBASE_ALTER_REBUILD);
}
/** Check if virtual column in old and new table are in order, excluding
@@ -475,7 +586,7 @@ check_v_col_in_order(
/* We don't support any adding new virtual column before
existed virtual column. */
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_VIRTUAL_COLUMN) {
+ & ALTER_ADD_VIRTUAL_COLUMN) {
bool has_new = false;
List_iterator_fast<Create_field> cf_it(
@@ -505,7 +616,7 @@ check_v_col_in_order(
/* directly return true if ALTER_VIRTUAL_COLUMN_ORDER is not on */
if (!(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) {
+ & ALTER_VIRTUAL_COLUMN_ORDER)) {
return(true);
}
@@ -533,8 +644,8 @@ check_v_col_in_order(
}
if (my_strcasecmp(system_charset_info,
- field->field_name,
- new_field->field_name) != 0) {
+ field->field_name.str,
+ new_field->field_name.str) != 0) {
/* different column */
return(false);
} else {
@@ -554,29 +665,216 @@ check_v_col_in_order(
return(true);
}
+/** Determine if an instant operation is possible for altering columns.
+@param[in] ha_alter_info the ALTER TABLE operation
+@param[in] table table definition before ALTER TABLE */
+static
+bool
+instant_alter_column_possible(
+ const Alter_inplace_info* ha_alter_info,
+ const TABLE* table)
+{
+ // Making table system-versioned instantly is not implemented yet.
+ if (ha_alter_info->handler_flags & ALTER_ADD_SYSTEM_VERSIONING) {
+ return false;
+ }
+
+ if (~ha_alter_info->handler_flags & ALTER_ADD_STORED_BASE_COLUMN) {
+ return false;
+ }
+
+ /* At the moment, we disallow ADD [UNIQUE] INDEX together with
+ instant ADD COLUMN.
+
+ The main reason is that the work of instant ADD must be done
+ in commit_inplace_alter_table(). For the rollback_instant()
+ to work, we must add the columns to dict_table_t beforehand,
+ and roll back those changes in case the transaction is rolled
+ back.
+
+ If we added the columns to the dictionary cache already in the
+ prepare_inplace_alter_table(), we would have to deal with
+ column number mismatch in ha_innobase::open(), write_row() and
+ other functions. */
+
+ /* FIXME: allow instant ADD COLUMN together with
+ INNOBASE_ONLINE_CREATE (ADD [UNIQUE] INDEX) on pre-existing
+ columns. */
+ if (ha_alter_info->handler_flags
+ & ((INNOBASE_ALTER_REBUILD | INNOBASE_ONLINE_CREATE)
+ & ~ALTER_ADD_STORED_BASE_COLUMN & ~ALTER_OPTIONS)) {
+ return false;
+ }
+
+ return !(ha_alter_info->handler_flags & ALTER_OPTIONS)
+ || !alter_options_need_rebuild(ha_alter_info, table);
+}
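So instant ADD COLUMN is chosen only under narrow conditions: a stored column is being added, system versioning is not, nothing else in the request needs a rebuild or an online index build, and any table options present must themselves be applicable instantly. A compact, hedged restatement of the predicate above (assumes the same flag constants):

static bool instant_possible_sketch(alter_table_operations f,
				    bool options_need_rebuild)
{
	if (f & ALTER_ADD_SYSTEM_VERSIONING) return false;
	if (!(f & ALTER_ADD_STORED_BASE_COLUMN)) return false;
	/* No rebuild or online index build besides the ADD COLUMN itself. */
	if (f & ((INNOBASE_ALTER_REBUILD | INNOBASE_ONLINE_CREATE)
		 & ~ALTER_ADD_STORED_BASE_COLUMN & ~ALTER_OPTIONS)) {
		return false;
	}
	/* Any ALTER_OPTIONS present must not require a rebuild. */
	return !(f & ALTER_OPTIONS) || !options_need_rebuild;
}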
+
+/** Check whether the field has a non-constant default value
+@param[in] field field which could be added or changed
+@return true if the non-const default is present. */
+static bool is_non_const_value(Field* field)
+{
+ return field->default_value
+ && field->default_value->flags
+ & uint(~(VCOL_SESSION_FUNC | VCOL_TIME_FUNC));
+}
+
+/** Set default value for the field.
+@param[in] field field which could be added or changed
+@return true if the default value is set. */
+static bool set_default_value(Field* field)
+{
+ /* The added/changed NOT NULL column lacks a DEFAULT value,
+ or the DEFAULT is the same for all rows.
+ (Time functions, such as CURRENT_TIMESTAMP(),
+ are evaluated from a timestamp that is assigned
+ at the start of the statement. Session
+ functions, such as USER(), always evaluate the
+ same within a statement.) */
+
+ ut_ad(!is_non_const_value(field));
+
+ /* Compute the DEFAULT values of non-constant columns
+ (VCOL_SESSION_FUNC | VCOL_TIME_FUNC). */
+ switch (field->set_default()) {
+ case 0: /* OK */
+ case 3: /* DATETIME to TIME or DATE conversion */
+ return true;
+ case -1: /* OOM, or GEOMETRY type mismatch */
+ case 1: /* A number adjusted to the min/max value */
+ case 2: /* String truncation, or conversion problem */
+ break;
+ }
+
+ return false;
+}
+
+/** Check whether the table has the FTS_DOC_ID column
+@param[in] table InnoDB table with fulltext index
+@param[in] altered_table MySQL table with fulltext index
+@param[out] fts_doc_col_no The column number for Doc ID,
+ or ULINT_UNDEFINED if it is of wrong type
+@param[out]	num_v		Number of virtual columns
+@param[in]	check_only	check only whether fts doc id exists.
+@return whether there exists an FTS_DOC_ID column */
+static
+bool
+innobase_fts_check_doc_id_col(
+ const dict_table_t* table,
+ const TABLE* altered_table,
+ ulint* fts_doc_col_no,
+ ulint* num_v,
+ bool check_only=false)
+{
+ *fts_doc_col_no = ULINT_UNDEFINED;
+
+ const uint n_cols = altered_table->s->fields;
+ ulint i;
+ int err = 0;
+ *num_v = 0;
+
+ for (i = 0; i < n_cols; i++) {
+ const Field* field = altered_table->field[i];
+
+ if (innobase_is_v_fld(field)) {
+ (*num_v)++;
+ }
+
+ if (my_strcasecmp(system_charset_info,
+ field->field_name.str, FTS_DOC_ID_COL_NAME)) {
+ continue;
+ }
+
+ if (strcmp(field->field_name.str, FTS_DOC_ID_COL_NAME)) {
+ err = ER_WRONG_COLUMN_NAME;
+ } else if (field->type() != MYSQL_TYPE_LONGLONG
+ || field->pack_length() != 8
+ || field->real_maybe_null()
+ || !(field->flags & UNSIGNED_FLAG)
+ || innobase_is_v_fld(field)) {
+ err = ER_INNODB_FT_WRONG_DOCID_COLUMN;
+ } else {
+ *fts_doc_col_no = i - *num_v;
+ }
+
+ if (err && !check_only) {
+ my_error(err, MYF(0), field->field_name.str);
+ }
+
+ return(true);
+ }
+
+ if (!table) {
+ return(false);
+ }
+
+ /* Not to count the virtual columns */
+ i -= *num_v;
+
+ for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
+ const char* name = dict_table_get_col_name(table, i);
+
+ if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
+#ifdef UNIV_DEBUG
+ const dict_col_t* col;
+
+ col = dict_table_get_nth_col(table, i);
+
+ /* Because the FTS_DOC_ID does not exist in
+ the MySQL data dictionary, this must be the
+ internally created FTS_DOC_ID column. */
+ ut_ad(col->mtype == DATA_INT);
+ ut_ad(col->len == 8);
+ ut_ad(col->prtype & DATA_NOT_NULL);
+ ut_ad(col->prtype & DATA_UNSIGNED);
+#endif /* UNIV_DEBUG */
+ *fts_doc_col_no = i;
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
/** Check if InnoDB supports a particular alter table in-place
@param altered_table TABLE object for new version of table.
@param ha_alter_info Structure describing changes to be done
by ALTER TABLE and holding data used during in-place alter.
@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
-@retval HA_ALTER_INPLACE_NO_LOCK Supported
-@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires
-lock during main phase and exclusive lock during prepare phase.
-@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
-requires exclusive lock (any transactions that have accessed the table
-must commit or roll back first, and no transactions can access the table
-while prepare_inplace_alter_table() is executing)
+@retval HA_ALTER_INPLACE_INSTANT
+MDL_EXCLUSIVE is needed for executing prepare_inplace_alter_table()
+and commit_inplace_alter_table(). inplace_alter_table() will not be called.
+@retval HA_ALTER_INPLACE_COPY_NO_LOCK
+MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded to
+LOCK=NONE for rebuilding the table in inplace_alter_table()
+@retval HA_ALTER_INPLACE_COPY_LOCK
+MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded to
+LOCK=SHARED for rebuilding the table in inplace_alter_table()
+@retval HA_ALTER_INPLACE_NOCOPY_NO_LOCK
+MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded to
+LOCK=NONE for inplace_alter_table() which will not rebuild the table
+@retval HA_ALTER_INPLACE_NOCOPY_LOCK
+MDL_EXCLUSIVE in prepare_inplace_alter_table(), which can be downgraded to
+LOCK=SHARED for inplace_alter_table() which will not rebuild the table
*/
enum_alter_inplace_result
ha_innobase::check_if_supported_inplace_alter(
-/*==========================================*/
TABLE* altered_table,
Alter_inplace_info* ha_alter_info)
{
DBUG_ENTER("check_if_supported_inplace_alter");
+ if ((ha_alter_info->handler_flags
+ & INNOBASE_ALTER_VERSIONED_REBUILD)
+ && altered_table->versioned(VERS_TIMESTAMP)) {
+ ha_alter_info->unsupported_reason =
+ "Not implemented for system-versioned timestamp tables";
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
/* Before 10.2.2 information about virtual columns was not stored in
system tables. We need to do a full alter to rebuild proper 10.2.2+
metadata with the information about virtual columns */
@@ -586,7 +884,7 @@ ha_innobase::check_if_supported_inplace_alter(
if (high_level_read_only) {
ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_READ_ONLY_MODE);
+ my_get_err_msg(ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -597,7 +895,7 @@ ha_innobase::check_if_supported_inplace_alter(
return an error too. This is how we effectively
deny adding too many columns to a table. */
ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_TOO_MANY_FIELDS);
+ my_get_err_msg(ER_TOO_MANY_FIELDS);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -605,57 +903,46 @@ ha_innobase::check_if_supported_inplace_alter(
if (ha_alter_info->handler_flags
& ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_INSTANT
| INNOBASE_ALTER_NOREBUILD
| INNOBASE_ALTER_REBUILD)) {
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ & ALTER_STORED_COLUMN_TYPE) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE);
}
+
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
/* Only support online add foreign key constraint when
check_foreigns is turned off */
- if ((ha_alter_info->handler_flags & Alter_inplace_info::ADD_FOREIGN_KEY)
+ if ((ha_alter_info->handler_flags & ALTER_ADD_FOREIGN_KEY)
&& m_prebuilt->trx->check_foreigns) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
-#if 0
- if (altered_table->file->ht != ht) {
- /* Non-native partitioning table engine. No longer supported,
- due to implementation of native InnoDB partitioning. */
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-#endif
-
- if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
- DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
- }
-
- /* Only support NULL -> NOT NULL change if strict table sql_mode
- is set. Fall back to COPY for conversion if not strict tables.
- In-Place will fail with an error when trying to convert
- NULL to a NOT NULL value. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE)
- && !thd_is_strict_mode(m_user_thd)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ switch (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ case ALTER_OPTIONS:
+ if (alter_options_need_rebuild(ha_alter_info, table)) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_TABLE_OPTIONS_NEED_REBUILD);
+ break;
+ }
+ /* fall through */
+ case 0:
+ DBUG_RETURN(HA_ALTER_INPLACE_INSTANT);
}
/* DROP PRIMARY KEY is only allowed in combination with ADD
PRIMARY KEY. */
if ((ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::DROP_PK_INDEX))
- == Alter_inplace_info::DROP_PK_INDEX) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ & (ALTER_ADD_PK_INDEX | ALTER_DROP_PK_INDEX))
+ == ALTER_DROP_PK_INDEX) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -665,14 +952,14 @@ ha_innobase::check_if_supported_inplace_alter(
table should be rebuilt. The change should
only go through the "Copy" method. */
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
+ & ALTER_COLUMN_NULLABLE)) {
const uint my_primary_key = altered_table->s->primary_key;
/* See if the MySQL table has no pk but InnoDB does. */
if (UNIV_UNLIKELY(my_primary_key >= MAX_KEY)
&& !dict_index_is_auto_gen_clust(
dict_table_get_first_index(m_prebuilt->table))) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_PRIMARY_CANT_HAVE_NULL);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -691,8 +978,9 @@ ha_innobase::check_if_supported_inplace_alter(
*/
for (ulint i = 0, icol= 0; i < table->s->fields; i++) {
const Field* field = table->field[i];
- const dict_col_t* col = dict_table_get_nth_col(m_prebuilt->table, icol);
- ulint unsigned_flag;
+ const dict_col_t* col = dict_table_get_nth_col(
+ m_prebuilt->table, icol);
+ ulint unsigned_flag;
if (!field->stored_in_db()) {
continue;
@@ -700,7 +988,8 @@ ha_innobase::check_if_supported_inplace_alter(
icol++;
- if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag, field)) {
+ if (col->mtype != get_innobase_type_from_mysql_type(
+ &unsigned_flag, field)) {
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -717,7 +1006,7 @@ ha_innobase::check_if_supported_inplace_alter(
use "Copy" method. */
if (m_prebuilt->table->dict_frm_mismatch) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_NO_SUCH_INDEX);
ib_push_frm_error(m_user_thd, m_prebuilt->table, altered_table,
n_indexes, true);
@@ -731,27 +1020,27 @@ ha_innobase::check_if_supported_inplace_alter(
with these 2 options alone with inplace interface for now */
if (ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_VIRTUAL_COLUMN
- | Alter_inplace_info::DROP_VIRTUAL_COLUMN
- | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) {
+ & (ALTER_ADD_VIRTUAL_COLUMN
+ | ALTER_DROP_VIRTUAL_COLUMN
+ | ALTER_VIRTUAL_COLUMN_ORDER)) {
ulonglong flags = ha_alter_info->handler_flags;
/* TODO: uncomment the flags below, once we start to
support them */
- flags &= ~(Alter_inplace_info::ADD_VIRTUAL_COLUMN
- | Alter_inplace_info::DROP_VIRTUAL_COLUMN
- | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER
- | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR
- | Alter_inplace_info::ALTER_COLUMN_VCOL
+ flags &= ~(ALTER_ADD_VIRTUAL_COLUMN
+ | ALTER_DROP_VIRTUAL_COLUMN
+ | ALTER_VIRTUAL_COLUMN_ORDER
+ | ALTER_VIRTUAL_GCOL_EXPR
+ | ALTER_COLUMN_VCOL
/*
- | Alter_inplace_info::ADD_STORED_BASE_COLUMN
- | Alter_inplace_info::DROP_STORED_COLUMN
- | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
- | Alter_inplace_info::ADD_UNIQUE_INDEX
+ | ALTER_ADD_STORED_BASE_COLUMN
+ | ALTER_DROP_STORED_COLUMN
+ | ALTER_STORED_COLUMN_ORDER
+ | ALTER_ADD_UNIQUE_INDEX
*/
- | Alter_inplace_info::ADD_INDEX
- | Alter_inplace_info::DROP_INDEX);
+ | ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX
+ | ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX);
if (flags != 0
|| IF_PARTITIONING((altered_table->s->partition_info_str
@@ -768,7 +1057,7 @@ ha_innobase::check_if_supported_inplace_alter(
/* We should be able to do the operation in-place.
See if we can do it online (LOCK=NONE). */
- bool online = true;
+ bool online = true;
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
@@ -790,7 +1079,8 @@ ha_innobase::check_if_supported_inplace_alter(
}
for (KEY_PART_INFO* key_part = new_key->key_part;
- key_part < new_key->key_part + new_key->user_defined_key_parts;
+ key_part < (new_key->key_part
+ + new_key->user_defined_key_parts);
key_part++) {
const Create_field* new_field;
@@ -824,7 +1114,7 @@ ha_innobase::check_if_supported_inplace_alter(
/* This is an added column. */
DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN);
+ & ALTER_ADD_COLUMN);
/* We cannot replace a hidden FTS_DOC_ID
with a user-visible FTS_DOC_ID. */
@@ -832,9 +1122,9 @@ ha_innobase::check_if_supported_inplace_alter(
&& innobase_fulltext_exist(altered_table)
&& !my_strcasecmp(
system_charset_info,
- key_part->field->field_name,
+ key_part->field->field_name.str,
FTS_DOC_ID_COL_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -849,8 +1139,12 @@ ha_innobase::check_if_supported_inplace_alter(
column values during online ALTER. */
DBUG_ASSERT(key_part->field == altered_table
-> found_next_number_field);
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
+
+ if (ha_alter_info->online) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
+ }
+
online = false;
}
@@ -859,36 +1153,41 @@ ha_innobase::check_if_supported_inplace_alter(
virtual column, while there is also a drop
virtual column in the same clause */
if (ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_VIRTUAL_COLUMN) {
+ & ALTER_DROP_VIRTUAL_COLUMN) {
ha_alter_info->unsupported_reason =
MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
- ha_alter_info->unsupported_reason =
- MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+ if (ha_alter_info->online
+ && !ha_alter_info->unsupported_reason) {
+ ha_alter_info->unsupported_reason =
+ MSG_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN;
+ }
+
online = false;
}
}
}
- DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col
- <= table->s->fields);
- DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col
- < dict_table_get_n_user_cols(m_prebuilt->table));
+ DBUG_ASSERT(!m_prebuilt->table->fts
+ || (m_prebuilt->table->fts->doc_col <= table->s->fields));
- if (m_prebuilt->table->fts
- && innobase_fulltext_exist(altered_table)) {
+ DBUG_ASSERT(!m_prebuilt->table->fts
+ || (m_prebuilt->table->fts->doc_col
+ < dict_table_get_n_user_cols(m_prebuilt->table)));
+
+ if (m_prebuilt->table->fts && innobase_fulltext_exist(altered_table)) {
/* FULLTEXT indexes are supposed to remain. */
/* Disallow DROP INDEX FTS_DOC_ID_INDEX */
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
if (!my_strcasecmp(
system_charset_info,
- ha_alter_info->index_drop_buffer[i]->name,
+ ha_alter_info->index_drop_buffer[i]->name.str,
FTS_DOC_ID_INDEX_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -906,9 +1205,9 @@ ha_innobase::check_if_supported_inplace_alter(
if (!my_strcasecmp(
system_charset_info,
- (*fp)->field_name,
+ (*fp)->field_name.str,
FTS_DOC_ID_COL_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ha_alter_info->unsupported_reason = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -917,87 +1216,19 @@ ha_innobase::check_if_supported_inplace_alter(
m_prebuilt->trx->will_lock++;
- if (!online) {
- /* We already determined that only a non-locking
- operation is possible. */
- } else if (((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX)
- || innobase_need_rebuild(ha_alter_info, table))
- && (innobase_fulltext_exist(altered_table)
- || innobase_spatial_exist(altered_table)
- || innobase_indexed_virtual_exist(altered_table))) {
- /* Refuse to rebuild the table online, if
- FULLTEXT OR SPATIAL indexes or indexed virtual columns
- are to survive the rebuild. */
- online = false;
- /* If the table already contains fulltext indexes,
- refuse to rebuild the table natively altogether. */
- if (m_prebuilt->table->fts) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_INNODB_FT_LIMIT);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- if (innobase_spatial_exist(altered_table)) {
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
- } else if (!innobase_fulltext_exist(altered_table)) {
- /* MDEV-14341 FIXME: Remove this limitation. */
- ha_alter_info->unsupported_reason =
- "online rebuild with indexed virtual columns";
- } else {
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
- }
- } else if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_INDEX)) {
- /* ADD FULLTEXT|SPATIAL INDEX requires a lock.
-
- We could do ADD FULLTEXT INDEX without a lock if the
- table already contains an FTS_DOC_ID column, but in
- that case we would have to apply the modification log
- to the full-text indexes.
-
- We could also do ADD SPATIAL INDEX by implementing
- row_log_apply() for it. */
-
- for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
- const KEY* key =
- &ha_alter_info->key_info_buffer[
- ha_alter_info->index_add_buffer[i]];
- if (key->flags & HA_FULLTEXT) {
- DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
- & ~(HA_FULLTEXT
- | HA_PACK_KEY
- | HA_GENERATED_KEY
- | HA_BINARY_PACK_KEY)));
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
- online = false;
- break;
- }
- if (key->flags & HA_SPATIAL) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
- online = false;
- break;
- }
- }
- }
-
/* When changing a NULL column to NOT NULL and specifying a
DEFAULT value, ensure that the DEFAULT expression is a constant.
Also, in ADD COLUMN, for now we only support a
constant DEFAULT expression. */
cf_it.rewind();
Field **af = altered_table->field;
+ bool add_column_not_last = false;
+ uint n_stored_cols = 0, n_add_cols = 0;
while (Create_field* cf = cf_it++) {
DBUG_ASSERT(cf->field
|| (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN));
+ & ALTER_ADD_COLUMN));
if (const Field* f = cf->field) {
/* This could be changing an existing column
@@ -1022,16 +1253,8 @@ ha_innobase::check_if_supported_inplace_alter(
/* No DEFAULT value is
specified. We can report
errors for any NULL values for
- the TIMESTAMP.
-
- FIXME: Allow any DEFAULT
- expression whose value does
- not change during ALTER TABLE.
- This would require a fix in
- row_merge_read_clustered_index()
- to try to replace the DEFAULT
- value before reporting
- DB_INVALID_NULL. */
+ the TIMESTAMP. */
+
goto next_column;
}
break;
@@ -1049,41 +1272,172 @@ ha_innobase::check_if_supported_inplace_alter(
}
ha_alter_info->unsupported_reason
- = innobase_get_err_msg(
+ = my_get_err_msg(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
- } else if (!(*af)->default_value
- || !((*af)->default_value->flags
- & ~(VCOL_SESSION_FUNC | VCOL_TIME_FUNC))) {
- /* The added NOT NULL column lacks a DEFAULT value,
- or the DEFAULT is the same for all rows.
- (Time functions, such as CURRENT_TIMESTAMP(),
- are evaluated from a timestamp that is assigned
- at the start of the statement. Session
- functions, such as USER(), always evaluate the
- same within a statement.) */
-
- /* Compute the DEFAULT values of non-constant columns
- (VCOL_SESSION_FUNC | VCOL_TIME_FUNC). */
- switch ((*af)->set_default()) {
- case 0: /* OK */
- case 3: /* DATETIME to TIME or DATE conversion */
+ } else if (!is_non_const_value(*af)) {
+
+ n_add_cols++;
+
+ if (af < &altered_table->field[table_share->fields]) {
+ add_column_not_last = true;
+ }
+
+ if (set_default_value(*af)) {
goto next_column;
- case -1: /* OOM, or GEOMETRY type mismatch */
- case 1: /* A number adjusted to the min/max value */
- case 2: /* String truncation, or conversion problem */
- break;
}
}
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
next_column:
- af++;
+ n_stored_cols += (*af++)->stored_in_db();
+ }
+
+ if (!add_column_not_last
+ && uint(m_prebuilt->table->n_cols) - DATA_N_SYS_COLS + n_add_cols
+ == n_stored_cols
+ && m_prebuilt->table->supports_instant()
+ && instant_alter_column_possible(ha_alter_info, table)) {
+
+ DBUG_RETURN(HA_ALTER_INPLACE_INSTANT);
+ }
+
+ if (!(ha_alter_info->handler_flags & ~(INNOBASE_ALTER_INSTANT
+ | INNOBASE_INPLACE_IGNORE))) {
+ DBUG_RETURN(HA_ALTER_INPLACE_INSTANT);
+ }
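
A standalone restatement of the counting check above (all names here are illustrative): instant ADD COLUMN is only attempted when no added column lands before an existing one and the old stored user columns plus the added columns account for every stored column of the altered table.

	static bool counts_allow_instant(unsigned old_n_cols_with_sys,
					 unsigned n_sys_cols,
					 unsigned n_add_cols,
					 unsigned n_stored_cols,
					 bool add_column_not_last)
	{
		return !add_column_not_last
			&& old_n_cols_with_sys - n_sys_cols + n_add_cols
			   == n_stored_cols;
	}
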
+
+ bool fts_need_rebuild = false;
+ const bool need_rebuild = innobase_need_rebuild(ha_alter_info, table);
+
+ if (!online) {
+ /* We already determined that only a non-locking
+ operation is possible. */
+ } else if ((need_rebuild || (ha_alter_info->handler_flags
+ & ALTER_ADD_PK_INDEX))
+ && (innobase_fulltext_exist(altered_table)
+ || innobase_spatial_exist(altered_table)
+ || innobase_indexed_virtual_exist(altered_table))) {
+		/* Refuse to rebuild the table online, if
+		FULLTEXT OR SPATIAL indexes or indexed virtual
+		columns are to survive the rebuild. */
+ online = false;
+ /* If the table already contains fulltext indexes,
+ refuse to rebuild the table natively altogether. */
+ if (m_prebuilt->table->fts) {
+cannot_create_many_fulltext_index:
+ ha_alter_info->unsupported_reason =
+ my_get_err_msg(ER_INNODB_FT_LIMIT);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ if (ha_alter_info->online
+ && !ha_alter_info->unsupported_reason) {
+
+ if (innobase_spatial_exist(altered_table)) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
+ } else if (!innobase_fulltext_exist(altered_table)) {
+ /* MDEV-14341 FIXME: Remove this limitation. */
+ ha_alter_info->unsupported_reason =
+ "online rebuild with indexed virtual columns";
+ } else {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ }
+ }
+
+ }
+
+ if (ha_alter_info->handler_flags
+ & ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX) {
+ /* ADD FULLTEXT|SPATIAL INDEX requires a lock.
+
+ We could do ADD FULLTEXT INDEX without a lock if the
+ table already contains an FTS_DOC_ID column, but in
+ that case we would have to apply the modification log
+ to the full-text indexes.
+
+ We could also do ADD SPATIAL INDEX by implementing
+ row_log_apply() for it. */
+ bool add_fulltext = false;
+
+ for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY* key =
+ &ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[i]];
+ if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_GENERATED_KEY
+ | HA_BINARY_PACK_KEY)));
+ if (add_fulltext) {
+ goto cannot_create_many_fulltext_index;
+ }
+
+ add_fulltext = true;
+ if (ha_alter_info->online
+ && !ha_alter_info->unsupported_reason) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ }
+
+ online = false;
+
+ /* Full text search index exists, check
+			whether the table already has a DOC ID column.
+			If not, InnoDB has to rebuild the table to
+ add a Doc ID hidden column and change
+ primary index. */
+ ulint fts_doc_col_no;
+ ulint num_v = 0;
+
+ fts_need_rebuild =
+ !innobase_fts_check_doc_id_col(
+ m_prebuilt->table,
+ altered_table,
+ &fts_doc_col_no, &num_v, true);
+ }
+
+ if (online && (key->flags & HA_SPATIAL)) {
+
+ if (ha_alter_info->online) {
+ ha_alter_info->unsupported_reason = my_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS);
+ }
+
+ online = false;
+ }
+ }
+ }
+
+ // FIXME: implement Online DDL for system-versioned operations
+ if (ha_alter_info->handler_flags & INNOBASE_ALTER_VERSIONED_REBUILD) {
+
+ if (ha_alter_info->online) {
+ ha_alter_info->unsupported_reason =
+ "Not implemented for system-versioned operations";
+ }
+
+ online = false;
+ }
+
+ if (need_rebuild || fts_need_rebuild) {
+ DBUG_RETURN(online
+ ? HA_ALTER_INPLACE_COPY_NO_LOCK
+ : HA_ALTER_INPLACE_COPY_LOCK);
+ }
+
+ if (ha_alter_info->unsupported_reason) {
+ } else if (ha_alter_info->handler_flags & INNOBASE_ONLINE_CREATE) {
+ ha_alter_info->unsupported_reason = "ADD INDEX";
+ } else {
+ ha_alter_info->unsupported_reason = "DROP INDEX";
}
DBUG_RETURN(online
- ? HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
- : HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
+ ? HA_ALTER_INPLACE_NOCOPY_NO_LOCK
+ : HA_ALTER_INPLACE_NOCOPY_LOCK);
}
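
A condensed sketch of how the remaining outcomes of this function relate (a mirror enum, not the real handler API): COPY_* is returned when the table must be rebuilt, NOCOPY_* when it can be modified without a rebuild, and the *_NO_LOCK variants are only offered while `online` is still true; the INSTANT cases returned earlier above.

	enum inplace_result {
		R_NOCOPY_NO_LOCK, R_NOCOPY_LOCK, R_COPY_NO_LOCK, R_COPY_LOCK
	};

	static inplace_result classify(bool need_rebuild, bool fts_need_rebuild,
				       bool online)
	{
		if (need_rebuild || fts_need_rebuild) {
			return online ? R_COPY_NO_LOCK : R_COPY_LOCK;
		}
		return online ? R_NOCOPY_NO_LOCK : R_NOCOPY_LOCK;
	}
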
/*************************************************************//**
@@ -1095,7 +1449,7 @@ innobase_init_foreign(
/*==================*/
dict_foreign_t* foreign, /*!< in/out: structure to
initialize */
- char* constraint_name, /*!< in/out: constraint name if
+ const char* constraint_name, /*!< in/out: constraint name if
exists */
dict_table_t* table, /*!< in: foreign table */
dict_index_t* index, /*!< in: foreign key index */
@@ -1221,7 +1575,6 @@ innobase_set_foreign_key_option(
ut_ad(!foreign->type);
switch (fk_key->delete_opt) {
- // JAN: TODO: ? MySQL 5.7 used enum fk_option directly from sql_lex.h
case FK_OPTION_NO_ACTION:
case FK_OPTION_RESTRICT:
case FK_OPTION_SET_DEFAULT:
@@ -1233,6 +1586,8 @@ innobase_set_foreign_key_option(
case FK_OPTION_SET_NULL:
foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL;
break;
+ case FK_OPTION_UNDEF:
+ break;
}
switch (fk_key->update_opt) {
@@ -1247,6 +1602,8 @@ innobase_set_foreign_key_option(
case FK_OPTION_SET_NULL:
foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
break;
+ case FK_OPTION_UNDEF:
+ break;
}
return(innobase_check_fk_option(foreign));
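
A minimal sketch (enumerators and flag bits invented) of what the added FK_OPTION_UNDEF arms presumably achieve: covering every enumerator, including an unspecified referential action, keeps the switch exhaustive and leaves foreign->type untouched in that case.

	enum fk_option_sketch { FK_UNDEF, FK_RESTRICT, FK_CASCADE, FK_SET_NULL };

	static unsigned delete_rule_flags(fk_option_sketch opt)
	{
		unsigned type = 0;
		switch (opt) {
		case FK_CASCADE:
			type |= 1U << 0;	// ON DELETE CASCADE
			break;
		case FK_SET_NULL:
			type |= 1U << 1;	// ON DELETE SET NULL
			break;
		case FK_RESTRICT:
		case FK_UNDEF:
			break;			// no action bit recorded
		}
		return type;
	}
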
@@ -1303,7 +1660,7 @@ no_match:
}
if (innobase_strcasecmp(col_names[j],
- key_part.field->field_name)) {
+ key_part.field->field_name.str)) {
/* Name mismatch */
goto no_match;
}
@@ -1319,12 +1676,10 @@ no_match:
Find an index whose first fields are the columns in the array
in the same order and is not marked for deletion
@return matching index, NULL if not found */
-static MY_ATTRIBUTE((nonnull(1,2,6), warn_unused_result))
+static MY_ATTRIBUTE((nonnull(1,5), warn_unused_result))
dict_index_t*
innobase_find_fk_index(
/*===================*/
- Alter_inplace_info* ha_alter_info,
- /*!< in: alter table info */
dict_table_t* table, /*!< in: table */
const char** col_names,
/*!< in: column names, or NULL
@@ -1504,7 +1859,6 @@ innobase_get_foreign_key_info(
}
index = innobase_find_fk_index(
- ha_alter_info,
table, col_names,
drop_index, n_drop_index,
column_names, i);
@@ -1636,7 +1990,7 @@ innobase_get_foreign_key_info(
/* Not possible to add a foreign key without a
referenced column */
mutex_exit(&dict_sys->mutex);
- my_error(ER_CANNOT_ADD_FOREIGN, MYF(0));
+ my_error(ER_CANNOT_ADD_FOREIGN, MYF(0), tbl_namep);
goto err_exit;
}
@@ -1820,7 +2174,7 @@ null_field:
continue;
}
- ifield = rec_get_nth_field(rec, offsets, ipos, &ilen);
+ ifield = rec_get_nth_cfield(rec, index, offsets, ipos, &ilen);
/* Assign the NULL flag */
if (ilen == UNIV_SQL_NULL) {
@@ -1946,21 +2300,6 @@ innobase_row_to_mysql(
}
}
-/*************************************************************//**
-Resets table->record[0]. */
-void
-innobase_rec_reset(
-/*===============*/
- TABLE* table) /*!< in/out: MySQL table */
-{
- uint n_fields = table->s->fields;
- uint i;
-
- for (i = 0; i < n_fields; i++) {
- table->field[i]->set_default();
- }
-}
-
/*******************************************************************//**
This function checks that index keys are sensible.
@return 0 or error number */
@@ -1985,9 +2324,9 @@ innobase_check_index_keys(
const KEY& key2 = info->key_info_buffer[
info->index_add_buffer[i]];
- if (0 == strcmp(key.name, key2.name)) {
+ if (0 == strcmp(key.name.str, key2.name.str)) {
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- key.name);
+ key.name.str);
return(ER_WRONG_NAME_FOR_INDEX);
}
@@ -2001,7 +2340,7 @@ innobase_check_index_keys(
index; index = dict_table_get_next_index(index)) {
if (index->is_committed()
- && !strcmp(key.name, index->name)) {
+ && !strcmp(key.name.str, index->name)) {
break;
}
}
@@ -2026,7 +2365,8 @@ innobase_check_index_keys(
const KEY* drop_key
= info->index_drop_buffer[i];
- if (0 == strcmp(key.name, drop_key->name)) {
+ if (0 == strcmp(key.name.str,
+ drop_key->name.str)) {
goto name_ok;
}
}
@@ -2050,8 +2390,7 @@ innobase_check_index_keys(
#endif /* MYSQL_RENAME_INDEX */
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- key.name);
-
+ key.name.str);
return(ER_WRONG_NAME_FOR_INDEX);
}
@@ -2090,7 +2429,7 @@ name_ok:
}
my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name);
+ field->field_name.str);
return(ER_WRONG_KEY_COLUMN);
}
@@ -2106,7 +2445,7 @@ name_ok:
}
my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name);
+ field->field_name.str);
return(ER_WRONG_KEY_COLUMN);
}
}
@@ -2211,7 +2550,7 @@ innobase_create_index_def(
index->parser = NULL;
index->key_number = key_number;
index->n_fields = n_fields;
- index->name = mem_heap_strdup(heap, key->name);
+ index->name = mem_heap_strdup(heap, key->name.str);
index->rebuild = new_clustered;
if (key_clustered) {
@@ -2231,8 +2570,8 @@ innobase_create_index_def(
if (key->flags & HA_USES_PARSER) {
for (ulint j = 0; j < altered_table->s->keys; j++) {
- if (ut_strcmp(altered_table->key_info[j].name,
- key->name) == 0) {
+ if (ut_strcmp(altered_table->key_info[j].name.str,
+ key->name.str) == 0) {
ut_ad(altered_table->key_info[j].flags
& HA_USES_PARSER);
@@ -2297,92 +2636,6 @@ innobase_create_index_def(
}
/*******************************************************************//**
-Check whether the table has the FTS_DOC_ID column
-@return whether there exists an FTS_DOC_ID column */
-static
-bool
-innobase_fts_check_doc_id_col(
-/*==========================*/
- const dict_table_t* table, /*!< in: InnoDB table with
- fulltext index */
- const TABLE* altered_table,
- /*!< in: MySQL table with
- fulltext index */
- ulint* fts_doc_col_no,
- /*!< out: The column number for
- Doc ID, or ULINT_UNDEFINED
- if it is of wrong type */
- ulint* num_v) /*!< out: number of virtual column */
-{
- *fts_doc_col_no = ULINT_UNDEFINED;
-
- const uint n_cols = altered_table->s->fields;
- ulint i;
-
- *num_v = 0;
-
- for (i = 0; i < n_cols; i++) {
- const Field* field = altered_table->field[i];
-
- if (innobase_is_v_fld(field)) {
- (*num_v)++;
- }
-
- if (my_strcasecmp(system_charset_info,
- field->field_name, FTS_DOC_ID_COL_NAME)) {
- continue;
- }
-
- if (strcmp(field->field_name, FTS_DOC_ID_COL_NAME)) {
- my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name);
- } else if (field->type() != MYSQL_TYPE_LONGLONG
- || field->pack_length() != 8
- || field->real_maybe_null()
- || !(field->flags & UNSIGNED_FLAG)
- || innobase_is_v_fld(field)) {
- my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
- field->field_name);
- } else {
- *fts_doc_col_no = i - *num_v;
- }
-
- return(true);
- }
-
- if (!table) {
- return(false);
- }
-
- /* Not to count the virtual columns */
- i -= *num_v;
-
- for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
- const char* name = dict_table_get_col_name(table, i);
-
- if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
-#ifdef UNIV_DEBUG
- const dict_col_t* col;
-
- col = dict_table_get_nth_col(table, i);
-
- /* Because the FTS_DOC_ID does not exist in
- the MySQL data dictionary, this must be the
- internally created FTS_DOC_ID column. */
- ut_ad(col->mtype == DATA_INT);
- ut_ad(col->len == 8);
- ut_ad(col->prtype & DATA_NOT_NULL);
- ut_ad(col->prtype & DATA_UNSIGNED);
-#endif /* UNIV_DEBUG */
- *fts_doc_col_no = i;
- return(true);
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
@return the status of the FTS_DOC_ID index */
@@ -2408,14 +2661,14 @@ innobase_fts_check_doc_id_index(
const KEY& key = altered_table->key_info[i];
if (innobase_strcasecmp(
- key.name, FTS_DOC_ID_INDEX_NAME)) {
+ key.name.str, FTS_DOC_ID_INDEX_NAME)) {
continue;
}
if ((key.flags & HA_NOSAME)
&& key.user_defined_key_parts == 1
- && !strcmp(key.name, FTS_DOC_ID_INDEX_NAME)
- && !strcmp(key.key_part[0].field->field_name,
+ && !strcmp(key.name.str, FTS_DOC_ID_INDEX_NAME)
+ && !strcmp(key.key_part[0].field->field_name.str,
FTS_DOC_ID_COL_NAME)) {
if (fts_doc_col_no) {
*fts_doc_col_no = ULINT_UNDEFINED;
@@ -2455,7 +2708,7 @@ innobase_fts_check_doc_id_index(
&& field->col->mtype == DATA_INT
&& field->col->len == 8
&& field->col->prtype & DATA_NOT_NULL
- && !dict_col_is_virtual(field->col)) {
+ && !field->col->is_virtual()) {
if (fts_doc_col_no) {
*fts_doc_col_no = dict_col_get_no(field->col);
}
@@ -2485,7 +2738,7 @@ innobase_fts_check_doc_id_index_in_def(
for (ulint j = 0; j < n_key; j++) {
const KEY* key = &key_info[j];
- if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
+ if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
continue;
}
@@ -2493,8 +2746,8 @@ innobase_fts_check_doc_id_index_in_def(
named as "FTS_DOC_ID_INDEX" and on column "FTS_DOC_ID" */
if (!(key->flags & HA_NOSAME)
|| key->user_defined_key_parts != 1
- || strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
- || strcmp(key->key_part[0].field->field_name,
+ || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
+ || strcmp(key->key_part[0].field->field_name.str,
FTS_DOC_ID_COL_NAME)) {
return(FTS_INCORRECT_DOC_ID_INDEX);
}
@@ -2565,7 +2818,7 @@ innobase_create_key_defs(
new_primary = n_add > 0
&& !my_strcasecmp(system_charset_info,
- key_info[*add].name, "PRIMARY");
+ key_info[*add].name.str, "PRIMARY");
n_fts_add = 0;
/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
@@ -2608,7 +2861,7 @@ innobase_create_key_defs(
index->ind_type = DICT_CLUSTERED;
index->name = innobase_index_reserve_name;
index->rebuild = true;
- index->key_number = ~0;
+ index->key_number = ~0U;
primary_key_number = ULINT_UNDEFINED;
goto created_clustered;
} else {
@@ -2780,7 +3033,7 @@ online_retry_drop_indexes(
online_retry_drop_indexes_low(table, trx);
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
ut_d(mutex_enter(&dict_sys->mutex));
@@ -2949,12 +3202,11 @@ column that is being dropped or modified to NOT NULL.
@retval true Not allowed (will call my_error())
@retval false Allowed
*/
-MY_ATTRIBUTE((pure, nonnull(1,2,3,4), warn_unused_result))
+MY_ATTRIBUTE((pure, nonnull(1,2,3), warn_unused_result))
static
bool
innobase_check_foreigns(
Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
const TABLE* old_table,
const dict_table_t* user_table,
dict_foreign_t** drop_fk,
@@ -2979,7 +3231,7 @@ innobase_check_foreigns(
if (!new_field || (new_field->flags & NOT_NULL_FLAG)) {
if (innobase_check_foreigns_low(
user_table, drop_fk, n_drop_fk,
- (*fp)->field_name, !new_field)) {
+ (*fp)->field_name.str, !new_field)) {
return(true);
}
}
@@ -2989,20 +3241,23 @@ innobase_check_foreigns(
}
/** Convert a default value for ADD COLUMN.
-
-@param heap Memory heap where allocated
-@param dfield InnoDB data field to copy to
-@param field MySQL value for the column
-@param comp nonzero if in compact format */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_build_col_map_add(
-/*=======================*/
+@param[in,out] heap Memory heap where allocated
+@param[out] dfield InnoDB data field to copy to
+@param[in] field MySQL value for the column
+@param[in] old_field Old field or NULL if new col is added
+@param[in] comp nonzero if in compact format. */
+static void innobase_build_col_map_add(
mem_heap_t* heap,
dfield_t* dfield,
const Field* field,
+ const Field* old_field,
ulint comp)
{
+ if (old_field && old_field->real_maybe_null()
+ && field->real_maybe_null()) {
+ return;
+ }
+
if (field->is_real_null()) {
dfield_set_null(dfield);
return;
@@ -3012,7 +3267,7 @@ innobase_build_col_map_add(
byte* buf = static_cast<byte*>(mem_heap_alloc(heap, size));
- const byte* mysql_data = field->ptr;
+ const byte* mysql_data = old_field ? old_field->ptr : field->ptr;
row_mysql_store_col_in_innobase_format(
dfield, buf, true, mysql_data, size, comp);
@@ -3026,7 +3281,7 @@ adding columns.
@param table MySQL table as it is before the ALTER operation
@param new_table InnoDB table corresponding to MySQL altered_table
@param old_table InnoDB table corresponding to MYSQL table
-@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN
+@param defaults Default values for ADD COLUMN, or NULL if no ADD COLUMN
@param heap Memory heap where allocated
@return array of integers, mapping column numbers in the table
to column numbers in altered_table */
@@ -3039,7 +3294,7 @@ innobase_build_col_map(
const TABLE* table,
const dict_table_t* new_table,
const dict_table_t* old_table,
- dtuple_t* add_cols,
+ dtuple_t* defaults,
mem_heap_t* heap)
{
DBUG_ENTER("innobase_build_col_map");
@@ -3051,14 +3306,14 @@ innobase_build_col_map(
DBUG_ASSERT(dict_table_get_n_cols(old_table)
+ dict_table_get_n_v_cols(old_table)
>= table->s->fields + DATA_N_SYS_COLS);
- DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN));
- DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
+ DBUG_ASSERT(!!defaults == !!(ha_alter_info->handler_flags
+ & INNOBASE_DEFAULTS));
+ DBUG_ASSERT(!defaults || dtuple_get_n_fields(defaults)
== dict_table_get_n_cols(new_table));
ulint* col_map = static_cast<ulint*>(
mem_heap_alloc(
- heap, (old_table->n_cols + old_table->n_v_cols)
+ heap, unsigned(old_table->n_cols + old_table->n_v_cols)
* sizeof *col_map));
List_iterator_fast<Create_field> cf_it(
@@ -3077,11 +3332,7 @@ innobase_build_col_map(
}
while (const Create_field* new_field = cf_it++) {
- bool is_v = false;
-
- if (innobase_is_v_fld(new_field)) {
- is_v = true;
- }
+ bool is_v = innobase_is_v_fld(new_field);
ulint num_old_v = 0;
@@ -3099,6 +3350,21 @@ innobase_build_col_map(
}
if (new_field->field == field) {
+
+ const Field* altered_field =
+ altered_table->field[i + num_v];
+
+ if (defaults) {
+ innobase_build_col_map_add(
+ heap,
+ dtuple_get_nth_field(
+ defaults, i),
+ altered_field,
+ field,
+ dict_table_is_comp(
+ new_table));
+ }
+
col_map[old_i - num_old_v] = i;
goto found_col;
}
@@ -3106,8 +3372,9 @@ innobase_build_col_map(
ut_ad(!is_v);
innobase_build_col_map_add(
- heap, dtuple_get_nth_field(add_cols, i),
+ heap, dtuple_get_nth_field(defaults, i),
altered_table->field[i + num_v],
+ NULL,
dict_table_is_comp(new_table));
found_col:
if (is_v) {
@@ -3219,7 +3486,7 @@ innobase_get_col_names(
DBUG_ENTER("innobase_get_col_names");
DBUG_ASSERT(user_table->n_t_def > table->s->fields);
DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME);
+ & ALTER_COLUMN_NAME);
cols = static_cast<const char**>(
mem_heap_zalloc(heap, user_table->n_def * sizeof *cols));
@@ -3241,7 +3508,7 @@ innobase_get_col_names(
}
if (new_field->field == table->field[old_i]) {
- cols[old_i - num_v] = new_field->field_name;
+ cols[old_i - num_v] = new_field->field_name.str;
break;
}
}
@@ -3392,10 +3659,11 @@ innobase_pk_order_preserved(
const bool old_pk_column = old_field < old_n_uniq;
if (old_pk_column) {
- new_field_order = old_field;
+ new_field_order = lint(old_field);
} else if (innobase_pk_col_is_existing(new_col_no, col_map,
old_n_cols)) {
- new_field_order = old_n_uniq + existing_field_count++;
+ new_field_order = lint(old_n_uniq
+ + existing_field_count++);
} else {
/* Skip newly added column. */
continue;
@@ -3529,7 +3797,7 @@ innobase_check_gis_columns(
ulint col_nr = dict_table_has_column(
table,
- key_part.field->field_name,
+ key_part.field->field_name.str,
key_part.fieldnr);
ut_ad(col_nr != table->n_def);
dict_col_t* col = &table->cols[col_nr];
@@ -3652,7 +3920,7 @@ prepare_inplace_add_virtual(
if (charset_no > MAX_CHAR_COLL_NUM) {
my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name);
+ field->field_name.str);
return(true);
}
} else {
@@ -3683,7 +3951,7 @@ prepare_inplace_add_virtual(
ctx->add_vcol[j].m_col.ind = i - 1;
ctx->add_vcol[j].num_base = 0;
- ctx->add_vcol_name[j] = field->field_name;
+ ctx->add_vcol_name[j] = field->field_name.str;
ctx->add_vcol[j].base_col = NULL;
ctx->add_vcol[j].v_pos = ctx->old_table->n_v_cols
- ctx->num_to_drop_vcol + j;
@@ -3699,7 +3967,6 @@ prepare_inplace_add_virtual(
/** Collect virtual column info for its addition
@param[in] ha_alter_info Data used during in-place alter
-@param[in] altered_table MySQL table that is being altered to
@param[in] table MySQL table as it is before the ALTER operation
@retval true Failure
@retval false Success */
@@ -3707,7 +3974,6 @@ static
bool
prepare_inplace_drop_virtual(
Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
const TABLE* table)
{
ha_innobase_inplace_ctx* ctx;
@@ -3771,7 +4037,7 @@ prepare_inplace_drop_virtual(
if (charset_no > MAX_CHAR_COLL_NUM) {
my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name);
+ field->field_name.str);
return(true);
}
} else {
@@ -3802,7 +4068,7 @@ prepare_inplace_drop_virtual(
ctx->drop_vcol[j].m_col.ind = i;
- ctx->drop_vcol_name[j] = field->field_name;
+ ctx->drop_vcol_name[j] = field->field_name.str;
dict_v_col_t* v_col = dict_table_get_nth_v_col_mysql(
ctx->old_table, i);
@@ -3902,40 +4168,38 @@ innobase_add_one_virtual(
return(error);
}
-/** Update INNODB SYS_TABLES on number of virtual columns
+/** Update SYS_TABLES.N_COLS in the data dictionary.
@param[in] user_table InnoDB table
-@param[in] n_col number of columns
+@param[in] n_cols the new value of SYS_TABLES.N_COLS
@param[in] trx transaction
-@return DB_SUCCESS if successful, otherwise error code */
+@return whether the operation failed */
static
-dberr_t
-innobase_update_n_virtual(
- const dict_table_t* table,
- ulint n_col,
- trx_t* trx)
+bool
+innodb_update_n_cols(const dict_table_t* table, ulint n_cols, trx_t* trx)
{
- dberr_t err = DB_SUCCESS;
pars_info_t* info = pars_info_create();
- pars_info_add_int4_literal(info, "num_col", n_col);
+ pars_info_add_int4_literal(info, "n", n_cols);
pars_info_add_ull_literal(info, "id", table->id);
- err = que_eval_sql(
- info,
- "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES"
- " SET N_COLS = :num_col\n"
- " WHERE ID = :id;\n"
- "END;\n", FALSE, trx);
+ dberr_t err = que_eval_sql(info,
+ "PROCEDURE UPDATE_N_COLS () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET N_COLS = :n"
+ " WHERE ID = :id;\n"
+ "END;\n", FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: Updating SYS_TABLES.N_COLS failed");
+ return true;
+ }
- return(err);
+ return false;
}
/** Update system table for adding virtual column(s)
@param[in] ha_alter_info Data used during in-place alter
-@param[in] altered_table MySQL table that is being altered
-@param[in] table MySQL table as it is before the ALTER operation
@param[in] user_table InnoDB table
@param[in] trx transaction
@retval true Failure
@@ -3944,8 +4208,6 @@ static
bool
innobase_add_virtual_try(
Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
- const TABLE* table,
const dict_table_t* user_table,
trx_t* trx)
{
@@ -3969,27 +4231,261 @@ innobase_add_virtual_try(
}
- ulint n_col = user_table->n_cols;
- ulint n_v_col = user_table->n_v_cols;
+ ulint n_col = unsigned(user_table->n_cols) - DATA_N_SYS_COLS;
+ ulint n_v_col = unsigned(user_table->n_v_cols)
+ + ctx->num_to_add_vcol - ctx->num_to_drop_vcol;
+ ulint new_n = dict_table_encode_n_col(n_col, n_v_col)
+ + (unsigned(user_table->flags & DICT_TF_COMPACT) << 31);
+
+ return innodb_update_n_cols(user_table, new_n, trx);
+}
+
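A sketch of the SYS_TABLES.N_COLS packing that both innobase_add_virtual_try() and innobase_drop_virtual_try() now hand to innodb_update_n_cols(); the exact bit layout is stated here as an assumption mirroring dict_table_encode_n_col() plus the DICT_TF_COMPACT bit shifted into bit 31.

	#include <cstdint>

	// Assumed layout: stored user columns in the low bits, virtual
	// columns above them, top bit = COMPACT-or-newer row format.
	static uint32_t encode_sys_tables_n_cols(uint32_t n_stored_user_cols,
						 uint32_t n_virtual_cols,
						 bool compact_or_newer)
	{
		uint32_t n = n_stored_user_cols + (n_virtual_cols << 16);
		if (compact_or_newer) {
			n |= 1U << 31;
		}
		return n;
	}
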
+/** Insert into SYS_COLUMNS and insert/update the hidden metadata record
+for instant ADD COLUMN.
+@param[in,out] ctx ALTER TABLE context for the current partition
+@param[in] altered_table MySQL table that is being altered
+@param[in] table MySQL table as it is before the ALTER operation
+@param[in,out] trx dictionary transaction
+@retval true failure
+@retval false success */
+static
+bool
+innobase_add_instant_try(
+ ha_innobase_inplace_ctx*ctx,
+ const TABLE* altered_table,
+ const TABLE* table,
+ trx_t* trx)
+{
+ DBUG_ASSERT(!ctx->need_rebuild());
+
+ if (!ctx->is_instant()) return false;
+
+ DBUG_ASSERT(altered_table->s->fields > table->s->fields);
+ DBUG_ASSERT(ctx->old_table->n_cols == ctx->old_n_cols);
- n_v_col += ctx->num_to_add_vcol;
+ dict_table_t* user_table = ctx->old_table;
+ user_table->instant_add_column(*ctx->instant_table);
+ dict_index_t* index = dict_table_get_first_index(user_table);
+ /* The table may have been emptied and may have lost its
+ 'instant-add-ness' during this instant ADD COLUMN. */
- n_col -= dict_table_get_n_sys_cols(user_table);
+ /* Construct a table row of default values for the stored columns. */
+ dtuple_t* row = dtuple_create(ctx->heap, user_table->n_cols);
+ dict_table_copy_types(row, user_table);
+ Field** af = altered_table->field;
+ Field** const end = altered_table->field + altered_table->s->fields;
- n_v_col -= ctx->num_to_drop_vcol;
+ for (uint i = 0; af < end; af++) {
+ if (!(*af)->stored_in_db()) {
+ continue;
+ }
- ulint new_n = dict_table_encode_n_col(n_col, n_v_col)
- + ((user_table->flags & DICT_TF_COMPACT) << 31);
+ dict_col_t* col = dict_table_get_nth_col(user_table, i);
+ DBUG_ASSERT(!strcmp((*af)->field_name.str,
+ dict_table_get_col_name(user_table, i)));
+
+ dfield_t* d = dtuple_get_nth_field(row, i);
+
+ if (col->is_instant()) {
+ dfield_set_data(d, col->def_val.data,
+ col->def_val.len);
+ } else if ((*af)->real_maybe_null()) {
+ /* Store NULL for nullable 'core' columns. */
+ dfield_set_null(d);
+ } else {
+ switch ((*af)->type()) {
+ case MYSQL_TYPE_VARCHAR:
+ case MYSQL_TYPE_GEOMETRY:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ /* Store the empty string for 'core'
+ variable-length NOT NULL columns. */
+ dfield_set_data(d, field_ref_zero, 0);
+ break;
+ default:
+ /* For fixed-length NOT NULL 'core' columns,
+ get a dummy default value from SQL. Note that
+ we will preserve the old values of these
+ columns when updating the metadata
+ record, to avoid unnecessary updates. */
+ ulint len = (*af)->pack_length();
+ DBUG_ASSERT(d->type.mtype != DATA_INT
+ || len <= 8);
+ row_mysql_store_col_in_innobase_format(
+ d, d->type.mtype == DATA_INT
+ ? static_cast<byte*>(
+ mem_heap_alloc(ctx->heap, len))
+ : NULL, true, (*af)->ptr, len,
+ dict_table_is_comp(user_table));
+ }
+ }
+
+ if (i + DATA_N_SYS_COLS < ctx->old_n_cols) {
+ i++;
+ continue;
+ }
+
+ pars_info_t* info = pars_info_create();
+ pars_info_add_ull_literal(info, "id", user_table->id);
+ pars_info_add_int4_literal(info, "pos", i);
+ pars_info_add_str_literal(info, "name", (*af)->field_name.str);
+ pars_info_add_int4_literal(info, "mtype", d->type.mtype);
+ pars_info_add_int4_literal(info, "prtype", d->type.prtype);
+ pars_info_add_int4_literal(info, "len", d->type.len);
- err = innobase_update_n_virtual(user_table, new_n, trx);
+ dberr_t err = que_eval_sql(
+ info,
+ "PROCEDURE ADD_COL () IS\n"
+ "BEGIN\n"
+ "INSERT INTO SYS_COLUMNS VALUES"
+ "(:id,:pos,:name,:mtype,:prtype,:len,0);\n"
+ "END;\n", FALSE, trx);
+ if (err != DB_SUCCESS) {
+ my_error(ER_INTERNAL_ERROR, MYF(0),
+ "InnoDB: Insert into SYS_COLUMNS failed");
+ return(true);
+ }
+
+ i++;
+ }
+
+ if (innodb_update_n_cols(user_table, dict_table_encode_n_col(
+ unsigned(user_table->n_cols)
+ - DATA_N_SYS_COLS,
+ user_table->n_v_cols)
+ | (user_table->flags & DICT_TF_COMPACT) << 31,
+ trx)) {
+ return true;
+ }
+
+ unsigned i = unsigned(user_table->n_cols) - DATA_N_SYS_COLS;
+ byte trx_id[DATA_TRX_ID_LEN], roll_ptr[DATA_ROLL_PTR_LEN];
+ dfield_set_data(dtuple_get_nth_field(row, i++), field_ref_zero,
+ DATA_ROW_ID_LEN);
+ dfield_set_data(dtuple_get_nth_field(row, i++), trx_id, sizeof trx_id);
+ dfield_set_data(dtuple_get_nth_field(row, i),roll_ptr,sizeof roll_ptr);
+ DBUG_ASSERT(i + 1 == user_table->n_cols);
+
+ trx_write_trx_id(trx_id, trx->id);
+ /* The DB_ROLL_PTR will be assigned later, when allocating undo log.
+ Silence a Valgrind warning in dtuple_validate() when
+ row_ins_clust_index_entry_low() searches for the insert position. */
+ memset(roll_ptr, 0, sizeof roll_ptr);
+
+ dtuple_t* entry = row_build_index_entry(row, NULL, index, ctx->heap);
+ entry->info_bits = REC_INFO_METADATA;
+
+ mtr_t mtr;
+ mtr.start();
+ index->set_modified(mtr);
+ btr_pcur_t pcur;
+ btr_pcur_open_at_index_side(true, index, BTR_MODIFY_TREE, &pcur, true,
+ 0, &mtr);
+ ut_ad(btr_pcur_is_before_first_on_page(&pcur));
+ btr_pcur_move_to_next_on_page(&pcur);
+
+ buf_block_t* block = btr_pcur_get_block(&pcur);
+ ut_ad(page_is_leaf(block->frame));
+ ut_ad(!page_has_prev(block->frame));
+ ut_ad(!buf_block_get_page_zip(block));
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
+ que_thr_t* thr = pars_complete_graph_for_exec(
+ NULL, trx, ctx->heap, NULL);
+
+ dberr_t err;
+ if (rec_is_metadata(rec, index)) {
+ ut_ad(page_rec_is_user_rec(rec));
+ if (!page_has_next(block->frame)
+ && page_rec_is_last(rec, block->frame)) {
+ goto empty_table;
+ }
+ /* Extend the record with the instantly added columns. */
+ const unsigned n = user_table->n_cols - ctx->old_n_cols;
+ /* Reserve room for DB_TRX_ID,DB_ROLL_PTR and any
+ non-updated off-page columns in case they are moved off
+ page as a result of the update. */
+ upd_t* update = upd_create(index->n_fields, ctx->heap);
+ update->n_fields = n;
+ update->info_bits = REC_INFO_METADATA;
+ /* Add the default values for instantly added columns */
+ for (unsigned i = 0; i < n; i++) {
+ upd_field_t* uf = upd_get_nth_field(update, i);
+ unsigned f = index->n_fields - n + i;
+ uf->field_no = f;
+ uf->new_val = entry->fields[f];
+ }
+ ulint* offsets = NULL;
+ mem_heap_t* offsets_heap = NULL;
+ big_rec_t* big_rec;
+ err = btr_cur_pessimistic_update(
+ BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG,
+ btr_pcur_get_btr_cur(&pcur),
+ &offsets, &offsets_heap, ctx->heap,
+ &big_rec, update, UPD_NODE_NO_ORD_CHANGE,
+ thr, trx->id, &mtr);
+ if (big_rec) {
+ if (err == DB_SUCCESS) {
+ err = btr_store_big_rec_extern_fields(
+ &pcur, offsets, big_rec, &mtr,
+ BTR_STORE_UPDATE);
+ }
+
+ dtuple_big_rec_free(big_rec);
+ }
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+ btr_pcur_close(&pcur);
+ goto func_exit;
+ } else if (page_rec_is_supremum(rec)) {
+empty_table:
+ /* The table is empty. */
+ ut_ad(page_is_root(block->frame));
+ btr_page_empty(block, NULL, index, 0, &mtr);
+ index->remove_instant();
+ err = DB_SUCCESS;
+ goto func_exit;
+ }
+
+ /* Convert the table to the instant ADD COLUMN format. */
+ ut_ad(user_table->is_instant());
+ mtr.commit();
+ mtr.start();
+ index->set_modified(mtr);
+ if (page_t* root = btr_root_get(index, &mtr)) {
+ if (fil_page_get_type(root) != FIL_PAGE_INDEX) {
+ DBUG_ASSERT(!"wrong page type");
+ goto err_exit;
+ }
+
+ DBUG_ASSERT(!page_is_comp(root) || !page_get_instant(root));
+ mlog_write_ulint(root + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_INSTANT, MLOG_2BYTES,
+ &mtr);
+ page_set_instant(root, index->n_core_fields, &mtr);
+ mtr.commit();
+ mtr.start();
+ index->set_modified(mtr);
+ err = row_ins_clust_index_entry_low(
+ BTR_NO_LOCKING_FLAG, BTR_MODIFY_TREE, index,
+ index->n_uniq, entry, 0, thr, false);
+ } else {
+err_exit:
+ err = DB_CORRUPTION;
+ }
+
+func_exit:
+ mtr.commit();
if (err != DB_SUCCESS) {
- my_error(ER_INTERNAL_ERROR, MYF(0),
- "InnoDB: ADD COLUMN...VIRTUAL");
- return(true);
+ my_error_innodb(err, table->s->table_name.str,
+ user_table->flags);
+ return true;
}
- return(false);
+ return false;
}
/** Update INNODB SYS_COLUMNS on new virtual column's position
@@ -4103,11 +4599,11 @@ innobase_drop_one_virtual_sys_columns(
for (ulint i = v_col->v_pos + 1; i < table->n_v_cols; i++) {
dict_v_col_t* t_col = dict_table_get_nth_v_col(table, i);
ulint old_p = dict_create_v_col_pos(
- t_col->v_pos - n_prev_dropped,
- t_col->m_col.ind - n_prev_dropped);
+ t_col->v_pos - n_prev_dropped,
+ t_col->m_col.ind - n_prev_dropped);
ulint new_p = dict_create_v_col_pos(
- t_col->v_pos - 1 - n_prev_dropped,
- t_col->m_col.ind - 1 - n_prev_dropped);
+ t_col->v_pos - 1 - n_prev_dropped,
+ ulint(t_col->m_col.ind) - 1 - n_prev_dropped);
error = innobase_update_v_pos_sys_columns(
table, old_p, new_p, trx);
@@ -4156,8 +4652,6 @@ innobase_drop_one_virtual_sys_virtual(
/** Update system table for dropping virtual column(s)
@param[in] ha_alter_info Data used during in-place alter
-@param[in] altered_table MySQL table that is being altered
-@param[in] table MySQL table as it is before the ALTER operation
@param[in] user_table InnoDB table
@param[in] trx transaction
@retval true Failure
@@ -4166,8 +4660,6 @@ static
bool
innobase_drop_virtual_try(
Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
- const TABLE* table,
const dict_table_t* user_table,
trx_t* trx)
{
@@ -4203,24 +4695,13 @@ innobase_drop_virtual_try(
}
- ulint n_col = user_table->n_cols;
- ulint n_v_col = user_table->n_v_cols;
-
- n_v_col -= ctx->num_to_drop_vcol;
-
- n_col -= dict_table_get_n_sys_cols(user_table);
-
+ ulint n_col = unsigned(user_table->n_cols) - DATA_N_SYS_COLS;
+ ulint n_v_col = unsigned(user_table->n_v_cols)
+ - ctx->num_to_drop_vcol;
ulint new_n = dict_table_encode_n_col(n_col, n_v_col)
- + ((user_table->flags & DICT_TF_COMPACT) << 31);
+ | ((user_table->flags & DICT_TF_COMPACT) << 31);
- err = innobase_update_n_virtual(user_table, new_n, trx);
-
- if (err != DB_SUCCESS) {
- my_error(ER_INTERNAL_ERROR, MYF(0),
- "InnoDB: DROP COLUMN...VIRTUAL");
- }
-
- return(false);
+ return innodb_update_n_cols(user_table, new_n, trx);
}
/** Adjust the create index column number from "New table" to
@@ -4306,6 +4787,38 @@ innodb_v_adjust_idx_col(
}
}
+/** Create index metadata in the data dictionary.
+@param[in,out] trx dictionary transaction
+@param[in,out] index index being created
+@param[in] add_v virtual columns that are being added, or NULL
+@return the created index */
+MY_ATTRIBUTE((nonnull(1,2), warn_unused_result))
+static
+dict_index_t*
+create_index_dict(
+ trx_t* trx,
+ dict_index_t* index,
+ const dict_add_v_col_t* add_v)
+{
+ DBUG_ENTER("create_index_dict");
+
+ mem_heap_t* heap = mem_heap_create(512);
+ ind_node_t* node = ind_create_graph_create(
+ index, index->table->name.m_name, heap, add_v);
+ que_thr_t* thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
+
+ que_fork_start_command(
+ static_cast<que_fork_t*>(que_node_get_parent(thr)));
+
+ que_run_threads(thr);
+
+ index = node->index;
+
+ que_graph_free((que_t*) que_node_get_parent(thr));
+
+ DBUG_RETURN(trx->error_state == DB_SUCCESS ? index : NULL);
+}
+
/** Update internal structures with concurrent writes blocked,
while preparing ALTER TABLE.
@@ -4341,7 +4854,7 @@ prepare_inplace_alter_table_dict(
index_def_t* index_defs; /* index definitions */
dict_table_t* user_table;
dict_index_t* fts_index = NULL;
- ulint new_clustered = 0;
+ bool new_clustered = false;
dberr_t error;
ulint num_fts_index;
dict_add_v_col_t* add_v = NULL;
@@ -4359,7 +4872,7 @@ prepare_inplace_alter_table_dict(
DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx);
DBUG_ASSERT(!add_fts_doc_id_idx
|| innobase_fulltext_exist(altered_table));
- DBUG_ASSERT(!ctx->add_cols);
+ DBUG_ASSERT(!ctx->defaults);
DBUG_ASSERT(!ctx->add_index);
DBUG_ASSERT(!ctx->add_key_numbers);
DBUG_ASSERT(!ctx->num_to_add_index);
@@ -4369,15 +4882,14 @@ prepare_inplace_alter_table_dict(
trx_start_if_not_started_xa(ctx->prebuilt->trx, true);
if (ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_VIRTUAL_COLUMN) {
- if (prepare_inplace_drop_virtual(
- ha_alter_info, altered_table, old_table)) {
+ & ALTER_DROP_VIRTUAL_COLUMN) {
+ if (prepare_inplace_drop_virtual(ha_alter_info, old_table)) {
DBUG_RETURN(true);
}
}
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_VIRTUAL_COLUMN) {
+ & ALTER_ADD_VIRTUAL_COLUMN) {
if (prepare_inplace_add_virtual(
ha_alter_info, altered_table, old_table)) {
DBUG_RETURN(true);
@@ -4387,7 +4899,7 @@ prepare_inplace_alter_table_dict(
for create index */
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_INDEX) {
+ & ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX) {
for (ulint i = 0; i < ctx->num_to_add_vcol; i++) {
/* Set mbminmax for newly added column */
dict_col_t& col = ctx->add_vcol[i].m_col;
@@ -4409,12 +4921,6 @@ prepare_inplace_alter_table_dict(
here */
ut_ad(check_v_col_in_order(old_table, altered_table, ha_alter_info));
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd);
-
- trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
-
/* Create table containing all indexes to be built in this
ALTER TABLE ADD INDEX so that they are in the correct order
in the table. */
@@ -4433,30 +4939,7 @@ prepare_inplace_alter_table_dict(
fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx,
old_table);
- new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
-
- if (num_fts_index > 1) {
- my_error(ER_INNODB_FT_LIMIT, MYF(0));
- goto error_handled;
- }
-
- if (!ctx->online) {
- /* This is not an online operation (LOCK=NONE). */
- } else if (ctx->add_autoinc == ULINT_UNDEFINED
- && num_fts_index == 0
- && (!innobase_need_rebuild(ha_alter_info, old_table)
- || !innobase_fulltext_exist(altered_table))) {
- /* InnoDB can perform an online operation (LOCK=NONE). */
- } else {
- size_t query_length;
- /* This should have been blocked in
- check_if_supported_inplace_alter(). */
- ut_ad(0);
- my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- innobase_get_stmt_unsafe(ctx->prebuilt->trx->mysql_thd,
- &query_length));
- goto error_handled;
- }
+ new_clustered = (DICT_CLUSTERED & index_defs[0].ind_type) != 0;
/* The primary index would be rebuilt if a FTS Doc ID
column is to be added, and the primary index definition
@@ -4469,17 +4952,12 @@ prepare_inplace_alter_table_dict(
/* Allocate memory for dictionary index definitions */
ctx->add_index = static_cast<dict_index_t**>(
- mem_heap_alloc(ctx->heap, ctx->num_to_add_index
+ mem_heap_zalloc(ctx->heap, ctx->num_to_add_index
* sizeof *ctx->add_index));
ctx->add_key_numbers = add_key_nums = static_cast<ulint*>(
mem_heap_alloc(ctx->heap, ctx->num_to_add_index
* sizeof *ctx->add_key_numbers));
- /* This transaction should be dictionary operation, so that
- the data dictionary will be locked during crash recovery. */
-
- ut_ad(ctx->trx->dict_operation == TRX_DICT_OP_INDEX);
-
/* Acquire a lock on the table before creating any indexes. */
if (ctx->online) {
@@ -4494,6 +4972,12 @@ prepare_inplace_alter_table_dict(
}
}
+ /* Create a background transaction for the operations on
+ the data dictionary tables. */
+ ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd);
+
+ trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
+
/* Latch the InnoDB data dictionary exclusively so that no deadlocks
or lock waits can happen in it during an index create operation. */
@@ -4513,10 +4997,43 @@ prepare_inplace_alter_table_dict(
ut_d(dict_table_check_for_dup_indexes(
ctx->new_table, CHECK_ABORTED_OK));
+ DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;);
+
/* If a new clustered index is defined for the table we need
to rebuild the table with a temporary name. */
if (new_clustered) {
+ if (innobase_check_foreigns(
+ ha_alter_info, old_table,
+ user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
+new_clustered_failed:
+ DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
+ trx_rollback_to_savepoint(ctx->trx, NULL);
+
+ ut_ad(user_table->get_ref_count() == 1);
+
+ online_retry_drop_indexes_with_trx(
+ user_table, ctx->trx);
+
+ if (ctx->need_rebuild()) {
+ ut_ad(!ctx->new_table->cached);
+ dict_mem_table_free(ctx->new_table);
+ ctx->new_table = ctx->old_table;
+ }
+
+ while (ctx->num_to_add_index--) {
+ if (dict_index_t*& i = ctx->add_index[
+ ctx->num_to_add_index]) {
+ dict_mem_index_free(i);
+ i = NULL;
+ }
+ }
+
+ goto err_exit;
+ }
+
size_t dblen = ctx->old_table->name.dblen() + 1;
size_t tablen = altered_table->s->table_name.length;
const char* part = ctx->old_table->name.part();
@@ -4531,43 +5048,8 @@ prepare_inplace_alter_table_dict(
part ? part : "", partlen + 1);
ulint n_cols = 0;
ulint n_v_cols = 0;
- dtuple_t* add_cols;
- ulint space_id = 0;
+ dtuple_t* defaults;
ulint z = 0;
- uint32_t key_id = FIL_DEFAULT_ENCRYPTION_KEY;
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
-
- if (dict_table_is_discarded(ctx->prebuilt->table)) {
- } else if (fil_space_t* space
- = fil_space_acquire(ctx->prebuilt->table->space)) {
- if (const fil_space_crypt_t* crypt_data
- = space->crypt_data) {
- key_id = crypt_data->key_id;
- mode = crypt_data->encryption;
- }
-
- fil_space_release(space);
- }
-
- if (ha_alter_info->handler_flags
- & Alter_inplace_info::CHANGE_CREATE_OPTION) {
- const ha_table_option_struct& alt_opt=
- *ha_alter_info->create_info->option_struct;
- const ha_table_option_struct& opt=
- *old_table->s->option_struct;
- if (alt_opt.encryption != opt.encryption
- || alt_opt.encryption_key_id
- != opt.encryption_key_id) {
- key_id = uint32_t(alt_opt.encryption_key_id);
- mode = fil_encryption_t(alt_opt.encryption);
- }
- }
-
- if (innobase_check_foreigns(
- ha_alter_info, altered_table, old_table,
- user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
- goto new_clustered_failed;
- }
for (uint i = 0; i < altered_table->s->fields; i++) {
const Field* field = altered_table->field[i];
@@ -4592,19 +5074,8 @@ prepare_inplace_alter_table_dict(
DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS));
- /* Create the table. */
- trx_set_dict_operation(ctx->trx, TRX_DICT_OP_TABLE);
-
- if (dict_table_get_low(new_table_name)) {
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
- new_table_name);
- goto new_clustered_failed;
- }
-
- /* The initial space id 0 may be overridden later if this
- table is going to be a file_per_table tablespace. */
ctx->new_table = dict_mem_table_create(
- new_table_name, space_id, n_cols + n_v_cols, n_v_cols,
+ new_table_name, NULL, n_cols + n_v_cols, n_v_cols,
flags, flags2);
/* The rebuilt indexed_table will use the renamed
@@ -4645,14 +5116,24 @@ prepare_inplace_alter_table_dict(
field_type |= DATA_UNSIGNED;
}
+ if (altered_table->versioned()) {
+ if (i == altered_table->s->row_start_field) {
+ field_type |= DATA_VERS_START;
+ } else if (i ==
+ altered_table->s->row_end_field) {
+ field_type |= DATA_VERS_END;
+ } else if (!(field->flags
+ & VERS_UPDATE_UNVERSIONED_FLAG)) {
+ field_type |= DATA_VERSIONED;
+ }
+ }
+
if (dtype_is_string_type(col_type)) {
charset_no = (ulint) field->charset()->number;
if (charset_no > MAX_CHAR_COLL_NUM) {
- dict_mem_table_free(
- ctx->new_table);
my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name);
+ field->field_name.str);
goto new_clustered_failed;
}
} else {
@@ -4680,17 +5161,18 @@ prepare_inplace_alter_table_dict(
}
- if (dict_col_name_is_reserved(field->field_name)) {
+ if (dict_col_name_is_reserved(field->field_name.str)) {
dict_mem_table_free(ctx->new_table);
+ ctx->new_table = ctx->old_table;
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name);
+ field->field_name.str);
goto new_clustered_failed;
}
if (is_virtual) {
dict_mem_table_add_v_col(
ctx->new_table, ctx->heap,
- field->field_name,
+ field->field_name.str,
col_type,
dtype_form_prtype(
field_type, charset_no)
@@ -4699,7 +5181,7 @@ prepare_inplace_alter_table_dict(
} else {
dict_mem_table_add_col(
ctx->new_table, ctx->heap,
- field->field_name,
+ field->field_name.str,
col_type,
dtype_form_prtype(
field_type, charset_no),
@@ -4732,69 +5214,23 @@ prepare_inplace_alter_table_dict(
ctx->new_table->fts->doc_col = fts_doc_id_col;
}
- error = row_create_table_for_mysql(
- ctx->new_table, ctx->trx, mode, key_id);
+ dict_table_add_system_columns(ctx->new_table, ctx->heap);
- switch (error) {
- dict_table_t* temp_table;
- case DB_SUCCESS:
- /* We need to bump up the table ref count and
- before we can use it we need to open the
- table. The new_table must be in the data
- dictionary cache, because we are still holding
- the dict_sys->mutex. */
- ut_ad(mutex_own(&dict_sys->mutex));
- temp_table = dict_table_open_on_name(
- ctx->new_table->name.m_name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE);
- ut_a(ctx->new_table == temp_table);
- /* n_ref_count must be 1, because purge cannot
- be executing on this very table as we are
- holding dict_operation_lock X-latch. */
- DBUG_ASSERT(ctx->new_table->get_ref_count() == 1);
- break;
- case DB_TABLESPACE_EXISTS:
- my_error(ER_TABLESPACE_EXISTS, MYF(0),
- new_table_name);
- goto new_clustered_failed;
- case DB_DUPLICATE_KEY:
- my_error(HA_ERR_TABLE_EXIST, MYF(0),
- altered_table->s->table_name.str);
- goto new_clustered_failed;
- case DB_UNSUPPORTED:
- my_error(ER_UNSUPPORTED_EXTENSION, MYF(0),
- ctx->new_table->name.m_name);
- goto new_clustered_failed;
- default:
- my_error_innodb(error, table_name, flags);
-new_clustered_failed:
- DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
- trx_rollback_to_savepoint(ctx->trx, NULL);
-
- ut_ad(user_table->get_ref_count() == 1);
-
- online_retry_drop_indexes_with_trx(
- user_table, ctx->trx);
- goto err_exit;
- }
-
- if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN) {
- add_cols = dtuple_create_with_vcol(
+ if (ha_alter_info->handler_flags & INNOBASE_DEFAULTS) {
+ defaults = dtuple_create_with_vcol(
ctx->heap,
dict_table_get_n_cols(ctx->new_table),
dict_table_get_n_v_cols(ctx->new_table));
- dict_table_copy_types(add_cols, ctx->new_table);
+ dict_table_copy_types(defaults, ctx->new_table);
} else {
- add_cols = NULL;
+ defaults = NULL;
}
ctx->col_map = innobase_build_col_map(
ha_alter_info, altered_table, old_table,
- ctx->new_table, user_table,
- add_cols, ctx->heap);
- ctx->add_cols = add_cols;
+ ctx->new_table, user_table, defaults, ctx->heap);
+ ctx->defaults = defaults;
} else {
DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table));
DBUG_ASSERT(old_table->s->primary_key
@@ -4840,15 +5276,10 @@ new_clustered_failed:
}
}
- /* Assign table_id, so that no table id of
- fts_create_index_tables() will be written to the undo logs. */
- DBUG_ASSERT(ctx->new_table->id != 0);
- ctx->trx->table_id = ctx->new_table->id;
-
- /* Create the indexes in SYS_INDEXES and load into dictionary. */
+ ut_ad(new_clustered == ctx->need_rebuild());
+ /* Create the index metadata. */
for (ulint a = 0; a < ctx->num_to_add_index; a++) {
-
if (index_defs[a].ind_type & DICT_VIRTUAL
&& ctx->num_to_drop_vcol > 0 && !new_clustered) {
innodb_v_adjust_idx_col(ha_alter_info, old_table,
@@ -4857,66 +5288,280 @@ new_clustered_failed:
}
ctx->add_index[a] = row_merge_create_index(
- ctx->trx, ctx->new_table, &index_defs[a], add_v);
+ ctx->new_table, &index_defs[a], add_v);
add_key_nums[a] = index_defs[a].key_number;
- if (!ctx->add_index[a]) {
- error = ctx->trx->error_state;
- DBUG_ASSERT(error != DB_SUCCESS);
- goto error_handling;
- }
-
DBUG_ASSERT(ctx->add_index[a]->is_committed()
== !!new_clustered);
+ }
- if (ctx->add_index[a]->type & DICT_FTS) {
- DBUG_ASSERT(num_fts_index);
- DBUG_ASSERT(!fts_index);
- DBUG_ASSERT(ctx->add_index[a]->type == DICT_FTS);
- fts_index = ctx->add_index[a];
- }
-
- /* If only online ALTER TABLE operations have been
- requested, allocate a modification log. If the table
- will be locked anyway, the modification
- log is unnecessary. When rebuilding the table
- (new_clustered), we will allocate the log for the
- clustered index of the old table, later. */
- if (new_clustered
- || !ctx->online
- || !user_table->is_readable()
- || dict_table_is_discarded(user_table)) {
- /* No need to allocate a modification log. */
- ut_ad(!ctx->add_index[a]->online_log);
- } else if (ctx->add_index[a]->type & DICT_FTS) {
- /* Fulltext indexes are not covered
- by a modification log. */
- } else {
- DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
- error = DB_OUT_OF_MEMORY;
- goto error_handling;);
- rw_lock_x_lock(&ctx->add_index[a]->lock);
+ if (ctx->need_rebuild() && user_table->supports_instant()) {
+ if (!instant_alter_column_possible(ha_alter_info, old_table)) {
+ goto not_instant_add_column;
+ }
- bool ok = row_log_allocate(ctx->add_index[a],
- NULL, true, NULL, NULL,
- path);
- rw_lock_x_unlock(&ctx->add_index[a]->lock);
+ for (uint i = uint(ctx->old_table->n_cols) - DATA_N_SYS_COLS;
+ i--; ) {
+ if (ctx->col_map[i] != i) {
+ goto not_instant_add_column;
+ }
+ }
- if (!ok) {
- error = DB_OUT_OF_MEMORY;
- goto error_handling;
+ DBUG_ASSERT(ctx->new_table->n_cols > ctx->old_table->n_cols);
+
+ for (uint a = 0; a < ctx->num_to_add_index; a++) {
+ ctx->add_index[a]->table = ctx->new_table;
+ ctx->add_index[a] = dict_index_add_to_cache(
+ ctx->add_index[a], FIL_NULL, false,
+ &error, add_v);
+ ut_a(error == DB_SUCCESS);
+ }
+ DBUG_ASSERT(ha_alter_info->key_count
+ /* hidden GEN_CLUST_INDEX in InnoDB */
+ + dict_index_is_auto_gen_clust(
+ dict_table_get_first_index(ctx->new_table))
+ /* hidden FTS_DOC_ID_INDEX in InnoDB */
+ + (ctx->old_table->fts_doc_id_index
+ && innobase_fts_check_doc_id_index_in_def(
+ altered_table->s->keys,
+ altered_table->key_info)
+ != FTS_EXIST_DOC_ID_INDEX)
+ == ctx->num_to_add_index);
+ ctx->num_to_add_index = 0;
+ ctx->add_index = NULL;
+
+ uint i = 0; // index of stored columns ctx->new_table->cols[]
+ Field **af = altered_table->field;
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ while (const Create_field* new_field = cf_it++) {
+ DBUG_ASSERT(!new_field->field
+ || std::find(old_table->field,
+ old_table->field
+ + old_table->s->fields,
+ new_field->field) !=
+ old_table->field + old_table->s->fields);
+ DBUG_ASSERT(new_field->field
+ || !strcmp(new_field->field_name.str,
+ (*af)->field_name.str));
+
+ if (!(*af)->stored_in_db()) {
+ af++;
+ continue;
+ }
+
+ dict_col_t* col = dict_table_get_nth_col(
+ ctx->new_table, i);
+ DBUG_ASSERT(!strcmp((*af)->field_name.str,
+ dict_table_get_col_name(ctx->new_table,
+ i)));
+ DBUG_ASSERT(!col->is_instant());
+
+ if (new_field->field) {
+ ut_d(const dict_col_t* old_col
+ = dict_table_get_nth_col(user_table, i));
+ ut_d(const dict_index_t* index
+ = user_table->indexes.start);
+ DBUG_SLOW_ASSERT(col->mtype == old_col->mtype);
+ ut_ad(col->prtype == old_col->prtype
+ || col->prtype
+ == (old_col->prtype & ~DATA_VERSIONED));
+ DBUG_SLOW_ASSERT(col->mbminlen
+ == old_col->mbminlen);
+ DBUG_SLOW_ASSERT(col->mbmaxlen
+ == old_col->mbmaxlen);
+ DBUG_SLOW_ASSERT(col->len >= old_col->len);
+ DBUG_SLOW_ASSERT(old_col->is_instant()
+ == (dict_col_get_clust_pos(
+ old_col, index)
+ >= index->n_core_fields));
+ } else if ((*af)->is_real_null()) {
+ /* DEFAULT NULL */
+ col->def_val.len = UNIV_SQL_NULL;
+ } else {
+ switch ((*af)->type()) {
+ case MYSQL_TYPE_VARCHAR:
+ col->def_val.len = reinterpret_cast
+ <const Field_varstring*>
+ ((*af))->get_length();
+ col->def_val.data = reinterpret_cast
+ <const Field_varstring*>
+ ((*af))->get_data();
+ break;
+ case MYSQL_TYPE_GEOMETRY:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ col->def_val.len = reinterpret_cast
+ <const Field_blob*>
+ ((*af))->get_length();
+ col->def_val.data = reinterpret_cast
+ <const Field_blob*>
+ ((*af))->get_ptr();
+ break;
+ default:
+ dfield_t d;
+ dict_col_copy_type(col, &d.type);
+ ulint len = (*af)->pack_length();
+ DBUG_ASSERT(len <= 8
+ || d.type.mtype
+ != DATA_INT);
+ row_mysql_store_col_in_innobase_format(
+ &d,
+ d.type.mtype == DATA_INT
+ ? static_cast<byte*>(
+ mem_heap_alloc(
+ ctx->heap,
+ len))
+ : NULL,
+ true, (*af)->ptr, len,
+ dict_table_is_comp(
+ user_table));
+ col->def_val.len = d.len;
+ col->def_val.data = d.data;
+ }
}
+
+ i++;
+ af++;
}
+
+ DBUG_ASSERT(af == altered_table->field
+ + altered_table->s->fields);
+ /* There might exist a hidden FTS_DOC_ID column for
+ FULLTEXT INDEX. If it exists, the column should have
+ been implicitly added by ADD FULLTEXT INDEX together
+ with instant ADD COLUMN. (If a hidden FTS_DOC_ID pre-existed,
+ then the ctx->col_map[] check should have prevented
+ adding visible user columns after that.) */
+ DBUG_ASSERT(DATA_N_SYS_COLS + i == ctx->new_table->n_cols
+ || (1 + DATA_N_SYS_COLS + i
+ == ctx->new_table->n_cols
+ && !strcmp(dict_table_get_col_name(
+ ctx->new_table, i),
+ FTS_DOC_ID_COL_NAME)));
+
+ ctx->prepare_instant();
}
- ut_ad(new_clustered == ctx->need_rebuild());
+ if (ctx->need_rebuild()) {
+not_instant_add_column:
+ DBUG_ASSERT(ctx->need_rebuild());
+ DBUG_ASSERT(!ctx->is_instant());
+ DBUG_ASSERT(num_fts_index <= 1);
+ DBUG_ASSERT(!ctx->online || num_fts_index == 0);
+ DBUG_ASSERT(!ctx->online
+ || ctx->add_autoinc == ULINT_UNDEFINED);
+ DBUG_ASSERT(!ctx->online
+ || !innobase_need_rebuild(ha_alter_info, old_table)
+ || !innobase_fulltext_exist(altered_table));
- DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
- error = DB_OUT_OF_MEMORY;
- goto error_handling;);
+ uint32_t key_id = FIL_DEFAULT_ENCRYPTION_KEY;
+ fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
+
+ if (fil_space_t* s = user_table->space) {
+ if (const fil_space_crypt_t* c = s->crypt_data) {
+ key_id = c->key_id;
+ mode = c->encryption;
+ }
+ }
+
+ if (ha_alter_info->handler_flags & ALTER_OPTIONS) {
+ const ha_table_option_struct& alt_opt=
+ *ha_alter_info->create_info->option_struct;
+ const ha_table_option_struct& opt=
+ *old_table->s->option_struct;
+ if (alt_opt.encryption != opt.encryption
+ || alt_opt.encryption_key_id
+ != opt.encryption_key_id) {
+ key_id = uint32_t(alt_opt.encryption_key_id);
+ mode = fil_encryption_t(alt_opt.encryption);
+ }
+ }
+
+ if (dict_table_get_low(ctx->new_table->name.m_name)) {
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
+ ctx->new_table->name.m_name);
+ goto new_clustered_failed;
+ }
+
+ /* Create the table. */
+ trx_set_dict_operation(ctx->trx, TRX_DICT_OP_TABLE);
+
+ error = row_create_table_for_mysql(
+ ctx->new_table, ctx->trx, mode, key_id);
+
+ switch (error) {
+ dict_table_t* temp_table;
+ case DB_SUCCESS:
+ /* We need to bump up the table ref count and,
+ before we can use it, we need to open the
+ table. The new_table must be in the data
+ dictionary cache, because we are still holding
+ the dict_sys->mutex. */
+ ut_ad(mutex_own(&dict_sys->mutex));
+ temp_table = dict_table_open_on_name(
+ ctx->new_table->name.m_name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+ ut_a(ctx->new_table == temp_table);
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ DBUG_ASSERT(ctx->new_table->get_ref_count() == 1);
+ DBUG_ASSERT(ctx->new_table->id != 0);
+ DBUG_ASSERT(ctx->new_table->id == ctx->trx->table_id);
+ break;
+ case DB_TABLESPACE_EXISTS:
+ my_error(ER_TABLESPACE_EXISTS, MYF(0),
+ ctx->new_table->name.m_name);
+ goto new_table_failed;
+ case DB_DUPLICATE_KEY:
+ my_error(HA_ERR_TABLE_EXIST, MYF(0),
+ altered_table->s->table_name.str);
+ goto new_table_failed;
+ case DB_UNSUPPORTED:
+ my_error(ER_UNSUPPORTED_EXTENSION, MYF(0),
+ ctx->new_table->name.m_name);
+ goto new_table_failed;
+ default:
+ my_error_innodb(error, table_name, flags);
+new_table_failed:
+ DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
+ goto new_clustered_failed;
+ }
+
+ for (ulint a = 0; a < ctx->num_to_add_index; a++) {
+ dict_index_t*& index = ctx->add_index[a];
+ const bool has_new_v_col = index->has_new_v_col;
+ index = create_index_dict(ctx->trx, index, add_v);
+ if (!index) {
+ error = ctx->trx->error_state;
+ ut_ad(error != DB_SUCCESS);
+ while (++a < ctx->num_to_add_index) {
+ dict_mem_index_free(ctx->add_index[a]);
+ }
+ goto error_handling;
+ }
+
+ index->parser = index_defs[a].parser;
+ index->has_new_v_col = has_new_v_col;
+ /* Note the id of the transaction that created this
+ index; we use it to restrict readers from accessing
+ this index, to ensure read consistency. */
+ ut_ad(index->trx_id == ctx->trx->id);
+
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(num_fts_index == 1);
+ DBUG_ASSERT(!fts_index);
+ DBUG_ASSERT(index->type == DICT_FTS);
+ fts_index = ctx->add_index[a];
+ }
+ }
- if (new_clustered) {
dict_index_t* clust_index = dict_table_get_first_index(
user_table);
dict_index_t* new_clust_index = dict_table_get_first_index(
@@ -4927,6 +5572,11 @@ new_clustered_failed:
DBUG_EXECUTE_IF("innodb_alter_table_pk_assert_no_sort",
DBUG_ASSERT(ctx->skip_pk_sort););
+ ut_ad(!new_clust_index->is_instant());
+ /* row_merge_build_index() depends on the correct value */
+ ut_ad(new_clust_index->n_core_null_bytes
+ == UT_BITS_IN_BYTES(new_clust_index->n_nullable));
+
DBUG_ASSERT(!ctx->new_table->persistent_autoinc);
if (const Field* ai = altered_table->found_next_number_field) {
const unsigned col_no = innodb_col_no(ai);
@@ -4938,7 +5588,7 @@ new_clustered_failed:
/* Initialize the AUTO_INCREMENT sequence
to the rebuilt table from the old one. */
if (!old_table->found_next_number_field
- || dict_table_is_discarded(user_table)) {
+ || !user_table->space) {
} else if (ib_uint64_t autoinc
= btr_read_autoinc(clust_index)) {
btr_write_autoinc(new_clust_index, autoinc);
@@ -4949,10 +5599,13 @@ new_clustered_failed:
/* Allocate a log for online table rebuild. */
rw_lock_x_lock(&clust_index->lock);
bool ok = row_log_allocate(
+ ctx->prebuilt->trx,
clust_index, ctx->new_table,
!(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX),
- ctx->add_cols, ctx->col_map, path);
+ & ALTER_ADD_PK_INDEX),
+ ctx->defaults, ctx->col_map, path,
+ old_table,
+ ctx->allow_not_null);
rw_lock_x_unlock(&clust_index->lock);
if (!ok) {
@@ -4960,12 +5613,73 @@ new_clustered_failed:
goto error_handling;
}
}
+ } else if (ctx->num_to_add_index) {
+ ut_ad(!ctx->is_instant());
+ ctx->trx->table_id = user_table->id;
+
+ for (ulint a = 0; a < ctx->num_to_add_index; a++) {
+ dict_index_t*& index = ctx->add_index[a];
+ const bool has_new_v_col = index->has_new_v_col;
+ index = create_index_dict(ctx->trx, index, add_v);
+ if (!index) {
+ error = ctx->trx->error_state;
+ ut_ad(error != DB_SUCCESS);
+error_handling_drop_uncached:
+ while (++a < ctx->num_to_add_index) {
+ dict_mem_index_free(ctx->add_index[a]);
+ }
+ goto error_handling;
+ }
+
+ index->parser = index_defs[a].parser;
+ index->has_new_v_col = has_new_v_col;
+ /* Note the id of the transaction that created this
+ index; we use it to restrict readers from accessing
+ this index, to ensure read consistency. */
+ ut_ad(index->trx_id == ctx->trx->id);
+
+ /* If ADD INDEX with LOCK=NONE has been
+ requested, allocate a modification log. */
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(num_fts_index == 1);
+ DBUG_ASSERT(!fts_index);
+ DBUG_ASSERT(index->type == DICT_FTS);
+ fts_index = ctx->add_index[a];
+ /* Fulltext indexes are not covered
+ by a modification log. */
+ } else if (!ctx->online
+ || !user_table->is_readable()
+ || !user_table->space) {
+ /* No need to allocate a modification log. */
+ DBUG_ASSERT(!index->online_log);
+ } else {
+ DBUG_EXECUTE_IF(
+ "innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling_drop_uncached;);
+ rw_lock_x_lock(&ctx->add_index[a]->lock);
+
+ bool ok = row_log_allocate(
+ ctx->prebuilt->trx,
+ index,
+ NULL, true, NULL, NULL,
+ path, old_table,
+ ctx->allow_not_null);
+
+ rw_lock_x_unlock(&index->lock);
+
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling_drop_uncached;
+ }
+ }
+ }
}
- if (ctx->online) {
+ if (ctx->online && ctx->num_to_add_index) {
/* Assign a consistent read view for
row_merge_read_clustered_index(). */
- trx_assign_read_view(ctx->prebuilt->trx);
+ ctx->prebuilt->trx->read_view.open(ctx->prebuilt->trx);
}
if (fts_index) {
@@ -4989,8 +5703,8 @@ op_ok:
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS);
- if (new_clustered) {
- /* For !new_clustered, this will be set at
+ if (ctx->need_rebuild()) {
+ /* For !ctx->need_rebuild(), this will be set at
commit_cache_norebuild(). */
ctx->new_table->fts_doc_id_index
= dict_table_get_index_on_name(
@@ -4998,10 +5712,8 @@ op_ok:
DBUG_ASSERT(ctx->new_table->fts_doc_id_index != NULL);
}
- /* This function will commit the transaction and reset
- the trx_t::dict_operation flag on success. */
-
- error = fts_create_index_tables(ctx->trx, fts_index);
+ error = fts_create_index_tables(ctx->trx, fts_index,
+ ctx->new_table->id);
DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table",
error = DB_LOCK_WAIT_TIMEOUT;
@@ -5011,13 +5723,13 @@ op_ok:
goto error_handling;
}
+ trx_commit(ctx->trx);
trx_start_for_ddl(ctx->trx, op);
if (!ctx->new_table->fts
|| ib_vector_size(ctx->new_table->fts->indexes) == 0) {
error = fts_create_common_tables(
- ctx->trx, ctx->new_table,
- user_table->name.m_name, TRUE);
+ ctx->trx, ctx->new_table, true);
DBUG_EXECUTE_IF(
"innodb_test_fail_after_fts_common_table",
@@ -5089,6 +5801,11 @@ error_handling:
error_handled:
ctx->prebuilt->trx->error_info = NULL;
+
+ if (!ctx->trx) {
+ goto err_exit;
+ }
+
ctx->trx->error_state = DB_SUCCESS;
if (!dict_locked) {
@@ -5150,9 +5867,11 @@ err_exit:
}
#endif /* UNIV_DEBUG */
- row_mysql_unlock_data_dictionary(ctx->trx);
+ if (ctx->trx) {
+ row_mysql_unlock_data_dictionary(ctx->trx);
- trx_free_for_mysql(ctx->trx);
+ trx_free(ctx->trx);
+ }
trx_commit_for_mysql(ctx->prebuilt->trx);
delete ctx;
@@ -5586,9 +6305,9 @@ ha_innobase::prepare_inplace_alter_table(
NULL,
srv_file_per_table);
- info.set_tablespace_type(indexed_table->space != TRX_SYS_SPACE);
+ info.set_tablespace_type(indexed_table->space != fil_system.sys_space);
- if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) {
+ if (ha_alter_info->handler_flags & ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX) {
if (info.gcols_in_fulltext_or_spatial()) {
goto err_exit_no_heap;
}
@@ -5599,9 +6318,7 @@ ha_innobase::prepare_inplace_alter_table(
if (indexed_table->corrupted) {
/* Handled below */
} else {
- FilSpace space(indexed_table->space, true);
-
- if (space()) {
+ if (const fil_space_t* space = indexed_table->space) {
String str;
const char* engine= table_type();
@@ -5613,7 +6330,7 @@ ha_innobase::prepare_inplace_alter_table(
" used key_id is not available. "
" Can't continue reading table.",
table_share->table_name.str,
- space()->chain.start->name);
+ space->chain.start->name);
my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
DBUG_RETURN(true);
@@ -5671,7 +6388,7 @@ err_exit_no_heap:
/* Prohibit renaming a column to something that the table
already contains. */
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME) {
+ & ALTER_COLUMN_NAME) {
List_iterator_fast<Create_field> cf_it(
ha_alter_info->alter_info->create_list);
@@ -5685,7 +6402,7 @@ err_exit_no_heap:
cf_it.rewind();
while (Create_field* cf = cf_it++) {
if (cf->field == *fp) {
- name = cf->field_name;
+ name = cf->field_name.str;
goto check_if_ok_to_rename;
}
}
@@ -5695,7 +6412,7 @@ check_if_ok_to_rename:
/* Prohibit renaming a column from FTS_DOC_ID
if full-text indexes exist. */
if (!my_strcasecmp(system_charset_info,
- (*fp)->field_name,
+ (*fp)->field_name.str,
FTS_DOC_ID_COL_NAME)
&& innobase_fulltext_exist(altered_table)) {
my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN,
@@ -5818,11 +6535,12 @@ check_if_ok_to_rename:
n_drop_fk = 0;
if (ha_alter_info->handler_flags
- & (INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD)) {
+ & (INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD
+ | INNOBASE_ALTER_INSTANT)) {
heap = mem_heap_create(1024);
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME) {
+ & ALTER_COLUMN_NAME) {
col_names = innobase_get_col_names(
ha_alter_info, altered_table, table,
indexed_table, heap);
@@ -5835,7 +6553,7 @@ check_if_ok_to_rename:
}
if (ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY) {
+ & ALTER_DROP_FOREIGN_KEY) {
DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0);
drop_fk = static_cast<dict_foreign_t**>(
@@ -5892,9 +6610,9 @@ found_fk:
dict_index_t* drop_primary = NULL;
DBUG_ASSERT(ha_alter_info->handler_flags
- & (Alter_inplace_info::DROP_INDEX
- | Alter_inplace_info::DROP_UNIQUE_INDEX
- | Alter_inplace_info::DROP_PK_INDEX));
+ & (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX
+ | ALTER_DROP_UNIQUE_INDEX
+ | ALTER_DROP_PK_INDEX));
/* Check which indexes to drop. */
drop_index = static_cast<dict_index_t**>(
mem_heap_alloc(
@@ -5906,7 +6624,7 @@ found_fk:
= ha_alter_info->index_drop_buffer[i];
dict_index_t* index
= dict_table_get_index_on_name(
- indexed_table, key->name);
+ indexed_table, key->name.str);
if (!index) {
push_warning_printf(
@@ -5948,7 +6666,7 @@ found_fk:
if (!my_strcasecmp(
system_charset_info,
FTS_DOC_ID_INDEX_NAME,
- table->key_info[i].name)) {
+ table->key_info[i].name.str)) {
/* The index exists in the MySQL
data dictionary. Do not drop it,
even though it is no longer needed
@@ -6014,7 +6732,7 @@ check_if_can_drop_indexes:
/* Check if any of the existing indexes are marked as corruption
and if they are, refuse adding more indexes. */
- if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) {
+ if (ha_alter_info->handler_flags & ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX) {
for (dict_index_t* index = dict_table_get_first_index(indexed_table);
index != NULL; index = dict_table_get_next_index(index)) {
@@ -6069,7 +6787,7 @@ check_if_can_drop_indexes:
n_add_fk = 0;
if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_FOREIGN_KEY) {
+ & ALTER_ADD_FOREIGN_KEY) {
ut_ad(!m_prebuilt->trx->check_foreigns);
alter_fill_stored_column(altered_table, m_prebuilt->table,
@@ -6119,10 +6837,15 @@ err_exit:
}
}
+ const ha_table_option_struct& alt_opt=
+ *ha_alter_info->create_info->option_struct;
+
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
- || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
- == Alter_inplace_info::CHANGE_CREATE_OPTION
- && !innobase_need_rebuild(ha_alter_info, table))) {
+ || ((ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_NOCREATE
+ | INNOBASE_ALTER_INSTANT))
+ == ALTER_OPTIONS
+ && !alter_options_need_rebuild(ha_alter_info, table))) {
if (heap) {
ha_alter_info->handler_ctx
@@ -6134,11 +6857,15 @@ err_exit:
add_fk, n_add_fk,
ha_alter_info->online,
heap, indexed_table,
- col_names, ULINT_UNDEFINED, 0, 0, 0);
+ col_names, ULINT_UNDEFINED, 0, 0,
+ (ha_alter_info->ignore
+ || !thd_is_strict_mode(m_user_thd)),
+ alt_opt.page_compressed,
+ alt_opt.page_compression_level);
}
DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0);
- if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ if (ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE)) {
online_retry_drop_indexes(
m_prebuilt->table, m_user_thd);
@@ -6146,14 +6873,13 @@ err_exit:
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_VIRTUAL_COLUMN)
- && prepare_inplace_drop_virtual(
- ha_alter_info, altered_table, table)) {
+ & ALTER_DROP_VIRTUAL_COLUMN)
+ && prepare_inplace_drop_virtual(ha_alter_info, table)) {
DBUG_RETURN(true);
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_VIRTUAL_COLUMN)
+ & ALTER_ADD_VIRTUAL_COLUMN)
&& prepare_inplace_add_virtual(
ha_alter_info, altered_table, table)) {
DBUG_RETURN(true);
@@ -6177,12 +6903,6 @@ err_exit:
add_fts_doc_id = true;
add_fts_doc_id_idx = true;
- push_warning_printf(
- m_user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_INDEX,
- "InnoDB rebuilding table to add"
- " column " FTS_DOC_ID_COL_NAME);
} else if (fts_doc_col_no == ULINT_UNDEFINED) {
goto err_exit;
}
@@ -6202,9 +6922,9 @@ err_exit:
doc_col_no == fts_doc_col_no
|| doc_col_no == ULINT_UNDEFINED
|| (ha_alter_info->handler_flags
- & (Alter_inplace_info::ALTER_STORED_COLUMN_ORDER
- | Alter_inplace_info::DROP_STORED_COLUMN
- | Alter_inplace_info::ADD_STORED_BASE_COLUMN)));
+ & (ALTER_STORED_COLUMN_ORDER
+ | ALTER_DROP_STORED_COLUMN
+ | ALTER_ADD_STORED_BASE_COLUMN)));
}
}
@@ -6227,7 +6947,7 @@ err_exit:
/* This is an added column. */
DBUG_ASSERT(!new_field->field);
DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN);
+ & ALTER_ADD_COLUMN);
field = altered_table->field[i];
@@ -6269,7 +6989,9 @@ found_col:
heap, m_prebuilt->table, col_names,
add_autoinc_col_no,
ha_alter_info->create_info->auto_increment_value,
- autoinc_col_max_value, 0);
+ autoinc_col_max_value,
+ ha_alter_info->ignore || !thd_is_strict_mode(m_user_thd),
+ alt_opt.page_compressed, alt_opt.page_compression_level);
DBUG_RETURN(prepare_inplace_alter_table_dict(
ha_alter_info, altered_table, table,
@@ -6361,7 +7083,7 @@ get_error_key_name(
} else if (ha_alter_info->key_count == 0) {
return(dict_table_get_first_index(table)->name);
} else {
- return(ha_alter_info->key_info_buffer[error_key_num].name);
+ return(ha_alter_info->key_info_buffer[error_key_num].name.str);
}
}
@@ -6405,9 +7127,11 @@ ok_exit:
DBUG_RETURN(false);
}
- if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
- == Alter_inplace_info::CHANGE_CREATE_OPTION
- && !innobase_need_rebuild(ha_alter_info, table)) {
+ if ((ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ALTER_NOCREATE
+ | INNOBASE_ALTER_INSTANT))
+ == ALTER_OPTIONS
+ && !alter_options_need_rebuild(ha_alter_info, table)) {
goto ok_exit;
}
@@ -6419,6 +7143,8 @@ ok_exit:
DBUG_ASSERT(ctx->trx);
DBUG_ASSERT(ctx->prebuilt == m_prebuilt);
+ if (ctx->is_instant()) goto ok_exit;
+
dict_index_t* pk = dict_table_get_first_index(m_prebuilt->table);
ut_ad(pk != NULL);
@@ -6428,8 +7154,7 @@ ok_exit:
ctx->m_stage = UT_NEW_NOKEY(ut_stage_alter_t(pk));
- if (!m_prebuilt->table->is_readable()
- || dict_table_is_discarded(m_prebuilt->table)) {
+ if (!m_prebuilt->table->is_readable()) {
goto all_done;
}
@@ -6444,7 +7169,7 @@ ok_exit:
rebuild_templ
= ctx->need_rebuild()
|| ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
+ & ALTER_COLUMN_EQUAL_PACK_LENGTH)
&& alter_templ_needs_rebuild(
altered_table, ha_alter_info, ctx->new_table));
@@ -6498,9 +7223,9 @@ ok_exit:
m_prebuilt->table, ctx->new_table,
ctx->online,
ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index,
- altered_table, ctx->add_cols, ctx->col_map,
+ altered_table, ctx->defaults, ctx->col_map,
ctx->add_autoinc, ctx->sequence, ctx->skip_pk_sort,
- ctx->m_stage, add_v, eval_table);
+ ctx->m_stage, add_v, eval_table, ctx->allow_not_null);
#ifndef DBUG_OFF
oom:
@@ -6509,7 +7234,7 @@ oom:
DEBUG_SYNC_C("row_log_table_apply1_before");
error = row_log_table_apply(
ctx->thr, m_prebuilt->table, altered_table,
- ctx->m_stage);
+ ctx->m_stage, ctx->new_table);
}
/* Init online ddl status variables */
@@ -6660,7 +7385,7 @@ check_col_exists_in_indexes(
const dict_col_t* idx_col
= dict_index_get_nth_col(index, i);
- if (is_v && dict_col_is_virtual(idx_col)) {
+ if (is_v && idx_col->is_virtual()) {
const dict_v_col_t* v_col = reinterpret_cast<
const dict_v_col_t*>(idx_col);
if (v_col->v_pos == col_no) {
@@ -6668,7 +7393,7 @@ check_col_exists_in_indexes(
}
}
- if (!is_v && !dict_col_is_virtual(idx_col)
+ if (!is_v && !idx_col->is_virtual()
&& dict_col_get_no(idx_col) == col_no) {
return(true);
}
@@ -6741,7 +7466,8 @@ rollback_inplace_alter_table(
goto func_exit;
}
- trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
+ trx_start_for_ddl(ctx->trx, ctx->need_rebuild()
+ ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX);
row_mysql_lock_data_dictionary(ctx->trx);
if (ctx->need_rebuild()) {
@@ -6784,7 +7510,7 @@ rollback_inplace_alter_table(
}
} else {
DBUG_ASSERT(!(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX));
+ & ALTER_ADD_PK_INDEX));
DBUG_ASSERT(ctx->new_table == prebuilt->table);
innobase_rollback_sec_index(
@@ -6793,7 +7519,7 @@ rollback_inplace_alter_table(
trx_commit_for_mysql(ctx->trx);
row_mysql_unlock_data_dictionary(ctx->trx);
- trx_free_for_mysql(ctx->trx);
+ trx_free(ctx->trx);
func_exit:
#ifndef DBUG_OFF
@@ -6932,8 +7658,7 @@ innobase_rename_column_try(
ulint nth_col,
const char* from,
const char* to,
- bool new_clustered,
- bool is_virtual)
+ bool new_clustered)
{
pars_info_t* info;
dberr_t error;
@@ -7147,7 +7872,7 @@ innobase_rename_columns_try(
DBUG_ASSERT(ctx);
DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME);
+ & ALTER_COLUMN_NAME);
for (Field** fp = table->field; *fp; fp++, i++) {
bool is_virtual = innobase_is_v_fld(*fp);
@@ -7167,10 +7892,9 @@ innobase_rename_columns_try(
if (innobase_rename_column_try(
ctx->old_table, trx, table_name,
col_n,
- cf->field->field_name,
- cf->field_name,
- ctx->need_rebuild(),
- is_virtual)) {
+ cf->field->field_name.str,
+ cf->field_name.str,
+ ctx->need_rebuild())) {
return(true);
}
goto processed_field;
@@ -7356,8 +8080,8 @@ innobase_rename_or_enlarge_columns_cache(
dict_table_t* user_table)
{
if (!(ha_alter_info->handler_flags
- & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
- | Alter_inplace_info::ALTER_COLUMN_NAME))) {
+ & (ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | ALTER_COLUMN_NAME))) {
return;
}
@@ -7392,8 +8116,8 @@ innobase_rename_or_enlarge_columns_cache(
if ((*fp)->flags & FIELD_IS_RENAMED) {
dict_mem_table_col_rename(
user_table, col_n,
- cf->field->field_name,
- cf->field_name, is_virtual);
+ cf->field->field_name.str,
+ cf->field_name.str, is_virtual);
}
break;
@@ -7437,11 +8161,11 @@ commit_set_autoinc(
btr_write_autoinc(dict_table_get_first_index(ctx->new_table),
autoinc - 1, true);
} else if ((ha_alter_info->handler_flags
- & Alter_inplace_info::CHANGE_CREATE_OPTION)
+ & ALTER_CHANGE_CREATE_OPTION)
&& (ha_alter_info->create_info->used_fields
& HA_CREATE_USED_AUTO)) {
- if (dict_table_is_discarded(ctx->old_table)) {
+ if (!ctx->old_table->space) {
my_error(ER_TABLESPACE_DISCARDED, MYF(0),
old_table->s->table_name.str);
DBUG_RETURN(true);
@@ -7452,7 +8176,7 @@ commit_set_autoinc(
const Field* ai = old_table->found_next_number_field;
ut_ad(!strcmp(dict_table_get_col_name(ctx->old_table,
innodb_col_no(ai)),
- ai->field_name));
+ ai->field_name.str));
ib_uint64_t autoinc
= ha_alter_info->create_info->auto_increment_value;
@@ -7724,6 +8448,152 @@ innobase_update_foreign_cache(
DBUG_RETURN(err);
}
+/** Changes SYS_COLUMNS.PRTYPE for one column.
+@param[in,out] trx transaction
+@param[in] table_name table name
+@param[in] tableid table ID as in SYS_TABLES
+@param[in] pos column position
+@param[in] prtype new precise type
+@return boolean flag
+@retval true on failure
+@retval false on success */
+static
+bool
+vers_change_field_try(
+ trx_t* trx,
+ const char* table_name,
+ const table_id_t tableid,
+ const ulint pos,
+ const ulint prtype)
+{
+ DBUG_ENTER("vers_change_field_try");
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "prtype", prtype);
+ pars_info_add_ull_literal(info, "tableid", tableid);
+ pars_info_add_int4_literal(info, "pos", pos);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE CHANGE_COLUMN_MTYPE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS SET PRTYPE=:prtype\n"
+ "WHERE TABLE_ID=:tableid AND POS=:pos;\n"
+ "END;\n",
+ false, trx);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_name, 0);
+ trx->error_state = DB_SUCCESS;
+ trx->op_info = "";
+ DBUG_RETURN(true);
+ }
+
+ DBUG_RETURN(false);
+}
+
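
The procedure above only flips the DATA_VERSIONED bit inside the stored PRTYPE word; the rest of the precise type is left untouched, and vers_change_fields_try() below computes the new value with plain bit arithmetic. A minimal standalone sketch of that arithmetic, not part of the patch, using an illustrative flag value in place of the real DATA_VERSIONED constant:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for InnoDB's DATA_VERSIONED precise-type bit.
    static const std::uint32_t VERSIONED_FLAG = 1U << 23;

    // Compute the new PRTYPE the way vers_change_fields_try() does:
    // clear the bit for WITHOUT SYSTEM VERSIONING, set it for WITH.
    static std::uint32_t new_prtype(std::uint32_t prtype, bool with_versioning)
    {
            return with_versioning ? prtype | VERSIONED_FLAG
                                   : prtype & ~VERSIONED_FLAG;
    }

    int main()
    {
            std::uint32_t prtype = 0x0403;      // some pre-existing type bits
            prtype = new_prtype(prtype, true);  // ALTER ... WITH SYSTEM VERSIONING
            assert(prtype & VERSIONED_FLAG);
            prtype = new_prtype(prtype, false); // ALTER ... WITHOUT SYSTEM VERSIONING
            assert(!(prtype & VERSIONED_FLAG));
            return 0;
    }
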
+/** Changes the WITH/WITHOUT SYSTEM VERSIONING property of fields in SYS_COLUMNS.
+@param[in] ha_alter_info alter info
+@param[in] ctx alter inplace context
+@param[in] trx transaction
+@param[in] table old table
+@return boolean flag
+@retval true on failure
+@retval false on success */
+static
+bool
+vers_change_fields_try(
+ const Alter_inplace_info* ha_alter_info,
+ const ha_innobase_inplace_ctx* ctx,
+ trx_t* trx,
+ const TABLE* table)
+{
+ DBUG_ENTER("vers_change_fields_try");
+
+ DBUG_ASSERT(ha_alter_info);
+ DBUG_ASSERT(ctx);
+
+ List_iterator_fast<Create_field> it(
+ ha_alter_info->alter_info->create_list);
+
+ while (const Create_field* create_field = it++) {
+ if (!create_field->field) {
+ continue;
+ }
+ if (create_field->versioning
+ == Column_definition::VERSIONING_NOT_SET) {
+ continue;
+ }
+
+ const dict_table_t* new_table = ctx->new_table;
+ const uint pos = innodb_col_no(create_field->field);
+ const dict_col_t* col = dict_table_get_nth_col(new_table, pos);
+
+ DBUG_ASSERT(!col->vers_sys_start());
+ DBUG_ASSERT(!col->vers_sys_end());
+
+ ulint new_prtype
+ = create_field->versioning
+ == Column_definition::WITHOUT_VERSIONING
+ ? col->prtype & ~DATA_VERSIONED
+ : col->prtype | DATA_VERSIONED;
+
+ if (vers_change_field_try(trx, table->s->table_name.str,
+ new_table->id, pos,
+ new_prtype)) {
+ DBUG_RETURN(true);
+ }
+ }
+
+ DBUG_RETURN(false);
+}
+
+/** Changes the WITH/WITHOUT SYSTEM VERSIONING property of fields
+in the data dictionary cache.
+@param ha_alter_info Data used during in-place alter
+@param ctx In-place ALTER TABLE context
+@param table MySQL table as it is before the ALTER operation */
+static
+void
+vers_change_fields_cache(
+ Alter_inplace_info* ha_alter_info,
+ const ha_innobase_inplace_ctx* ctx,
+ const TABLE* table)
+{
+ DBUG_ENTER("vers_change_fields_cache");
+
+ DBUG_ASSERT(ha_alter_info);
+ DBUG_ASSERT(ctx);
+ DBUG_ASSERT(ha_alter_info->handler_flags & ALTER_COLUMN_UNVERSIONED);
+
+ List_iterator_fast<Create_field> it(
+ ha_alter_info->alter_info->create_list);
+
+ while (const Create_field* create_field = it++) {
+ if (!create_field->field) {
+ continue;
+ }
+ dict_col_t* col = dict_table_get_nth_col(
+ ctx->new_table, innodb_col_no(create_field->field));
+
+ if (create_field->versioning
+ == Column_definition::WITHOUT_VERSIONING) {
+
+ DBUG_ASSERT(!col->vers_sys_start());
+ DBUG_ASSERT(!col->vers_sys_end());
+ col->prtype &= ~DATA_VERSIONED;
+ } else if (create_field->versioning
+ == Column_definition::WITH_VERSIONING) {
+
+ DBUG_ASSERT(!col->vers_sys_start());
+ DBUG_ASSERT(!col->vers_sys_end());
+ col->prtype |= DATA_VERSIONED;
+ }
+ }
+
+ DBUG_VOID_RETURN;
+}
+
/** Commit the changes made during prepare_inplace_alter_table()
and inplace_alter_table() inside the data dictionary tables,
when rebuilding the table.
@@ -7754,7 +8624,7 @@ commit_try_rebuild(
DBUG_ASSERT(ctx->need_rebuild());
DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
DBUG_ASSERT(!(ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)
+ & ALTER_DROP_FOREIGN_KEY)
|| ctx->num_to_drop_fk > 0);
for (dict_index_t* index = dict_table_get_first_index(rebuilt_table);
@@ -7808,7 +8678,8 @@ commit_try_rebuild(
error = row_log_table_apply(
ctx->thr, user_table, altered_table,
static_cast<ha_innobase_inplace_ctx*>(
- ha_alter_info->handler_ctx)->m_stage);
+ ha_alter_info->handler_ctx)->m_stage,
+ ctx->new_table);
if (s_templ) {
ut_ad(ctx->need_rebuild());
@@ -7853,7 +8724,7 @@ commit_try_rebuild(
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)
+ & ALTER_COLUMN_NAME)
&& innobase_rename_columns_try(ha_alter_info, ctx, old_table,
trx, table_name)) {
DBUG_RETURN(true);
@@ -7863,7 +8734,7 @@ commit_try_rebuild(
/* The new table must inherit the flag from the
"parent" table. */
- if (dict_table_is_discarded(user_table)) {
+ if (!user_table->space) {
rebuilt_table->file_unreadable = true;
rebuilt_table->flags2 |= DICT_TF2_DISCARDED;
}
@@ -7915,8 +8786,7 @@ commit_cache_rebuild(
DBUG_ENTER("commit_cache_rebuild");
DEBUG_SYNC_C("commit_cache_rebuild");
DBUG_ASSERT(ctx->need_rebuild());
- DBUG_ASSERT(dict_table_is_discarded(ctx->old_table)
- == dict_table_is_discarded(ctx->new_table));
+ DBUG_ASSERT(!ctx->old_table->space == !ctx->new_table->space);
const char* old_name = mem_heap_strdup(
ctx->heap, ctx->old_table->name.m_name);
@@ -7960,7 +8830,7 @@ get_col_list_to_be_dropped(
const dict_col_t* idx_col
= dict_index_get_nth_col(index, col);
- if (dict_col_is_virtual(idx_col)) {
+ if (idx_col->is_virtual()) {
const dict_v_col_t* v_col
= reinterpret_cast<
const dict_v_col_t*>(idx_col);
@@ -7974,6 +8844,58 @@ get_col_list_to_be_dropped(
}
}
+/** Change PAGE_COMPRESSED to ON or change the PAGE_COMPRESSION_LEVEL.
+@param[in] level PAGE_COMPRESSION_LEVEL
+@param[in] table table before the change
+@param[in,out] trx data dictionary transaction
+@param[in] table_name table name in MariaDB
+@return whether the operation succeeded */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+static
+bool
+innobase_page_compression_try(
+ uint level,
+ const dict_table_t* table,
+ trx_t* trx,
+ const char* table_name)
+{
+ DBUG_ENTER("innobase_page_compression_try");
+ DBUG_ASSERT(level >= 1);
+ DBUG_ASSERT(level <= 9);
+
+ unsigned flags = table->flags
+ & ~(0xFU << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
+ flags |= 1U << DICT_TF_POS_PAGE_COMPRESSION
+ | level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL;
+
+ if (table->flags == flags) {
+ DBUG_RETURN(false);
+ }
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "id", table->id);
+ pars_info_add_int4_literal(info, "type",
+ dict_tf_to_sys_tables_type(flags));
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE CHANGE_COMPRESSION () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET TYPE=:type\n"
+ "WHERE ID=:id;\n"
+ "END;\n",
+ false, trx);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_name, 0);
+ trx->error_state = DB_SUCCESS;
+ trx->op_info = "";
+ DBUG_RETURN(true);
+ }
+
+ DBUG_RETURN(false);
+}
+
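
innobase_page_compression_try() above rebuilds the table flags by clearing the 4-bit compression-level field and then OR-ing in both the PAGE_COMPRESSION bit and the new level. A minimal sketch of that mask-and-set pattern, not part of the patch, with hypothetical bit positions standing in for the real DICT_TF_POS_* constants:

    #include <cassert>

    // Hypothetical bit positions; the real DICT_TF_POS_* values differ.
    static const unsigned POS_PAGE_COMPRESSION       = 7;
    static const unsigned POS_PAGE_COMPRESSION_LEVEL = 8;

    // Rebuild flags for PAGE_COMPRESSED=1, PAGE_COMPRESSION_LEVEL=level.
    static unsigned set_page_compression(unsigned flags, unsigned level)
    {
            assert(level >= 1 && level <= 9);
            flags &= ~(0xFU << POS_PAGE_COMPRESSION_LEVEL);  // clear the old level
            flags |= 1U << POS_PAGE_COMPRESSION              // compression on
                  |  level << POS_PAGE_COMPRESSION_LEVEL;    // store the new level
            return flags;
    }

    int main()
    {
            unsigned flags = set_page_compression(0x21, 6);
            assert(((flags >> POS_PAGE_COMPRESSION_LEVEL) & 0xFU) == 6);
            assert(flags & (1U << POS_PAGE_COMPRESSION));
            return 0;
    }
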
/** Commit the changes made during prepare_inplace_alter_table()
and inplace_alter_table() inside the data dictionary tables,
when not rebuilding the table.
@@ -8000,13 +8922,20 @@ commit_try_norebuild(
DBUG_ASSERT(!ctx->need_rebuild());
DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
DBUG_ASSERT(!(ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)
+ & ALTER_DROP_FOREIGN_KEY)
|| ctx->num_to_drop_fk > 0);
DBUG_ASSERT(ctx->num_to_drop_fk
== ha_alter_info->alter_info->drop_list.elements
|| ctx->num_to_drop_vcol
== ha_alter_info->alter_info->drop_list.elements);
+ if (ctx->page_compression_level
+ && innobase_page_compression_try(ctx->page_compression_level,
+ ctx->new_table, trx,
+ table_name)) {
+ DBUG_RETURN(true);
+ }
+
for (ulint i = 0; i < ctx->num_to_add_index; i++) {
dict_index_t* index = ctx->add_index[i];
DBUG_ASSERT(dict_index_get_online_status(index)
@@ -8034,6 +8963,11 @@ commit_try_norebuild(
DBUG_RETURN(true);
}
+ if ((ha_alter_info->handler_flags & ALTER_COLUMN_UNVERSIONED)
+ && vers_change_fields_try(ha_alter_info, ctx, trx, old_table)) {
+ DBUG_RETURN(true);
+ }
+
dberr_t error;
/* We altered the table in place. Mark the indexes as committed. */
@@ -8090,14 +9024,14 @@ commit_try_norebuild(
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)
+ & ALTER_COLUMN_NAME)
&& innobase_rename_columns_try(ha_alter_info, ctx, old_table,
trx, table_name)) {
DBUG_RETURN(true);
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
+ & ALTER_COLUMN_EQUAL_PACK_LENGTH)
&& innobase_enlarge_columns_try(ha_alter_info, old_table,
ctx->old_table, trx, table_name)) {
DBUG_RETURN(true);
@@ -8105,25 +9039,25 @@ commit_try_norebuild(
#ifdef MYSQL_RENAME_INDEX
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::RENAME_INDEX)
+ & ALTER_RENAME_INDEX)
&& rename_indexes_in_data_dictionary(ctx, ha_alter_info, trx)) {
DBUG_RETURN(true);
}
#endif /* MYSQL_RENAME_INDEX */
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_VIRTUAL_COLUMN)
- && innobase_drop_virtual_try(
- ha_alter_info, altered_table, old_table,
- ctx->old_table, trx)) {
+ & ALTER_DROP_VIRTUAL_COLUMN)
+ && innobase_drop_virtual_try(ha_alter_info, ctx->old_table, trx)) {
DBUG_RETURN(true);
}
if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_VIRTUAL_COLUMN)
- && innobase_add_virtual_try(
- ha_alter_info, altered_table, old_table,
- ctx->old_table, trx)) {
+ & ALTER_ADD_VIRTUAL_COLUMN)
+ && innobase_add_virtual_try(ha_alter_info, ctx->old_table, trx)) {
+ DBUG_RETURN(true);
+ }
+
+ if (innobase_add_instant_try(ctx, altered_table, old_table, trx)) {
DBUG_RETURN(true);
}
@@ -8132,24 +9066,73 @@ commit_try_norebuild(
/** Commit the changes to the data dictionary cache
after a successful commit_try_norebuild() call.
-@param ctx In-place ALTER TABLE context
+@param ha_alter_info algorithm=inplace context
+@param ctx In-place ALTER TABLE context for the current partition
@param table the TABLE before the ALTER
-@param trx Data dictionary transaction object
-(will be started and committed)
-@return whether all replacements were found for dropped indexes */
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
+@param trx Data dictionary transaction
+(will be started and committed, for DROP INDEX) */
+inline MY_ATTRIBUTE((nonnull))
+void
commit_cache_norebuild(
/*===================*/
+ Alter_inplace_info* ha_alter_info,
ha_innobase_inplace_ctx*ctx,
const TABLE* table,
trx_t* trx)
{
DBUG_ENTER("commit_cache_norebuild");
-
- bool found = true;
-
DBUG_ASSERT(!ctx->need_rebuild());
+ DBUG_ASSERT(ctx->new_table->space != fil_system.temp_space);
+ DBUG_ASSERT(!ctx->new_table->is_temporary());
+
+ if (ctx->page_compression_level) {
+ DBUG_ASSERT(ctx->new_table->space != fil_system.sys_space);
+ ctx->new_table->flags &=
+ ~(0xFU << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
+ ctx->new_table->flags |= 1 << DICT_TF_POS_PAGE_COMPRESSION
+ | (ctx->page_compression_level
+ << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
+
+ if (fil_space_t* space = ctx->new_table->space) {
+ bool update = !(space->flags
+ & FSP_FLAGS_MASK_PAGE_COMPRESSION);
+ mutex_enter(&fil_system.mutex);
+ space->flags = (~FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL
+ & (space->flags
+ | FSP_FLAGS_MASK_PAGE_COMPRESSION))
+ | ctx->page_compression_level
+ << FSP_FLAGS_MEM_COMPRESSION_LEVEL;
+ mutex_exit(&fil_system.mutex);
+
+ if (update) {
+ /* Maybe we should introduce an undo
+ log record for updating tablespace
+ flags, and perform the update already
+ in innobase_page_compression_try().
+
+ If the server is killed before the
+ following mini-transaction commit
+ becomes durable, fsp_flags_try_adjust()
+ will perform the equivalent adjustment
+ and warn "adjusting FSP_SPACE_FLAGS". */
+ mtr_t mtr;
+ mtr.start();
+ if (buf_block_t* b = buf_page_get(
+ page_id_t(space->id, 0),
+ page_size_t(space->flags),
+ RW_X_LATCH, &mtr)) {
+ mtr.set_named_space(space);
+ mlog_write_ulint(
+ FSP_HEADER_OFFSET
+ + FSP_SPACE_FLAGS + b->frame,
+ space->flags
+ & ~FSP_FLAGS_MEM_MASK,
+ MLOG_4BYTES, &mtr);
+ }
+ mtr.commit();
+ }
+ }
+ }
col_set drop_list;
col_set v_drop_list;
@@ -8204,7 +9187,7 @@ commit_cache_norebuild(
if (!dict_foreign_replace_index(
index->table, ctx->col_names, index)) {
- found = false;
+ ut_a(!ctx->prebuilt->trx->check_foreigns);
}
/* Mark the index dropped
@@ -8236,6 +9219,19 @@ commit_cache_norebuild(
trx_commit_for_mysql(trx);
}
+ if (!ctx->is_instant()) {
+ innobase_rename_or_enlarge_columns_cache(
+ ha_alter_info, table, ctx->new_table);
+ }
+
+ if (ha_alter_info->handler_flags & ALTER_COLUMN_UNVERSIONED) {
+ vers_change_fields_cache(ha_alter_info, ctx, table);
+ }
+
+#ifdef MYSQL_RENAME_INDEX
+ rename_indexes_in_cache(ctx, ha_alter_info);
+#endif
+
ctx->new_table->fts_doc_id_index
= ctx->new_table->fts
? dict_table_get_index_on_name(
@@ -8243,8 +9239,7 @@ commit_cache_norebuild(
: NULL;
DBUG_ASSERT((ctx->new_table->fts == NULL)
== (ctx->new_table->fts_doc_id_index == NULL));
-
- DBUG_RETURN(found);
+ DBUG_VOID_RETURN;
}
/** Adjust the persistent statistics after non-rebuilding ALTER TABLE.
@@ -8252,8 +9247,6 @@ Remove statistics for dropped indexes, add statistics for created indexes
and rename statistics for renamed indexes.
@param ha_alter_info Data used during in-place alter
@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param table_name Table name in MySQL
@param thd MySQL connection
*/
static
@@ -8262,8 +9255,6 @@ alter_stats_norebuild(
/*==================*/
Alter_inplace_info* ha_alter_info,
ha_innobase_inplace_ctx* ctx,
- TABLE* altered_table,
- const char* table_name,
THD* thd)
{
ulint i;
@@ -8302,7 +9293,7 @@ alter_stats_norebuild(
char errstr[1024];
if (dict_stats_drop_index(
- ctx->new_table->name.m_name, key->name,
+ ctx->new_table->name.m_name, key->name.str,
errstr, sizeof errstr) != DB_SUCCESS) {
push_warning(thd,
Sql_condition::WARN_LEVEL_WARN,
@@ -8365,7 +9356,7 @@ alter_stats_rebuild(
{
DBUG_ENTER("alter_stats_rebuild");
- if (dict_table_is_discarded(table)
+ if (!table->space
|| !dict_stats_is_persistent_enabled(table)) {
DBUG_VOID_RETURN;
}
@@ -8503,9 +9494,9 @@ ha_innobase::commit_inplace_alter_table(
/* If decryption failed for old table or new table
fail here. */
if ((!ctx->old_table->is_readable()
- && fil_space_get(ctx->old_table->space))
+ && ctx->old_table->space)
|| (!ctx->new_table->is_readable()
- && fil_space_get(ctx->new_table->space))) {
+ && ctx->new_table->space)) {
String str;
const char* engine= table_type();
get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
@@ -8685,9 +9676,16 @@ ha_innobase::commit_inplace_alter_table(
}
/* Commit or roll back the changes to the data dictionary. */
+ DEBUG_SYNC(m_user_thd, "innodb_alter_inplace_before_commit");
if (fail) {
trx_rollback_for_mysql(trx);
+ for (inplace_alter_handler_ctx** pctx = ctx_array;
+ *pctx; pctx++) {
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>(*pctx);
+ ctx->rollback_instant();
+ }
} else if (!new_clustered) {
trx_commit_for_mysql(trx);
} else {
@@ -8864,19 +9862,9 @@ foreign_fail:
"InnoDB: Could not add foreign"
" key constraints.");
} else {
- if (!commit_cache_norebuild(
- ctx, table, trx)) {
- ut_a(!m_prebuilt->trx->check_foreigns);
- }
-
- innobase_rename_or_enlarge_columns_cache(
- ha_alter_info, table,
- ctx->new_table);
-#ifdef MYSQL_RENAME_INDEX
- rename_indexes_in_cache(ctx, ha_alter_info);
-#endif
+ commit_cache_norebuild(ha_alter_info, ctx,
+ table, trx);
}
-
}
dict_mem_table_free_foreign_vcol_set(ctx->new_table);
@@ -8916,7 +9904,7 @@ foreign_fail:
row_mysql_unlock_data_dictionary(trx);
if (trx != ctx0->trx) {
- trx_free_for_mysql(trx);
+ trx_free(trx);
}
DBUG_RETURN(true);
}
@@ -8935,8 +9923,7 @@ foreign_fail:
= static_cast<ha_innobase_inplace_ctx*>(*pctx);
if (ctx->trx) {
- trx_free_for_mysql(ctx->trx);
- ctx->trx = NULL;
+ trx_free(ctx->trx);
}
}
@@ -8979,7 +9966,7 @@ foreign_fail:
}
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
DBUG_RETURN(false);
}
@@ -9110,7 +10097,7 @@ foreign_fail:
}
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
/* TODO: The following code could be executed
while allowing concurrent access to the table
@@ -9138,9 +10125,7 @@ foreign_fail:
(*pctx);
DBUG_ASSERT(!ctx->need_rebuild());
- alter_stats_norebuild(
- ha_alter_info, ctx, altered_table,
- table->s->table_name.str, m_user_thd);
+ alter_stats_norebuild(ha_alter_info, ctx, m_user_thd);
DBUG_INJECT_CRASH("ib_commit_inplace_crash",
crash_inject_count++);
}
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 3e5fca6541a..cedb6e05782 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -84,11 +84,6 @@ in i_s_page_type[] array */
#define I_S_PAGE_TYPE_BITS 4
-/* Check if we can hold all page types */
-#if I_S_PAGE_TYPE_LAST >= 1 << I_S_PAGE_TYPE_BITS
-# error i_s_page_type[] is too large
-#endif
-
/** Name string for File Page Types */
static buf_page_desc_t i_s_page_type[] = {
{"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED},
@@ -281,7 +276,7 @@ field_store_ulint(
if (n != ULINT_UNDEFINED) {
- ret = field->store(n, true);
+ ret = field->store(longlong(n), true);
field->set_notnull();
} else {
@@ -481,18 +476,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#ifdef BTR_CUR_HASH_ADAPT
-#define IDX_TRX_ADAPTIVE_HASH_LATCHED 20
- {STRUCT_FLD(field_name, "trx_adaptive_hash_latched"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#endif /* BTR_CUR_HASH_ADAPT */
-
-#define IDX_TRX_READ_ONLY 20 + I_S_AHI
+#define IDX_TRX_READ_ONLY 20
{STRUCT_FLD(field_name, "trx_is_read_only"),
STRUCT_FLD(field_length, 1),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -501,7 +485,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 21 + I_S_AHI
+#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 21
{STRUCT_FLD(field_name, "trx_autocommit_non_locking"),
STRUCT_FLD(field_length, 1),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -648,11 +632,6 @@ fill_innodb_trx_from_cache(
OK(field_store_string(fields[IDX_TRX_LAST_FOREIGN_KEY_ERROR],
row->trx_foreign_key_error));
-#ifdef BTR_CUR_HASH_ADAPT
- /* trx_adaptive_hash_latched */
- OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store(0, true));
-#endif /* BTR_CUR_HASH_ADAPT */
-
/* trx_is_read_only*/
OK(fields[IDX_TRX_READ_ONLY]->store(
row->trx_is_read_only, true));
@@ -1195,7 +1174,7 @@ trx_i_s_common_fill_table(
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ) /*!< in: condition (not used) */
{
- const char* table_name;
+ LEX_CSTRING table_name;
int ret;
trx_i_s_cache_t* cache;
@@ -1215,7 +1194,7 @@ trx_i_s_common_fill_table(
table_name = tables->schema_table_name;
/* or table_name = tables->schema_table->table_name; */
- RETURN_IF_INNODB_NOT_STARTED(table_name);
+ RETURN_IF_INNODB_NOT_STARTED(table_name.str);
/* update the cache */
trx_i_s_cache_start_write(cache);
@@ -1224,7 +1203,7 @@ trx_i_s_common_fill_table(
if (trx_i_s_cache_is_truncated(cache)) {
- ib::warn() << "Data in " << table_name << " truncated due to"
+ ib::warn() << "Data in " << table_name.str << " truncated due to"
" memory limit of " << TRX_I_S_MEM_LIMIT << " bytes";
}
@@ -1232,7 +1211,7 @@ trx_i_s_common_fill_table(
trx_i_s_cache_start_read(cache);
- if (innobase_strcasecmp(table_name, "innodb_trx") == 0) {
+ if (innobase_strcasecmp(table_name.str, "innodb_trx") == 0) {
if (fill_innodb_trx_from_cache(
cache, thd, tables->table) != 0) {
@@ -1240,7 +1219,7 @@ trx_i_s_common_fill_table(
ret = 1;
}
- } else if (innobase_strcasecmp(table_name, "innodb_locks") == 0) {
+ } else if (innobase_strcasecmp(table_name.str, "innodb_locks") == 0) {
if (fill_innodb_locks_from_cache(
cache, thd, tables->table) != 0) {
@@ -1248,7 +1227,7 @@ trx_i_s_common_fill_table(
ret = 1;
}
- } else if (innobase_strcasecmp(table_name, "innodb_lock_waits") == 0) {
+ } else if (innobase_strcasecmp(table_name.str, "innodb_lock_waits") == 0) {
if (fill_innodb_lock_waits_from_cache(
cache, thd, tables->table) != 0) {
@@ -1258,7 +1237,7 @@ trx_i_s_common_fill_table(
} else {
ib::error() << "trx_i_s_common_fill_table() was"
- " called to fill unknown table: " << table_name << "."
+ " called to fill unknown table: " << table_name.str << "."
" This function only knows how to fill"
" innodb_trx, innodb_locks and"
" innodb_lock_waits tables.";
@@ -1362,7 +1341,7 @@ i_s_cmp_fill_low(
DBUG_RETURN(0);
}
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
for (uint i = 0; i < PAGE_ZIP_SSIZE_MAX; i++) {
page_zip_stat_t* zip_stat = &page_zip_stat[i];
@@ -1665,7 +1644,7 @@ i_s_cmp_per_index_fill_low(
DBUG_RETURN(0);
}
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* Create a snapshot of the stats so we do not bump into lock
order violations with dict_sys->mutex below. */
@@ -1686,7 +1665,7 @@ i_s_cmp_per_index_fill_low(
char db_utf8[MAX_DB_UTF8_LEN];
char table_utf8[MAX_TABLE_UTF8_LEN];
- dict_fs2utf8(index->table_name,
+ dict_fs2utf8(index->table->name.m_name,
db_utf8, sizeof(db_utf8),
table_utf8, sizeof(table_utf8));
@@ -1988,7 +1967,7 @@ i_s_cmpmem_fill_low(
DBUG_RETURN(0);
}
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
@@ -2909,7 +2888,7 @@ i_s_fts_deleted_generic_fill(
deleted = fts_doc_ids_create();
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "Select for FTS DELETE TABLE";
FTS_INIT_FTS_TABLE(&fts_table,
@@ -2932,7 +2911,7 @@ i_s_fts_deleted_generic_fill(
BREAK_IF(ret = schema_table_store_record(thd, table));
}
- trx_free_for_background(trx);
+ trx_free(trx);
fts_doc_ids_free(deleted);
@@ -3442,7 +3421,7 @@ i_s_fts_index_table_fill_selected(
fts_result_cache_limit = 8192;
);
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "fetching FTS index nodes";
@@ -3499,7 +3478,7 @@ i_s_fts_index_table_fill_selected(
que_graph_free(graph);
mutex_exit(&dict_sys->mutex);
- trx_free_for_background(trx);
+ trx_free(trx);
if (fetch.total_memory >= fts_result_cache_limit) {
error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
@@ -3937,7 +3916,7 @@ i_s_fts_config_fill(
DBUG_RETURN(0);
}
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "Select for FTS CONFIG TABLE";
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, user_table);
@@ -3988,7 +3967,7 @@ i_s_fts_config_fill(
fts_sql_commit(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
dict_table_close(user_table, FALSE, FALSE);
@@ -4509,7 +4488,7 @@ i_s_innodb_buffer_stats_fill_table(
buf_pool_info_t* pool_info;
DBUG_ENTER("i_s_innodb_buffer_fill_general");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* Only allow the PROCESS privilege holder to access the stats */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -4817,6 +4796,8 @@ i_s_innodb_buffer_page_fill(
TABLE* table;
Field** fields;
+ compile_time_assert(I_S_PAGE_TYPE_LAST < 1 << I_S_PAGE_TYPE_BITS);
+
DBUG_ENTER("i_s_innodb_buffer_page_fill");
table = tables->table;
@@ -4889,8 +4870,8 @@ i_s_innodb_buffer_page_fill(
if (index) {
table_name_end = innobase_convert_name(
table_name, sizeof(table_name),
- index->table_name,
- strlen(index->table_name),
+ index->table->name.m_name,
+ strlen(index->table->name.m_name),
thd);
ret = fields[IDX_BUFFER_PAGE_TABLE_NAME]
@@ -4927,10 +4908,7 @@ i_s_innodb_buffer_page_fill(
page_info->zip_ssize
? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize
: 0, true));
-
-#if BUF_PAGE_STATE_BITS > 3
-# error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<<BUF_PAGE_STATE_BITS values are checked for"
-#endif
+ compile_time_assert(BUF_PAGE_STATE_BITS == 3);
state = static_cast<enum buf_page_state>(page_info->page_state);
switch (state) {
@@ -5014,13 +4992,15 @@ i_s_innodb_set_page_type(
in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX
(1) for index pages or I_S_PAGE_TYPE_IBUF for
change buffer index pages */
- if (page_info->index_id
- == static_cast<index_id_t>(DICT_IBUF_ID_MIN
- + IBUF_SPACE_ID)) {
- page_info->page_type = I_S_PAGE_TYPE_IBUF;
- } else if (page_type == FIL_PAGE_RTREE) {
+ if (page_type == FIL_PAGE_RTREE) {
page_info->page_type = I_S_PAGE_TYPE_RTREE;
+ } else if (page_info->index_id
+ == static_cast<index_id_t>(DICT_IBUF_ID_MIN
+ + IBUF_SPACE_ID)) {
+ page_info->page_type = I_S_PAGE_TYPE_IBUF;
} else {
+ ut_ad(page_type == FIL_PAGE_INDEX
+ || page_type == FIL_PAGE_TYPE_INSTANT);
page_info->page_type = I_S_PAGE_TYPE_INDEX;
}
@@ -5244,7 +5224,7 @@ i_s_innodb_buffer_page_fill_table(
DBUG_ENTER("i_s_innodb_buffer_page_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -5613,8 +5593,8 @@ i_s_innodb_buf_page_lru_fill(
if (index) {
table_name_end = innobase_convert_name(
table_name, sizeof(table_name),
- index->table_name,
- strlen(index->table_name),
+ index->table->name.m_name,
+ strlen(index->table->name.m_name),
thd);
ret = fields[IDX_BUF_LRU_PAGE_TABLE_NAME]
@@ -5795,7 +5775,7 @@ i_s_innodb_buf_page_lru_fill_table(
DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to any users that do not hold PROCESS_ACL */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -5893,12 +5873,8 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
/*******************************************************************//**
Unbind a dynamic INFORMATION_SCHEMA table.
-@return 0 on success */
-static
-int
-i_s_common_deinit(
-/*==============*/
- void* p) /*!< in/out: table schema object */
+@return 0 */
+static int i_s_common_deinit(void*)
{
DBUG_ENTER("i_s_common_deinit");
@@ -5956,16 +5932,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLES_FILE_FORMAT 5
- {STRUCT_FLD(field_name, "FILE_FORMAT"),
- STRUCT_FLD(field_length, 10),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_ROW_FORMAT 6
+#define SYS_TABLES_ROW_FORMAT 5
{STRUCT_FLD(field_name, "ROW_FORMAT"),
STRUCT_FLD(field_length, 12),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5974,7 +5941,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLES_ZIP_PAGE_SIZE 7
+#define SYS_TABLES_ZIP_PAGE_SIZE 6
{STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5983,7 +5950,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLES_SPACE_TYPE 8
+#define SYS_TABLES_SPACE_TYPE 7
{STRUCT_FLD(field_name, "SPACE_TYPE"),
STRUCT_FLD(field_length, 10),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -6012,11 +5979,8 @@ i_s_dict_fill_sys_tables(
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(
table->flags);
const page_size_t& page_size = dict_tf_get_page_size(table->flags);
- const char* file_format;
const char* row_format;
- const char* space_type;
- file_format = trx_sys_file_format_id_to_name(atomic_blobs);
if (!compact) {
row_format = "Redundant";
} else if (!atomic_blobs) {
@@ -6027,12 +5991,6 @@ i_s_dict_fill_sys_tables(
row_format = "Dynamic";
}
- if (is_system_tablespace(table->space)) {
- space_type = "System";
- } else {
- space_type = "Single";
- }
-
DBUG_ENTER("i_s_dict_fill_sys_tables");
fields = table_to_fill->field;
@@ -6045,9 +6003,7 @@ i_s_dict_fill_sys_tables(
OK(fields[SYS_TABLES_NUM_COLUMN]->store(table->n_cols));
- OK(fields[SYS_TABLES_SPACE]->store(table->space));
-
- OK(field_store_string(fields[SYS_TABLES_FILE_FORMAT], file_format));
+ OK(fields[SYS_TABLES_SPACE]->store(table->space_id, true));
OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format));
@@ -6056,7 +6012,8 @@ i_s_dict_fill_sys_tables(
? page_size.physical()
: 0, true));
- OK(field_store_string(fields[SYS_TABLES_SPACE_TYPE], space_type));
+ OK(field_store_string(fields[SYS_TABLES_SPACE_TYPE],
+ table->space_id ? "Single" : "System"));
OK(schema_table_store_record(thd, table_to_fill));
@@ -6080,7 +6037,7 @@ i_s_sys_tables_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tables_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6100,23 +6057,19 @@ i_s_sys_tables_fill_table(
/* Create and populate a dict_table_t structure with
information from SYS_TABLES row */
err_msg = dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table_rec,
- DICT_TABLE_LOAD_FROM_RECORD, &mtr);
+ heap, rec, &table_rec, false, &mtr);
mutex_exit(&dict_sys->mutex);
if (!err_msg) {
- i_s_dict_fill_sys_tables(thd, table_rec, tables->table);
+ i_s_dict_fill_sys_tables(thd, table_rec,
+ tables->table);
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_FIND_SYSTEM_REC, "%s",
err_msg);
}
- /* Since dict_process_sys_tables_rec_and_mtr_commit()
- is called with DICT_TABLE_LOAD_FROM_RECORD, the table_rec
- is created in dict_process_sys_tables_rec(), we will
- need to free it */
if (table_rec) {
dict_mem_table_free(table_rec);
}
@@ -6380,7 +6333,7 @@ i_s_sys_tables_fill_table_stats(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tables_fill_table_stats");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6401,8 +6354,7 @@ i_s_sys_tables_fill_table_stats(
/* Fetch the dict_table_t structure corresponding to
this SYS_TABLES record */
err_msg = dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table_rec,
- DICT_TABLE_LOAD_FROM_CACHE, &mtr);
+ heap, rec, &table_rec, true, &mtr);
ulint ref_count = table_rec ? table_rec->get_ref_count() : 0;
mutex_exit(&dict_sys->mutex);
@@ -6601,6 +6553,7 @@ i_s_dict_fill_sys_indexes(
/*======================*/
THD* thd, /*!< in: thread */
table_id_t table_id, /*!< in: table id */
+ ulint space_id, /*!< in: tablespace id */
dict_index_t* index, /*!< in: populated dict_index_t
struct with index info */
TABLE* table_to_fill) /*!< in/out: fill this table */
@@ -6625,20 +6578,25 @@ i_s_dict_fill_sys_indexes(
OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), true));
- OK(fields[SYS_INDEX_TYPE]->store(index->type));
+ OK(fields[SYS_INDEX_TYPE]->store(index->type, true));
OK(fields[SYS_INDEX_NUM_FIELDS]->store(index->n_fields));
/* FIL_NULL is ULINT32_UNDEFINED */
if (index->page == FIL_NULL) {
- OK(fields[SYS_INDEX_PAGE_NO]->store(-1));
+ fields[SYS_INDEX_PAGE_NO]->set_null();
} else {
- OK(fields[SYS_INDEX_PAGE_NO]->store(index->page));
+ OK(fields[SYS_INDEX_PAGE_NO]->store(index->page, true));
}
- OK(fields[SYS_INDEX_SPACE]->store(index->space));
+ if (space_id == ULINT_UNDEFINED) {
+ fields[SYS_INDEX_SPACE]->set_null();
+ } else {
+ OK(fields[SYS_INDEX_SPACE]->store(space_id, true));
+ }
- OK(fields[SYS_INDEX_MERGE_THRESHOLD]->store(index->merge_threshold));
+ OK(fields[SYS_INDEX_MERGE_THRESHOLD]->store(index->merge_threshold,
+ true));
OK(schema_table_store_record(thd, table_to_fill));
@@ -6662,7 +6620,7 @@ i_s_sys_indexes_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_indexes_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -6680,19 +6638,27 @@ i_s_sys_indexes_fill_table(
while (rec) {
const char* err_msg;
table_id_t table_id;
+ ulint space_id;
dict_index_t index_rec;
/* Populate a dict_index_t structure with information from
a SYS_INDEXES row */
err_msg = dict_process_sys_indexes_rec(heap, rec, &index_rec,
&table_id);
-
+ const byte* field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__SPACE, &space_id);
+ space_id = space_id == 4 ? mach_read_from_4(field)
+ : ULINT_UNDEFINED;
mtr_commit(&mtr);
mutex_exit(&dict_sys->mutex);
if (!err_msg) {
- i_s_dict_fill_sys_indexes(thd, table_id, &index_rec,
- tables->table);
+ if (int err = i_s_dict_fill_sys_indexes(
+ thd, table_id, space_id, &index_rec,
+ tables->table)) {
+ mem_heap_free(heap);
+ DBUG_RETURN(err);
+ }
} else {
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_FIND_SYSTEM_REC, "%s",
@@ -6871,7 +6837,7 @@ i_s_dict_fill_sys_columns(
OK(field_store_string(fields[SYS_COLUMN_NAME], col_name));
- if (dict_col_is_virtual(column)) {
+ if (column->is_virtual()) {
ulint pos = dict_create_v_col_pos(nth_v_col, column->ind);
OK(fields[SYS_COLUMN_POSITION]->store(pos, true));
} else {
@@ -6907,7 +6873,7 @@ i_s_sys_columns_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_columns_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7114,18 +7080,16 @@ i_s_sys_virtual_fill_table(
const rec_t* rec;
ulint pos;
ulint base_pos;
- mem_heap_t* heap;
mtr_t mtr;
DBUG_ENTER("i_s_sys_virtual_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
DBUG_RETURN(0);
}
- heap = mem_heap_create(1000);
mutex_enter(&dict_sys->mutex);
mtr_start(&mtr);
@@ -7137,7 +7101,7 @@ i_s_sys_virtual_fill_table(
/* populate a dict_col_t structure with information from
a SYS_VIRTUAL row */
- err_msg = dict_process_sys_virtual_rec(heap, rec,
+ err_msg = dict_process_sys_virtual_rec(rec,
&table_id, &pos,
&base_pos);
@@ -7153,8 +7117,6 @@ i_s_sys_virtual_fill_table(
err_msg);
}
- mem_heap_empty(heap);
-
/* Get the next record */
mutex_enter(&dict_sys->mutex);
mtr_start(&mtr);
@@ -7163,7 +7125,6 @@ i_s_sys_virtual_fill_table(
mtr_commit(&mtr);
mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
DBUG_RETURN(0);
}
@@ -7320,7 +7281,7 @@ i_s_sys_fields_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_fields_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7552,7 +7513,7 @@ i_s_sys_foreign_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_foreign_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7767,7 +7728,7 @@ i_s_sys_foreign_cols_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -7919,16 +7880,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_FILE_FORMAT 3
- {STRUCT_FLD(field_name, "FILE_FORMAT"),
- STRUCT_FLD(field_length, 10),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_ROW_FORMAT 4
+#define SYS_TABLESPACES_ROW_FORMAT 3
{STRUCT_FLD(field_name, "ROW_FORMAT"),
STRUCT_FLD(field_length, 22),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -7937,7 +7889,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_PAGE_SIZE 5
+#define SYS_TABLESPACES_PAGE_SIZE 4
{STRUCT_FLD(field_name, "PAGE_SIZE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -7946,7 +7898,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_ZIP_PAGE_SIZE 6
+#define SYS_TABLESPACES_ZIP_PAGE_SIZE 5
{STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -7955,7 +7907,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_SPACE_TYPE 7
+#define SYS_TABLESPACES_SPACE_TYPE 6
{STRUCT_FLD(field_name, "SPACE_TYPE"),
STRUCT_FLD(field_length, 10),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -7964,7 +7916,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_FS_BLOCK_SIZE 8
+#define SYS_TABLESPACES_FS_BLOCK_SIZE 7
{STRUCT_FLD(field_name, "FS_BLOCK_SIZE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -7973,7 +7925,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_FILE_SIZE 9
+#define SYS_TABLESPACES_FILE_SIZE 8
{STRUCT_FLD(field_name, "FILE_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -7982,7 +7934,7 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLESPACES_ALLOC_SIZE 10
+#define SYS_TABLESPACES_ALLOC_SIZE 9
{STRUCT_FLD(field_name, "ALLOCATED_SIZE"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -8011,12 +7963,10 @@ i_s_dict_fill_sys_tablespaces(
{
Field** fields;
ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
- const char* file_format;
const char* row_format;
DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
- file_format = trx_sys_file_format_id_to_name(atomic_blobs);
if (is_system_tablespace(space)) {
row_format = "Compact, Redundant or Dynamic";
} else if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
@@ -8035,9 +7985,6 @@ i_s_dict_fill_sys_tablespaces(
OK(fields[SYS_TABLESPACES_FLAGS]->store(flags, true));
- OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
- file_format));
-
OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT], row_format));
OK(field_store_string(fields[SYS_TABLESPACES_SPACE_TYPE],
@@ -8145,7 +8092,7 @@ i_s_sys_tablespaces_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_tablespaces_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -8336,7 +8283,7 @@ i_s_sys_datafiles_fill_table(
mtr_t mtr;
DBUG_ENTER("i_s_sys_datafiles_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -8631,31 +8578,32 @@ i_s_tablespaces_encryption_fill_table(
Item* ) /*!< in: condition (not used) */
{
DBUG_ENTER("i_s_tablespaces_encryption_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, SUPER_ACL)) {
DBUG_RETURN(0);
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
- if (space->purpose == FIL_TYPE_TABLESPACE) {
- space->n_pending_ops++;
- mutex_exit(&fil_system->mutex);
+ if (space->purpose == FIL_TYPE_TABLESPACE
+ && !space->is_stopping()) {
+ space->acquire();
+ mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_encryption(
thd, space, tables->table)) {
- fil_space_release(space);
+ space->release();
DBUG_RETURN(err);
}
- mutex_enter(&fil_system->mutex);
- space->n_pending_ops--;
+ mutex_enter(&fil_system.mutex);
+ space->release();
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
DBUG_RETURN(0);
}
/*******************************************************************//**
@@ -8902,31 +8850,32 @@ i_s_tablespaces_scrubbing_fill_table(
Item* ) /*!< in: condition (not used) */
{
DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without SUPER_ACL privilege */
if (check_global_access(thd, SUPER_ACL)) {
DBUG_RETURN(0);
}
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
- for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
- if (space->purpose == FIL_TYPE_TABLESPACE) {
- space->n_pending_ops++;
- mutex_exit(&fil_system->mutex);
+ if (space->purpose == FIL_TYPE_TABLESPACE
+ && !space->is_stopping()) {
+ space->acquire();
+ mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_scrubbing(
thd, space, tables->table)) {
- fil_space_release(space);
+ space->release();
DBUG_RETURN(err);
}
- mutex_enter(&fil_system->mutex);
- space->n_pending_ops--;
+ mutex_enter(&fil_system.mutex);
+ space->release();
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
DBUG_RETURN(0);
}
/*******************************************************************//**
@@ -9058,7 +9007,7 @@ i_s_innodb_mutexes_fill_table(
Field** fields = tables->table->field;
DBUG_ENTER("i_s_innodb_mutexes_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -9376,25 +9325,7 @@ static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- // SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17
- {STRUCT_FLD(field_name, "LAST_READER_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18
- {STRUCT_FLD(field_name, "LAST_READER_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19
+ // SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 17
{STRUCT_FLD(field_name, "LAST_WRITER_FILE"),
STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -9403,7 +9334,7 @@ static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- // SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20
+ // SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 18
{STRUCT_FLD(field_name, "LAST_WRITER_LINE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -9412,7 +9343,7 @@ static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- // SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21
+ // SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 19
{STRUCT_FLD(field_name, "OS_WAIT_COUNT"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
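The INFORMATION_SCHEMA fill functions for tablespace encryption and scrubbing above stop adjusting n_pending_ops directly and instead pin each tablespace with space->acquire() while fil_system.mutex is held, drop the mutex for the slow row-filling work, then re-take the mutex and release() the pin. Below is a minimal standalone sketch of that pin-under-mutex iteration pattern using std::mutex and a plain reference count; Space, is_stopping() and the other names are illustrative stand-ins, not the InnoDB types.

#include <list>
#include <mutex>

/* Minimal stand-in for illustration; not the InnoDB fil_space_t. */
struct Space {
	int	refs = 0;
	bool	stopping = false;
	void	acquire() { ++refs; }	/* caller holds the list mutex */
	void	release() { --refs; }
	bool	is_stopping() const { return stopping; }
};

static std::mutex		list_mutex;	/* plays the role of fil_system.mutex */
static std::list<Space>		space_list;

/* Visit every space without holding the list mutex during the slow callback. */
template<typename F> void for_each_space(F&& fill_row)
{
	std::unique_lock<std::mutex> lock(list_mutex);
	for (Space& s : space_list) {
		if (s.is_stopping()) continue;
		s.acquire();		/* pin the element before dropping the mutex */
		lock.unlock();		/* do the expensive work without the mutex */
		fill_row(s);
		lock.lock();
		s.release();		/* unpin under the mutex again */
	}
}

int main()
{
	space_list.push_back(Space());
	for_each_space([](Space&) { /* fill one I_S row here */ });
	return 0;
}

The pin is what keeps the element valid while the mutex is released; in this sketch nothing ever frees a Space, so the counter only mirrors the shape of the pattern.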
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index e07fe49f7fa..4ff2248c28e 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -127,11 +127,9 @@ HPUX aCC: HP ANSI C++ B3910B A.03.65) can't handle it. */
#define SYS_SEMAPHORE_WAITS_READERS 14
#define SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15
#define SYS_SEMAPHORE_WAITS_LOCK_WORD 16
-#define SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17
-#define SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18
-#define SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19
-#define SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20
-#define SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21
+#define SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 17
+#define SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 18
+#define SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 19
/*******************************************************************//**
Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 1fd7f7a2241..9ed1efd35ff 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -34,9 +34,6 @@ my_bool srv_ibuf_disable_background_merge;
/** Number of bits describing a single page */
#define IBUF_BITS_PER_PAGE 4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
/** The start address for an insert buffer bitmap page bitmap */
#define IBUF_BITMAP PAGE_DATA
@@ -185,7 +182,7 @@ it uses synchronous aio, it can access any pages, as long as it obeys the
access order rules. */
/** Operations that can currently be buffered. */
-ibuf_use_t ibuf_use = IBUF_USE_ALL;
+ulong innodb_change_buffering;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
@@ -255,9 +252,6 @@ type, counter, and some flags. */
/* @{ */
#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at
the beginning of the fourth field */
-#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
/* Offsets for the fields at the beginning of the fourth field */
#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */
@@ -437,7 +431,7 @@ ibuf_count_set(
ulint val)
{
ibuf_count_check(page_id);
- ut_a(val < UNIV_PAGE_SIZE);
+ ut_a(val < srv_page_size);
ibuf_counts[page_id.space()][page_id.page_no()] = val;
}
@@ -449,6 +443,10 @@ void
ibuf_close(void)
/*============*/
{
+ if (ibuf == NULL) {
+ return;
+ }
+
mutex_free(&ibuf_pessimistic_insert_mutex);
mutex_free(&ibuf_mutex);
@@ -478,7 +476,7 @@ ibuf_size_update(
ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+ PAGE_BTR_IBUF_FREE_LIST);
- ibuf->height = 1 + btr_page_get_level_low(root);
+ ibuf->height = 1 + btr_page_get_level(root);
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
@@ -505,7 +503,7 @@ ibuf_init_at_db_start(void)
buffer pool size. Once ibuf struct is initialized this
value is updated with the user supplied size by calling
ibuf_max_size_update(). */
- ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+ ibuf->max_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
* CHANGE_BUFFER_DEFAULT_SIZE) / 100;
mutex_create(LATCH_ID_IBUF, &ibuf_mutex);
@@ -517,7 +515,9 @@ ibuf_init_at_db_start(void)
mtr_start(&mtr);
- mtr_x_lock_space(IBUF_SPACE_ID, &mtr);
+ compile_time_assert(IBUF_SPACE_ID == TRX_SYS_SPACE);
+ compile_time_assert(IBUF_SPACE_ID == 0);
+ mtr_x_lock(&fil_system.sys_space->latch, &mtr);
mutex_enter(&ibuf_mutex);
@@ -553,11 +553,11 @@ ibuf_init_at_db_start(void)
mtr.commit();
ibuf->index = dict_mem_index_create(
- "innodb_change_buffer", "CLUST_IND",
- IBUF_SPACE_ID, DICT_CLUSTERED | DICT_IBUF, 1);
+ dict_mem_table_create("innodb_change_buffer",
+ fil_system.sys_space, 1, 0, 0, 0),
+ "CLUST_IND",
+ DICT_CLUSTERED | DICT_IBUF, 1);
ibuf->index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
- ibuf->index->table = dict_mem_table_create(
- "innodb_change_buffer", IBUF_SPACE_ID, 1, 0, 0, 0);
ibuf->index->n_uniq = REC_MAX_N_FIELDS;
rw_lock_create(index_tree_rw_lock_key, &ibuf->index->lock,
SYNC_IBUF_INDEX_TREE);
@@ -577,7 +577,7 @@ ibuf_max_size_update(
ulint new_val) /*!< in: new value in terms of
percentage of the buffer pool size */
{
- ulint new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+ ulint new_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
* new_val) / 100;
mutex_enter(&ibuf_mutex);
ibuf->max_size = new_size;
@@ -600,6 +600,7 @@ ibuf_bitmap_page_init(
fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
/* Write all zeros to the bitmap */
+ compile_time_assert(!(IBUF_BITS_PER_PAGE % 2));
byte_offset = UT_BITS_IN_BYTES(block->page.size.physical()
* IBUF_BITS_PER_PAGE);
@@ -683,9 +684,7 @@ ibuf_bitmap_page_get_bits_low(
ulint value;
ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
+ compile_time_assert(!(IBUF_BITS_PER_PAGE % 2));
ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
bit_offset = (page_id.page_no() % page_size.physical())
@@ -694,7 +693,7 @@ ibuf_bitmap_page_get_bits_low(
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
- ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+ ut_ad(byte_offset + IBUF_BITMAP < srv_page_size);
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
@@ -731,9 +730,7 @@ ibuf_bitmap_page_set_bits(
ulint map_byte;
ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
+ compile_time_assert(!(IBUF_BITS_PER_PAGE % 2));
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr->is_named_space(page_id.space()));
#ifdef UNIV_IBUF_COUNT_DEBUG
@@ -747,7 +744,7 @@ ibuf_bitmap_page_set_bits(
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
- ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+ ut_ad(byte_offset + IBUF_BITMAP < srv_page_size);
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
@@ -903,7 +900,8 @@ ibuf_set_free_bits_func(
}
mtr_start(&mtr);
- const fil_space_t* space = mtr.set_named_space(block->page.id.space());
+ const fil_space_t* space = mtr.set_named_space_id(
+ block->page.id.space());
bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
block->page.size, &mtr);
@@ -1146,7 +1144,8 @@ ibuf_page_low(
return(FALSE);
}
- ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TYPE_TABLESPACE);
+ compile_time_assert(IBUF_SPACE_ID == 0);
+ ut_ad(fil_system.sys_space->purpose == FIL_TYPE_TABLESPACE);
#ifdef UNIV_DEBUG
if (!x_latch) {
@@ -1317,6 +1316,8 @@ ibuf_rec_get_info_func(
types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+ compile_time_assert(IBUF_REC_INFO_SIZE
+ < DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
switch (info_len_local) {
case 0:
@@ -1486,14 +1487,10 @@ ibuf_dummy_index_create(
dict_table_t* table;
dict_index_t* index;
- table = dict_mem_table_create("IBUF_DUMMY",
- DICT_HDR_SPACE, n, 0,
+ table = dict_mem_table_create("IBUF_DUMMY", NULL, n, 0,
comp ? DICT_TF_COMPACT : 0, 0);
- index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
- DICT_HDR_SPACE, 0, n);
-
- index->table = table;
+ index = dict_mem_index_create(table, "IBUF_DUMMY", 0, n);
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
index->cached = TRUE;
@@ -1620,6 +1617,9 @@ ibuf_build_entry_from_ibuf_rec_func(
ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
}
+ index->n_core_null_bytes
+ = UT_BITS_IN_BYTES(unsigned(index->n_nullable));
+
/* Prevent an ut_ad() failure in page_zip_write_rec() by
adding system columns to the dummy table pointed to by the
dummy secondary index. The insert buffer is only used for
@@ -1908,7 +1908,7 @@ ibuf_entry_build(
field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
- dfield_set_data(field, type_info, ti - type_info);
+ dfield_set_data(field, type_info, ulint(ti - type_info));
/* Set all the types in the new tuple binary */
@@ -1973,11 +1973,8 @@ ibuf_search_tuple_build(
/*********************************************************************//**
Checks if there are enough pages in the free list of the ibuf tree that we
dare to start a pessimistic insert to the insert buffer.
-@return TRUE if enough free pages in list */
-UNIV_INLINE
-ibool
-ibuf_data_enough_free_for_insert(void)
-/*==================================*/
+@return whether enough free pages in list */
+static inline bool ibuf_data_enough_free_for_insert()
{
ut_ad(mutex_own(&ibuf_mutex));
@@ -2021,11 +2018,9 @@ ibuf_add_free_page(void)
page_t* bitmap_page;
mtr_start(&mtr);
- fil_space_t* space = mtr.set_sys_modified();
-
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(&space->latch, &mtr);
+ mtr_x_lock(&fil_system.sys_space->latch, &mtr);
header_page = ibuf_header_page_get(&mtr);
/* Allocate a new page: NOTE that if the page has been a part of a
@@ -2071,13 +2066,11 @@ ibuf_add_free_page(void)
(level 2 page) */
const page_id_t page_id(IBUF_SPACE_ID, block->page.id.page_no());
- const page_size_t page_size(space->flags);
-
- bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, univ_page_size, &mtr);
mutex_exit(&ibuf_mutex);
- ibuf_bitmap_page_set_bits(bitmap_page, page_id, page_size,
+ ibuf_bitmap_page_set_bits(bitmap_page, page_id, univ_page_size,
IBUF_BITMAP_IBUF, TRUE, &mtr);
ibuf_mtr_commit(&mtr);
@@ -2103,13 +2096,10 @@ ibuf_remove_free_page(void)
log_free_check();
mtr_start(&mtr);
- fil_space_t* space = mtr.set_sys_modified();
- const page_size_t page_size(space->flags);
-
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(&space->latch, &mtr);
+ mtr_x_lock(&fil_system.sys_space->latch, &mtr);
header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
@@ -2188,12 +2178,12 @@ ibuf_remove_free_page(void)
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
- bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(page_id, univ_page_size, &mtr);
mutex_exit(&ibuf_mutex);
ibuf_bitmap_page_set_bits(
- bitmap_page, page_id, page_size, IBUF_BITMAP_IBUF, FALSE,
+ bitmap_page, page_id, univ_page_size, IBUF_BITMAP_IBUF, FALSE,
&mtr);
ut_d(buf_page_set_file_page_was_freed(page_id));
@@ -2383,7 +2373,7 @@ ibuf_get_merge_page_nos_func(
&& prev_space_id == first_space_id)
|| (volume_for_page
> ((IBUF_MERGE_THRESHOLD - 1)
- * 4 * UNIV_PAGE_SIZE
+ * 4U << srv_page_size_shift
/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
/ IBUF_MERGE_THRESHOLD)) {
@@ -2580,8 +2570,6 @@ ibuf_merge_space(
ut_ad(space < SRV_LOG_SPACE_FIRST_ID);
- ut_ad(space < SRV_LOG_SPACE_FIRST_ID);
-
ibuf_mtr_start(&mtr);
/* Position the cursor on the first matching record. */
@@ -2882,7 +2870,7 @@ ibuf_get_volume_buffered_count_func(
types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
- switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
+ switch (UNIV_EXPECT(int(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE),
IBUF_REC_INFO_SIZE)) {
default:
ut_error;
@@ -2969,7 +2957,7 @@ get_volume_comp:
Gets an upper limit for the combined size of entries buffered in the insert
buffer for a given page.
@return upper limit for the volume of buffered inserts for the index
-page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
+page, in bytes; srv_page_size, if the entries for the index page span
several pages in the insert buffer */
static
ulint
@@ -3070,7 +3058,7 @@ ibuf_get_volume_buffered(
do not have the x-latch on it, and cannot acquire one
because of the latching order: we have to give up */
- return(UNIV_PAGE_SIZE);
+ return(srv_page_size);
}
if (page_no != ibuf_rec_get_page_no(mtr, rec)
@@ -3140,7 +3128,7 @@ count_later:
/* We give up */
- return(UNIV_PAGE_SIZE);
+ return(srv_page_size);
}
if (page_no != ibuf_rec_get_page_no(mtr, rec)
@@ -3316,8 +3304,7 @@ ibuf_get_entry_counter_func(
return(ULINT_UNDEFINED);
} else if (!page_rec_is_infimum(rec)) {
return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
- } else if (only_leaf
- || fil_page_get_prev(page_align(rec)) == FIL_NULL) {
+ } else if (only_leaf || !page_has_prev(page_align(rec))) {
/* The parent node pointer did not contain the
searched for (space, page_no), which means that the
search ended on the correct page regardless of the
@@ -3385,6 +3372,7 @@ ibuf_insert_low(
ut_ad(!dict_index_is_spatial(index));
ut_ad(dtuple_check_typed(entry));
ut_ad(!no_counter || op == IBUF_OP_INSERT);
+ ut_ad(page_id.space() == index->table->space_id);
ut_a(op < IBUF_OP_COUNT);
do_merge = FALSE;
@@ -3507,7 +3495,7 @@ fail_exit:
ut_a((buffered == 0) || ibuf_count_get(page_id));
#endif
ibuf_mtr_start(&bitmap_mtr);
- bitmap_mtr.set_named_space(page_id.space());
+ index->set_modified(bitmap_mtr);
bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size,
&bitmap_mtr);
@@ -3702,9 +3690,9 @@ ibuf_insert(
dberr_t err;
ulint entry_size;
ibool no_counter;
- /* Read the settable global variable ibuf_use only once in
+ /* Read the settable global variable only once in
this function, so that we will have a consistent view of it. */
- ibuf_use_t use = ibuf_use;
+ ibuf_use_t use = ibuf_use_t(innodb_change_buffering);
DBUG_ENTER("ibuf_insert");
DBUG_PRINT("ibuf", ("op: %d, space: " UINT32PF ", page_no: " UINT32PF,
@@ -3714,7 +3702,7 @@ ibuf_insert(
ut_ad(page_id.space() != SRV_TMP_SPACE_ID);
ut_a(!dict_index_is_clust(index));
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
no_counter = use <= IBUF_USE_INSERT;
@@ -3729,8 +3717,6 @@ ibuf_insert(
case IBUF_USE_INSERT_DELETE_MARK:
case IBUF_USE_ALL:
goto check_watch;
- case IBUF_USE_COUNT:
- break;
}
break;
case IBUF_OP_DELETE_MARK:
@@ -3744,8 +3730,6 @@ ibuf_insert(
case IBUF_USE_ALL:
ut_ad(!no_counter);
goto check_watch;
- case IBUF_USE_COUNT:
- break;
}
break;
case IBUF_OP_DELETE:
@@ -3759,8 +3743,6 @@ ibuf_insert(
case IBUF_USE_ALL:
ut_ad(!no_counter);
goto skip_watch;
- case IBUF_USE_COUNT:
- break;
}
break;
case IBUF_OP_COUNT:
@@ -4274,7 +4256,7 @@ ibuf_restore_pos(
" ibuf record inserted to page "
<< space << ":" << page_no
<< " in file " << s->chain.start->name;
- fil_space_release(s);
+ s->release();
ib::error() << BUG_REPORT_MSG;
@@ -4511,7 +4493,7 @@ ibuf_merge_or_delete_for_page(
if (!bitmap_bits) {
/* No inserts buffered for this page */
- fil_space_release(space);
+ space->release();
return;
}
}
@@ -4571,7 +4553,7 @@ loop:
if (block != NULL) {
ibool success;
- mtr.set_named_space(page_id.space());
+ mtr.set_named_space(space);
success = buf_page_get_known_nowait(
RW_X_LATCH, block,
@@ -4587,12 +4569,11 @@ loop:
latch an io-fixed block. */
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
} else if (update_ibuf_bitmap) {
- mtr.set_named_space(page_id.space());
+ mtr.set_named_space(space);
}
if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
+ ut_ad(btr_pcur_is_after_last_in_tree(&pcur));
goto reset_bit;
}
@@ -4638,6 +4619,9 @@ loop:
entry = ibuf_build_entry_from_ibuf_rec(
&mtr, rec, heap, &dummy_index);
+ ut_ad(!dummy_index->table->space);
+ dummy_index->table->space = space;
+ dummy_index->table->space_id = space->id;
ut_ad(page_validate(block->frame, dummy_index));
@@ -4650,8 +4634,8 @@ loop:
volume += page_dir_calc_reserved_space(1);
- ut_a(volume <= 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ ut_a(volume <= (4U << srv_page_size_shift)
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE);
#endif
ibuf_insert_to_index_page(
entry, block, dummy_index, &mtr);
@@ -4689,7 +4673,7 @@ loop:
ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
ibuf_mtr_start(&mtr);
- mtr.set_named_space(page_id.space());
+ mtr.set_named_space(space);
success = buf_page_get_known_nowait(
RW_X_LATCH, block,
@@ -4773,7 +4757,7 @@ reset_bit:
ibuf_mtr_commit(&mtr);
if (space) {
- fil_space_release(space);
+ space->release();
}
btr_pcur_close(&pcur);
@@ -4826,8 +4810,7 @@ loop:
&pcur, &mtr);
if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
+ ut_ad(btr_pcur_is_after_last_in_tree(&pcur));
goto leave_loop;
}
@@ -4944,25 +4927,15 @@ ibuf_print(
mutex_exit(&ibuf_mutex);
}
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
+@param[in] trx transaction
+@param[in,out] space tablespace being imported
@return DB_SUCCESS or error code */
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
- const trx_t* trx, /*!< in: transaction */
- ulint space_id) /*!< in: tablespace identifier */
+dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
{
ulint page_no;
-
- ut_ad(space_id);
ut_ad(trx->mysql_thd);
-
- FilSpace space(space_id);
- if (!space()) {
- return(DB_TABLE_NOT_FOUND);
- }
-
+ ut_ad(space->purpose == FIL_TYPE_IMPORT);
const page_size_t page_size(space->flags);
/* fil_space_t::size and fil_space_t::free_limit would still be 0
at this point. So, we will have to read page 0. */
@@ -4972,7 +4945,7 @@ ibuf_check_bitmap_on_import(
mtr_t mtr;
ulint size;
mtr.start();
- if (buf_block_t* sp = buf_page_get(page_id_t(space_id, 0), page_size,
+ if (buf_block_t* sp = buf_page_get(page_id_t(space->id, 0), page_size,
RW_S_LATCH, &mtr)) {
size = std::min(
mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
@@ -5012,7 +4985,7 @@ ibuf_check_bitmap_on_import(
ibuf_enter(&mtr);
bitmap_page = ibuf_bitmap_get_map_page(
- page_id_t(space_id, page_no), page_size, &mtr);
+ page_id_t(space->id, page_no), page_size, &mtr);
if (buf_page_is_zeroes(bitmap_page, page_size)) {
/* This means we got all-zero page instead of
@@ -5023,9 +4996,8 @@ ibuf_check_bitmap_on_import(
curr_page < page_size.physical(); curr_page++) {
buf_block_t* block = buf_page_get(
- page_id_t(space_id, curr_page),
- page_size,
- RW_S_LATCH, &mtr);
+ page_id_t(space->id, curr_page),
+ page_size, RW_S_LATCH, &mtr);
page_t* page = buf_block_get_frame(block);
ut_ad(buf_page_is_zeroes(page, page_size));
}
@@ -5046,7 +5018,7 @@ ibuf_check_bitmap_on_import(
const ulint offset = page_no + i;
- const page_id_t cur_page_id(space_id, offset);
+ const page_id_t cur_page_id(space->id, offset);
if (ibuf_bitmap_page_get_bits(
bitmap_page, cur_page_id, page_size,
@@ -5059,12 +5031,10 @@ ibuf_check_bitmap_on_import(
ib_errf(trx->mysql_thd,
IB_LOG_LEVEL_ERROR,
ER_INNODB_INDEX_CORRUPT,
- "Space %u page %u"
+ "File %s page " ULINTPF
" is wrongly flagged to belong to the"
" insert buffer",
- (unsigned) space_id,
- (unsigned) offset);
-
+ space->chain.start->name, offset);
return(DB_CORRUPTION);
}
@@ -5076,9 +5046,9 @@ ibuf_check_bitmap_on_import(
IB_LOG_LEVEL_WARN,
ER_INNODB_INDEX_CORRUPT,
"Buffered changes"
- " for space %u page %u are lost",
- (unsigned) space_id,
- (unsigned) offset);
+ " for file %s page " ULINTPF
+ " are lost",
+ space->chain.start->name, offset);
/* Tolerate this error, so that
slightly corrupted tables can be
@@ -5114,7 +5084,7 @@ ibuf_set_bitmap_for_bulk_load(
free_val = ibuf_index_page_calc_free(block);
mtr_start(&mtr);
- mtr.set_named_space(block->page.id.space());
+ mtr.set_named_space_id(block->page.id.space());
bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
block->page.size, &mtr);
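Two substitutions recur throughout the ibuf0ibuf.cc hunks above: preprocessor #if ... #error guards become compile_time_assert() calls inside the functions that rely on them, and divisions by UNIV_PAGE_SIZE become right shifts by srv_page_size_shift, which is valid because the page size is always a power of two. A small standalone sketch of both ideas, using standard static_assert in place of InnoDB's compile_time_assert macro (an assumption made purely for illustration):

#include <cassert>

constexpr unsigned IBUF_BITS_PER_PAGE = 4;

/* Compile-time guard, equivalent in spirit to the removed #if/#error block. */
static_assert(IBUF_BITS_PER_PAGE % 2 == 0,
	      "IBUF_BITS_PER_PAGE must be an even number");

int main()
{
	/* A power-of-two page size is fully described by its shift. */
	const unsigned long srv_page_size_shift = 14;		/* 16 KiB pages */
	const unsigned long srv_page_size = 1UL << srv_page_size_shift;

	const unsigned long pool_bytes = 128UL << 20;		/* 128 MiB pool */

	/* Dividing by a power of two and shifting right give the same result. */
	assert(pool_bytes / srv_page_size
	       == pool_bytes >> srv_page_size_shift);
	return 0;
}

The shift form matches the parallel UNIV_PAGE_SIZE to srv_page_size changes in the same hunks, where the page size is a variable rather than a fixed constant.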
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 967f738f4e3..b99e7e4c522 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -34,9 +34,15 @@ Created 6/2/1994 Heikki Tuuri
#include "btr0types.h"
#include "gis0type.h"
+#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
+ (not really a hard limit).
+ Used in debug assertions
+ in btr_page_set_level and
+ btr_page_get_level */
+
/** Maximum record size which can be stored on a page, without using the
special big record storage structure */
-#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
+#define BTR_PAGE_MAX_REC_SIZE (srv_page_size / 2 - 200)
/** @brief Maximum depth of a B-tree in InnoDB.
@@ -148,23 +154,23 @@ free the pages of externally stored fields. */
record is in spatial index */
#define BTR_RTREE_DELETE_MARK 524288U
-#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
- ((latch_mode) & btr_latch_mode(~(BTR_INSERT \
- | BTR_DELETE_MARK \
- | BTR_RTREE_UNDO_INS \
- | BTR_RTREE_DELETE_MARK \
- | BTR_DELETE \
- | BTR_ESTIMATE \
- | BTR_IGNORE_SEC_UNIQUE \
- | BTR_ALREADY_S_LATCHED \
- | BTR_LATCH_FOR_INSERT \
- | BTR_LATCH_FOR_DELETE \
- | BTR_MODIFY_EXTERNAL)))
-
-#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
- ((latch_mode) & btr_latch_mode(~(BTR_LATCH_FOR_INSERT \
- | BTR_LATCH_FOR_DELETE \
- | BTR_MODIFY_EXTERNAL)))
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & ulint(~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_RTREE_UNDO_INS \
+ | BTR_RTREE_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED \
+ | BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
+ ((latch_mode) & ulint(~(BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
/**************************************************************//**
Report that an index page is corrupted. */
@@ -286,14 +292,22 @@ btr_page_get_index_id(
MY_ATTRIBUTE((warn_unused_result));
/********************************************************//**
Gets the node level field in an index page.
+@param[in] page index page
@return level, leaf level == 0 */
UNIV_INLINE
ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((warn_unused_result));
-#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
+btr_page_get_level(const page_t* page)
+{
+ ulint level;
+
+ ut_ad(page);
+
+ level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+ ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+ return(level);
+} MY_ATTRIBUTE((warn_unused_result))
/********************************************************//**
Gets the next index page number.
@return next page number */
@@ -342,8 +356,7 @@ btr_node_ptr_get_child_page_no(
/** Create the root node for a new index tree.
@param[in] type type of the index
-@param[in] space space where created
-@param[in] page_size page size
+@param[in,out] space tablespace where created
@param[in] index_id index id
@param[in] index index, or NULL when applying TRUNCATE
log record during recovery
@@ -354,8 +367,7 @@ record during recovery
ulint
btr_create(
ulint type,
- ulint space,
- const page_size_t& page_size,
+ fil_space_t* space,
index_id_t index_id,
dict_index_t* index,
const btr_create_t* btr_redo_create_info,
@@ -681,6 +693,20 @@ btr_page_free(
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
+/** Empty an index page (possibly the root page). @see btr_page_create().
+@param[in,out] block page to be emptied
+@param[in,out] page_zip compressed page frame, or NULL
+@param[in] index index of the page
+@param[in] level B-tree level of the page (0=leaf)
+@param[in,out] mtr mini-transaction */
+void
+btr_page_empty(
+ buf_block_t* block,
+ page_zip_des_t* page_zip,
+ dict_index_t* index,
+ ulint level,
+ mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull(1, 3, 5)));
/**************************************************************//**
Creates a new index page (not the root, and also not
used in page reorganization). @see btr_page_empty(). */
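btr_page_get_level() above becomes a header-defined inline that reads the 2-byte PAGE_LEVEL field from the page header with mach_read_from_2() and debug-asserts the result against BTR_MAX_NODE_LEVEL. The sketch below shows only the underlying big-endian 16-bit read; the buffer size and the offset 64 are placeholders chosen for illustration and are not asserted to match the real on-disk layout.

#include <cstdint>
#include <cassert>

/* Big-endian 16-bit read, the operation mach_read_from_2() performs. */
static inline uint16_t read_be16(const unsigned char* p)
{
	return uint16_t((uint16_t(p[0]) << 8) | p[1]);
}

int main()
{
	/* Toy "page": the level field sits big-endian at a fixed header
	offset; 64 is used here purely as an example offset. */
	unsigned char page[16384] = {};
	const unsigned level_offset = 64;
	page[level_offset] = 0x00;
	page[level_offset + 1] = 0x02;	/* level 2, i.e. two levels above leaf */

	assert(read_be16(page + level_offset) == 2);
	return 0;
}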
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index d68f74170c2..2669611a9e6 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -29,12 +29,6 @@ Created 6/2/1994 Heikki Tuuri
#include "mtr0log.h"
#include "page0zip.h"
-#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
- (not really a hard limit).
- Used in debug assertions
- in btr_page_set_level and
- btr_page_get_level_low */
-
/** Gets a buffer page and declares its latching order level.
@param[in] page_id page id
@param[in] mode latch mode
@@ -144,26 +138,6 @@ btr_page_get_index_id(
}
/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
-{
- ulint level;
-
- ut_ad(page);
-
- level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- return(level);
-}
-
-/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h
index a63ab9ded0d..e6716b19b09 100644
--- a/storage/innobase/include/btr0bulk.h
+++ b/storage/innobase/include/btr0bulk.h
@@ -32,7 +32,7 @@ Created 03/11/2014 Shaohua Wang
#include <vector>
/** Innodb B-tree index fill factor for bulk load. */
-extern long innobase_fill_factor;
+extern uint innobase_fill_factor;
/** whether to reduce redo logging during ALTER TABLE */
extern my_bool innodb_log_optimize_ddl;
@@ -85,7 +85,7 @@ public:
m_err(DB_SUCCESS)
{
ut_ad(!dict_index_is_spatial(m_index));
- ut_ad(!dict_table_is_temporary(m_index->table));
+ ut_ad(!m_index->table->is_temporary());
}
/** Deconstructor */
@@ -286,7 +286,8 @@ public:
{
#ifdef UNIV_DEBUG
if (m_flush_observer)
- fil_space_inc_redo_skipped_count(m_index->space);
+ my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+ 1);
#endif /* UNIV_DEBUG */
}
@@ -295,7 +296,8 @@ public:
{
#ifdef UNIV_DEBUG
if (m_flush_observer)
- fil_space_dec_redo_skipped_count(m_index->space);
+ my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+ ulint(-1));
#endif /* UNIV_DEBUG */
}
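In the bulk-load helper above, the tablespace's redo_skipped_count is adjusted with my_atomic_addlint(), and the decrement is spelled as adding ulint(-1): on an unsigned type the all-ones value wraps around, so the addition subtracts one. A standalone sketch of that idiom with std::atomic (the counter name is reused only for illustration):

#include <atomic>
#include <cassert>

int main()
{
	std::atomic<unsigned long> redo_skipped_count{0};

	redo_skipped_count.fetch_add(1);			/* increment */
	redo_skipped_count.fetch_add((unsigned long)-1);	/* wraps: subtracts 1 */

	/* Unsigned arithmetic is modular, so adding ULONG_MAX undoes the add. */
	assert(redo_skipped_count.load() == 0);
	return 0;
}

Unsigned overflow is well defined, so a single atomic-add primitive covers both directions of the adjustment.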
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 0f027536525..358f394c5b4 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -31,6 +31,7 @@ Created 10/16/1994 Heikki Tuuri
#include "page0cur.h"
#include "btr0types.h"
#include "gis0type.h"
+#include "my_base.h"
/** Mode flags for btr_cur operations; these can be ORed */
enum {
@@ -41,6 +42,11 @@ enum {
/** sys fields will be found in the update vector or inserted
entry */
BTR_KEEP_SYS_FLAG = 4,
+
+ /** no rollback */
+ BTR_NO_ROLLBACK = BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG,
+
/** btr_cur_pessimistic_update() must keep cursor position
when moving columns to big_rec */
BTR_KEEP_POS_FLAG = 8,
@@ -126,6 +132,24 @@ btr_cur_position(
buf_block_t* block, /*!< in: buffer block of rec */
btr_cur_t* cursor);/*!< in: cursor */
+/** Load the instant ALTER TABLE metadata from the clustered index
+when loading a table definition.
+@param[in,out] table table definition from the data dictionary
+@return error code
+@retval DB_SUCCESS if no error occurred */
+dberr_t
+btr_cur_instant_init(dict_table_t* table)
+ ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
+
+/** Initialize the n_core_null_bytes on first access to a clustered
+index root page.
+@param[in] index clustered index that is on its first access
+@param[in] page clustered index root page
+@return whether the page is corrupted */
+bool
+btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
+ ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
+
/** Optimistically latches the leaf page or pages requested.
@param[in] block guessed buffer block
@param[in] modify_clock modify clock value
@@ -153,8 +177,7 @@ Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
+btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
@@ -173,23 +196,29 @@ btr_cur_search_to_nth_level(
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
+ NOTE that if ahi_latch, we might not have a
+ cursor page latch, we assume that ahi_latch
+ protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: currently held btr_search_latch
+ (in RW_S_LATCH mode), or NULL */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr, /*!< in/out: mini-transaction */
ib_uint64_t autoinc = 0);
/*!< in: PAGE_ROOT_AUTO_INC to be written
(0 if none) */
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+ btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,a,fi,li,mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+ btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,fi,li,mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
/*****************************************************************//**
Opens a cursor at either end of an index.
@@ -571,7 +600,7 @@ btr_cur_parse_del_mark_set_sec_rec(
@param[in] tuple2 range end, may also be empty tuple
@param[in] mode2 search mode for range end
@return estimated number of rows */
-int64_t
+ha_rows
btr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple1,
@@ -805,7 +834,7 @@ btr_cur_latch_leaves(
/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
- ((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
+ ((srv_page_size * (ulint)((index)->merge_threshold)) / 100)
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
@@ -973,11 +1002,11 @@ We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*-------------------------------------- @{ */
-#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
-#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
+#define BTR_EXTERN_SPACE_ID 0U /*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO 4U /*!< page no where stored */
+#define BTR_EXTERN_OFFSET 8U /*!< offset of BLOB header
on that page */
-#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
+#define BTR_EXTERN_LEN 12U /*!< 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
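btr_cur_search_to_nth_level() above turns into a forwarding macro so that call sites keep one argument list: when BTR_CUR_HASH_ADAPT is compiled in, the ahi_latch argument is passed through to btr_cur_search_to_nth_level_func(); otherwise the macro drops it. A toy standalone sketch of that conditional-parameter technique (all names here are invented for illustration):

#include <cstdio>

#define FEATURE_X	/* comment this out to build the no-latch variant */

#ifdef FEATURE_X
static int do_work_func(int value, void* extra_latch)
{
	(void) extra_latch;	/* consumed only when the feature is enabled */
	return value * 2;
}
# define do_work(v, latch)	do_work_func(v, latch)
#else
static int do_work_func(int value)
{
	return value * 2;
}
# define do_work(v, latch)	do_work_func(v)	/* latch silently dropped */
#endif

int main()
{
	/* Call sites always pass the optional argument; the macro decides
	whether the underlying function actually receives it. */
	std::printf("%d\n", do_work(21, nullptr));
	return 0;
}

Callers that go through the macro can pass the argument unconditionally, while the #ifdef on the extra parameter stays confined to the declaring header.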
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index b1e59651a1d..adcd92e2fc8 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +29,7 @@ Created 10/16/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
if (btr_cur_limit_optimistic_insert_debug > 1\
- && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+ && (NREC) >= btr_cur_limit_optimistic_insert_debug) {\
CODE;\
}
#else
@@ -128,19 +129,17 @@ btr_cur_compress_recommendation(
{
const page_t* page;
- ut_ad(mtr_is_block_fix(
- mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2U,
return(FALSE));
- if ((page_get_data_size(page)
- < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+ if (page_get_data_size(page)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page)) {
/* The page fillfactor has dropped below a predefined
minimum value OR the level in the B-tree contains just
@@ -173,11 +172,9 @@ btr_cur_can_delete_without_compress(
page = btr_cur_get_page(cursor);
- if ((page_get_data_size(page) - rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
+ if (page_get_data_size(page) - rec_size
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page) || page_get_n_recs(page) < 2) {
/* The page fillfactor will drop below a predefined
minimum value, OR the level in the B-tree contains just
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index b9dd06dbf24..b69d07811f3 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -131,20 +131,25 @@ btr_pcur_open_with_no_init_func(
may end up on the previous page of the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
+ NOTE that if ahi_latch then we might not
+ acquire a cursor page latch, but assume
+ that the ahi_latch protects the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: adaptive hash index latch held
+ by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
- btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m) \
+ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,ahi,__FILE__,__LINE__,m)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m) \
+ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,__FILE__,__LINE__,m)
+#endif /* BTR_CUR_HASH_ADAPT */
/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
@@ -431,21 +436,11 @@ btr_pcur_is_before_first_on_page(
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor);
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor);
/*********************************************************//**
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index b2a85def63d..6e38bf61701 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -209,43 +209,25 @@ btr_pcur_is_on_user_rec(
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor)
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+ return !page_has_prev(btr_pcur_get_page(cursor))
+ && page_cur_is_before_first(btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor)
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+ return !page_has_next(btr_pcur_get_page(cursor))
+ && page_cur_is_after_last(btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
@@ -315,9 +297,7 @@ btr_pcur_move_to_next_user_rec(
cursor->old_stored = false;
loop:
if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+ if (btr_pcur_is_after_last_in_tree(cursor)) {
return(FALSE);
}
@@ -352,19 +332,15 @@ btr_pcur_move_to_next(
cursor->old_stored = false;
if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+ if (btr_pcur_is_after_last_in_tree(cursor)) {
return(FALSE);
}
btr_pcur_move_to_next_page(cursor, mtr);
-
return(TRUE);
}
btr_pcur_move_to_next_on_page(cursor);
-
return(TRUE);
}
@@ -480,9 +456,12 @@ btr_pcur_open_low(
ut_ad(!dict_index_is_spatial(index));
- err = btr_cur_search_to_nth_level(
- index, level, tuple, mode, latch_mode,
- btr_cursor, 0, file, line, mtr, autoinc);
+ err = btr_cur_search_to_nth_level_func(
+ index, level, tuple, mode, latch_mode, btr_cursor,
+#ifdef BTR_CUR_HASH_ADAPT
+ NULL,
+#endif /* BTR_CUR_HASH_ADAPT */
+ file, line, mtr, autoinc);
if (err != DB_SUCCESS) {
ib::warn() << " Error code: " << err
@@ -517,15 +496,15 @@ btr_pcur_open_with_no_init_func(
may end up on the previous page of the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
+ NOTE that if ahi_latch then we might not
+ acquire a cursor page latch, but assume
+ that the ahi_latch protects the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: adaptive hash index latch held
+ by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
@@ -540,9 +519,12 @@ btr_pcur_open_with_no_init_func(
btr_cursor = btr_pcur_get_btr_cur(cursor);
- err = btr_cur_search_to_nth_level(
+ err = btr_cur_search_to_nth_level_func(
index, 0, tuple, mode, latch_mode, btr_cursor,
- has_search_latch, file, line, mtr);
+#ifdef BTR_CUR_HASH_ADAPT
+ ahi_latch,
+#endif /* BTR_CUR_HASH_ADAPT */
+ file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index e6e442f28b6..0c8b152d413 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,26 +33,20 @@ Created 2/17/1996 Heikki Tuuri
/** Creates and initializes the adaptive search system at a database start.
@param[in] hash_size hash table size. */
-void
-btr_search_sys_create(ulint hash_size);
+void btr_search_sys_create(ulint hash_size);
/** Resize hash index hash table.
@param[in] hash_size hash index hash table size */
-void
-btr_search_sys_resize(ulint hash_size);
+void btr_search_sys_resize(ulint hash_size);
/** Frees the adaptive search system at a database shutdown. */
-void
-btr_search_sys_free();
+void btr_search_sys_free();
/** Disable the adaptive hash search system and empty the index.
@param need_mutex need to acquire dict_sys->mutex */
-void
-btr_search_disable(
- bool need_mutex);
+void btr_search_disable(bool need_mutex);
/** Enable the adaptive hash search system. */
-void
-btr_search_enable();
+void btr_search_enable();
/** Returns the value of ref_count. The value is protected by latch.
@param[in] info search info
@@ -86,12 +80,11 @@ both have sensible values.
we assume the caller uses his search latch
to protect the record!
@param[out] cursor tree cursor
-@param[in] has_search_latch
- latch mode the caller currently has on
- search system: RW_S/X_LATCH or 0
+@param[in] ahi_latch the adaptive hash index latch being held,
+ or NULL
@param[in] mtr mini transaction
-@return TRUE if succeeded */
-ibool
+@return whether the search succeeded */
+bool
btr_search_guess_on_hash(
dict_index_t* index,
btr_search_t* info,
@@ -99,22 +92,19 @@ btr_search_guess_on_hash(
ulint mode,
ulint latch_mode,
btr_cur_t* cursor,
- ulint has_search_latch,
+ rw_lock_t* ahi_latch,
mtr_t* mtr);
-/** Moves or deletes hash entries for moved records. If new_page is already
-hashed, then the hash index for page, if any, is dropped. If new_page is not
-hashed, and page is hashed, then a new hash index is built to new_page with the
-same parameters as page (this often happens when a page is split).
-@param[in,out] new_block records are copied to this page.
-@param[in,out] block index page from which record are copied, and the
- copied records will be deleted from this page.
-@param[in,out] index record descriptor */
+/** Move or delete hash entries for moved records, usually in a page split.
+If new_block is already hashed, then any hash index for block is dropped.
+If new_block is not hashed, and block is hashed, then a new hash index is
+built to new_block with the same parameters as block.
+@param[in,out] new_block destination page
+@param[in,out] block source page (subject to deletion later) */
void
btr_search_move_or_delete_hash_entries(
buf_block_t* new_block,
- buf_block_t* block,
- dict_index_t* index);
+ buf_block_t* block);
/** Drop any adaptive hash index entries that point to an index page.
@param[in,out] block block containing index page, s- or x-latched, or an
@@ -122,8 +112,7 @@ btr_search_move_or_delete_hash_entries(
block->buf_fix_count == 0 or it is an index page which
has already been removed from the buf_pool->page_hash
i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
-void
-btr_search_drop_page_hash_index(buf_block_t* block);
+void btr_search_drop_page_hash_index(buf_block_t* block);
/** Drop possible adaptive hash index entries when a page is evicted
from the buffer pool or freed in a file, or the index is being dropped.
@@ -133,118 +122,78 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id);
/** Updates the page hash index when a single record is inserted on a page.
@param[in] cursor cursor which was positioned to the place to insert
using btr_cur_search_, and the new record has been
- inserted next to the cursor. */
+ inserted next to the cursor.
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_node_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
/** Updates the page hash index when a single record is inserted on a page.
-@param[in] cursor cursor which was positioned to the
+@param[in,out] cursor cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
- to the cursor */
+ to the cursor
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
/** Updates the page hash index when a single record is deleted from a page.
@param[in] cursor cursor which was positioned on the record to delete
using btr_cur_search_, the record is not yet deleted.*/
-void
-btr_search_update_hash_on_delete(btr_cur_t* cursor);
+void btr_search_update_hash_on_delete(btr_cur_t* cursor);
/** Validates the search system.
@return true if ok */
-bool
-btr_search_validate();
-
-/** X-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index);
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index);
+bool btr_search_validate();
/** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all();
+static inline void btr_search_x_lock_all();
/** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all();
-
-/** S-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index);
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index);
+static inline void btr_search_x_unlock_all();
/** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all();
+static inline void btr_search_s_lock_all();
#ifdef UNIV_DEBUG
/** Check if thread owns all the search latches.
@param[in] mode lock mode check
@retval true if owns all of them
@retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode);
+static inline bool btr_search_own_all(ulint mode);
/** Check if thread owns any of the search latches.
@param[in] mode lock mode check
@retval true if owns any of them
@retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode);
+static inline bool btr_search_own_any(ulint mode);
+
+/** @return whether this thread holds any of the search latches */
+static inline bool btr_search_own_any();
#endif /* UNIV_DEBUG */
/** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all();
+static inline void btr_search_s_unlock_all();
/** Get the latch based on index attributes.
A latch is selected from an array of latches using pair of index-id, space-id.
@param[in] index index handler
@return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index);
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index);
/** Get the hash-table based on index attributes.
A table is selected from an array of tables using pair of index-id, space-id.
@param[in] index index handler
@return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index);
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index);
#else /* BTR_CUR_HASH_ADAPT */
# define btr_search_sys_create(size)
+# define btr_search_sys_free()
# define btr_search_drop_page_hash_index(block)
-# define btr_search_s_lock(index)
-# define btr_search_s_unlock(index)
# define btr_search_s_lock_all(index)
# define btr_search_s_unlock_all(index)
-# define btr_search_x_lock(index)
-# define btr_search_x_unlock(index)
# define btr_search_info_update(index, cursor)
-# define btr_search_move_or_delete_hash_entries(new_block, block, index)
-# define btr_search_update_hash_on_insert(cursor)
+# define btr_search_move_or_delete_hash_entries(new_block, block)
+# define btr_search_update_hash_on_insert(cursor, ahi_latch)
# define btr_search_update_hash_on_delete(cursor)
# define btr_search_sys_resize(hash_size)
#endif /* BTR_CUR_HASH_ADAPT */
@@ -253,15 +202,11 @@ btr_get_search_table(const dict_index_t* index);
/** Create and initialize search info.
@param[in,out] heap heap where created
@return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** @return the search info of an index */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(dict_index_t* index)
+static inline btr_search_t* btr_search_get_info(dict_index_t* index)
{
return(index->search_info);
}
@@ -305,7 +250,7 @@ struct btr_search_t{
ulint n_bytes; /*!< recommended prefix: number of bytes in
an incomplete field
@see BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /*!< TRUE or FALSE, depending on whether
+ bool left_side; /*!< true or false, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
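Throughout this header the numeric has_search_latch flag (RW_S_LATCH or 0) is replaced by an explicit rw_lock_t* ahi_latch identifying one adaptive hash index partition latch. A hedged sketch of a call site under the new convention; the surrounding insert logic and the cursor variable are assumptions, only the names declared above come from this header:

	/* Pick the AHI partition latch for this index and hand it down,
	so the callee can verify or acquire the correct partition. */
	rw_lock_t*	ahi_latch = btr_get_search_latch(cursor->index);
	btr_search_update_hash_on_insert(cursor, ahi_latch);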
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index b5a7536a2b4..716410e3557 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,9 +31,7 @@ Created 2/17/1996 Heikki Tuuri
/** Create and initialize search info.
@param[in,out] heap heap where created
@return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
{
btr_search_t* info = static_cast<btr_search_t*>(
mem_heap_zalloc(heap, sizeof(btr_search_t)));
@@ -45,25 +44,23 @@ btr_search_info_create(mem_heap_t* heap)
}
#ifdef BTR_CUR_HASH_ADAPT
-/*********************************************************************//**
-Updates the search info. */
+/** Updates the search info.
+@param[in,out] info search info
+@param[in,out] cursor cursor which was just positioned */
void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
+btr_search_info_update_slow(btr_search_t* info, btr_cur_t* cursor);
/*********************************************************************//**
Updates the search info. */
-UNIV_INLINE
+static inline
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /*!< in: index of the cursor */
btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
if (dict_index_is_spatial(index) || !btr_search_enabled) {
return;
@@ -87,28 +84,8 @@ btr_search_info_update(
btr_search_info_update_slow(info, cursor);
}
-/** X-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index)
-{
- rw_lock_x_lock(btr_get_search_latch(index));
-}
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index)
-{
- rw_lock_x_unlock(btr_get_search_latch(index));
-}
-
/** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all()
+static inline void btr_search_x_lock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_x_lock(btr_search_latches[i]);
@@ -116,37 +93,15 @@ btr_search_x_lock_all()
}
/** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all()
+static inline void btr_search_x_unlock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_x_unlock(btr_search_latches[i]);
}
}
-/** S-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index)
-{
- rw_lock_s_lock(btr_get_search_latch(index));
-}
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index)
-{
- rw_lock_s_unlock(btr_get_search_latch(index));
-}
-
/** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all()
+static inline void btr_search_s_lock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_s_lock(btr_search_latches[i]);
@@ -154,9 +109,7 @@ btr_search_s_lock_all()
}
/** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all()
+static inline void btr_search_s_unlock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_s_unlock(btr_search_latches[i]);
@@ -168,9 +121,7 @@ btr_search_s_unlock_all()
@param[in] mode lock mode check
@retval true if owns all of them
@retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode)
+static inline bool btr_search_own_all(ulint mode)
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
if (!rw_lock_own(btr_search_latches[i], mode)) {
@@ -184,9 +135,7 @@ btr_search_own_all(ulint mode)
@param[in] mode lock mode check
@retval true if owns any of them
@retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode)
+static inline bool btr_search_own_any(ulint mode)
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
if (rw_lock_own(btr_search_latches[i], mode)) {
@@ -195,19 +144,31 @@ btr_search_own_any(ulint mode)
}
return(false);
}
+
+/** @return whether this thread holds any of the search latches */
+static inline bool btr_search_own_any()
+{
+ for (ulint i = btr_ahi_parts; i--; ) {
+ if (rw_lock_own_flagged(btr_search_latches[i],
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S)) {
+ return true;
+ }
+ }
+ return false;
+}
#endif /* UNIV_DEBUG */
/** Get the adaptive hash search index latch for a b-tree.
@param[in] index b-tree index
@return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index)
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index)
{
ut_ad(index != NULL);
+ ut_ad(!index->table->space
+ || index->table->space->id == index->table->space_id);
- ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
- static_cast<ulint>(index->space));
+ ulint ifold = ut_fold_ulint_pair(ulint(index->id),
+ index->table->space_id);
return(btr_search_latches[ifold % btr_ahi_parts]);
}
@@ -216,14 +177,13 @@ btr_get_search_latch(const dict_index_t* index)
A table is selected from an array of tables using pair of index-id, space-id.
@param[in] index index handler
@return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index)
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index)
{
ut_ad(index != NULL);
+ ut_ad(index->table->space->id == index->table->space_id);
- ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
- static_cast<ulint>(index->space));
+ ulint ifold = ut_fold_ulint_pair(ulint(index->id),
+ index->table->space_id);
return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]);
}
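Both accessors fold the index id with the tablespace id, so for a given index the latch and the hash table always belong to the same partition. A short illustration (btr_ahi_parts is the value of innodb_adaptive_hash_index_parts; the local variable names are illustrative):

	ulint	fold = ut_fold_ulint_pair(ulint(index->id),
					  index->table->space_id);
	ulint	part = fold % btr_ahi_parts;
	/* btr_search_latches[part] protects
	btr_search_sys->hash_tables[part]. */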
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index 17efa65714c..7ee837d6d2e 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -47,9 +47,9 @@ buf_buddy_alloc(
the page resides */
ulint size, /*!< in: compressed page size
(between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
+ srv_page_size) */
+ bool* lru) /*!< in: pointer to a variable
+ that will be assigned true if
storage was allocated from the
LRU list and buf_pool->mutex was
temporarily released */
@@ -66,14 +66,14 @@ buf_buddy_free(
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
+ up to srv_page_size */
MY_ATTRIBUTE((nonnull));
/** Reallocate a block.
@param[in] buf_pool buffer pool instance
@param[in] buf block to be reallocated, must be pointed
to by the buffer pool
-@param[in] size block size, up to UNIV_PAGE_SIZE
+@param[in] size block size, up to srv_page_size
@retval false if failed because of no free blocks. */
bool
buf_buddy_realloc(
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index 2b6d76df009..d166ab8441c 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -42,8 +42,8 @@ buf_buddy_alloc_low(
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that
- will be assigned TRUE if storage was
+ bool* lru) /*!< in: pointer to a variable that
+ will be assigned true if storage was
allocated from the LRU list and
buf_pool->mutex was temporarily
released */
@@ -96,9 +96,9 @@ buf_buddy_alloc(
the page resides */
ulint size, /*!< in: compressed page size
(between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
+ srv_page_size) */
+ bool* lru) /*!< in: pointer to a variable
+ that will be assigned true if
storage was allocated from the
LRU list and buf_pool->mutex was
temporarily released */
@@ -106,7 +106,7 @@ buf_buddy_alloc(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(ut_is_2pow(size));
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
lru));
@@ -123,12 +123,12 @@ buf_buddy_free(
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
+ up to srv_page_size */
{
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(ut_is_2pow(size));
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
}
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 8e13b3876e4..8ca9ddd28fe 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1541,7 +1541,7 @@ public:
bool encrypted; /*!< page is still encrypted */
ulint real_size; /*!< Real size of the page
- Normal pages == UNIV_PAGE_SIZE
+ Normal pages == srv_page_size
page compressed pages, payload
					size aligned to a sector boundary.
*/
@@ -1676,9 +1676,9 @@ struct buf_block_t{
buf_pool->page_hash can point
to buf_page_t or buf_block_t */
byte* frame; /*!< pointer to buffer frame which
- is of size UNIV_PAGE_SIZE, and
+ is of size srv_page_size, and
aligned to an address divisible by
- UNIV_PAGE_SIZE */
+ srv_page_size */
BPageLock lock; /*!< read-write lock of the buffer
frame */
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
@@ -1692,7 +1692,7 @@ struct buf_block_t{
used in debugging */
ibool in_withdraw_list;
#endif /* UNIV_DEBUG */
- unsigned lock_hash_val:32;/*!< hashed value of the page address
+ uint32_t lock_hash_val; /*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
(or buf_block_t::mutex, buf_pool->mutex
@@ -1838,7 +1838,7 @@ struct buf_block_t{
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
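The fold now shifts by the run-time srv_page_size_shift instead of dividing by the former compile-time UNIV_PAGE_SIZE; for a power-of-two page size the two are identical. Worked example with 16KiB pages (srv_page_size_shift == 14):

	/* frame = 0x7f0040008000
	   fold  = 0x7f0040008000 >> 14 = 0x1fc010002
	   which equals 0x7f0040008000 / 16384. */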
@@ -2305,8 +2305,12 @@ Use these instead of accessing buf_pool->mutex directly. */
/** Get appropriate page_hash_lock. */
-# define buf_page_hash_lock_get(buf_pool, page_id) \
- hash_get_lock((buf_pool)->page_hash, (page_id).fold())
+UNIV_INLINE
+rw_lock_t*
+buf_page_hash_lock_get(const buf_pool_t* buf_pool, const page_id_t& page_id)
+{
+ return hash_get_lock(buf_pool->page_hash, page_id.fold());
+}
/** If not appropriate page_hash_lock, relock until appropriate. */
# define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 91c602f71b0..21f6ed0c71f 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -113,7 +113,7 @@ ulint
buf_pool_get_n_pages(void)
/*======================*/
{
- return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+ return buf_pool_get_curr_size() >> srv_page_size_shift;
}
/********************************************************************//**
@@ -759,7 +759,7 @@ buf_frame_align(
ut_ad(ptr);
- frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+ frame = (buf_frame_t*) ut_align_down(ptr, srv_page_size);
return(frame);
}
@@ -776,11 +776,11 @@ buf_ptr_get_fsp_addr(
fil_addr_t* addr) /*!< out: page offset and byte offset */
{
const page_t* page = (const page_t*) ut_align_down(ptr,
- UNIV_PAGE_SIZE);
+ srv_page_size);
*space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
- addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
+ addr->boffset = ut_align_offset(ptr, srv_page_size);
}
/**********************************************************************//**
@@ -865,7 +865,7 @@ buf_frame_copy(
{
ut_ad(buf && frame);
- ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+ ut_memcpy(buf, frame, srv_page_size);
return(buf);
}
@@ -953,7 +953,7 @@ ulint
buf_block_fix(
buf_page_t* bpage)
{
- return(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
+ return uint32(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
}
/** Increments the bufferfix count.
@@ -1001,9 +1001,10 @@ ulint
buf_block_unfix(
buf_page_t* bpage)
{
- ulint count = my_atomic_add32((int32*) &bpage->buf_fix_count, -1) - 1;
- ut_ad(count + 1 != 0);
- return(count);
+ uint32 count = uint32(my_atomic_add32((int32*) &bpage->buf_fix_count,
+ -1));
+ ut_ad(count != 0);
+ return count - 1;
}
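buf_block_unfix() now asserts that the counter was nonzero before the decrement and returns the post-decrement value as uint32. A minimal sketch of the pairing; the call sites shown are assumptions:

	buf_block_fix(bpage);		/* pin: returns the count after +1 */
	/* ... access the page without holding buf_pool->mutex ... */
	if (buf_block_unfix(bpage) == 0) {
		/* no thread is pinning this page any more */
	}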
/** Decrements the bufferfix count.
@@ -1416,8 +1417,8 @@ bool
buf_pool_is_obsolete(
ulint withdraw_clock)
{
- return(buf_pool_withdrawing
- || buf_withdraw_clock != withdraw_clock);
+ return(UNIV_UNLIKELY(buf_pool_withdrawing
+ || buf_withdraw_clock != withdraw_clock));
}
/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
index 0bac2b911ee..98c6ff16a6a 100644
--- a/storage/innobase/include/buf0checksum.h
+++ b/storage/innobase/include/buf0checksum.h
@@ -34,7 +34,7 @@ when it is written to a file and also checked for a match when reading from
the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
variants. Note that we must be careful to calculate the same value on 32-bit
and 64-bit architectures.
-@param[in] page buffer page (UNIV_PAGE_SIZE bytes)
+@param[in] page buffer page (srv_page_size bytes)
@param[in] use_legacy_big_endian if true then use big endian
byteorder when converting byte strings to integers
@return checksum */
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 8920d0ed4a8..0f0cc53ba1b 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -130,7 +130,7 @@ struct buf_dblwr_t{
doublewrite block (64 pages) */
ulint block2; /*!< page number of the second block */
ulint first_free;/*!< first free position in write_buf
- measured in units of UNIV_PAGE_SIZE */
+ measured in units of srv_page_size */
ulint b_reserved;/*!< number of slots currently reserved
for batch flush. */
os_event_t b_event;/*!< event where threads wait for a
@@ -149,7 +149,7 @@ struct buf_dblwr_t{
buffer. */
byte* write_buf;/*!< write buffer used in writing to the
doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
+ address divisible by srv_page_size
(which is required by Windows aio) */
byte* write_buf_unaligned;/*!< pointer to write_buf,
but unaligned */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 991ed33e890..6ee84e99d5b 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -216,16 +216,10 @@ buf_flush_ready_for_replace(
#ifdef UNIV_DEBUG
/** Disables page cleaner threads (coordinator and workers).
It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-buf_flush_page_cleaner_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+void buf_flush_page_cleaner_disabled_debug_update(THD*,
+ st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/******************************************************************//**
@@ -238,6 +232,12 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
/*===============================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+
+/** Adjust thread count for page cleaner workers.
+@param[in] new_cnt Number of threads to be used */
+void
+buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt);
+
/******************************************************************//**
Worker thread of page_cleaner.
@return a dummy parameter */
@@ -338,12 +338,12 @@ flushed to disk before any redo logged operations go to the index. */
class FlushObserver {
public:
/** Constructor
- @param[in] space_id table space id
+ @param[in,out] space tablespace
@param[in] trx trx instance
@param[in] stage performance schema accounting object,
used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
for accounting. */
- FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
+ FlushObserver(fil_space_t* space, trx_t* trx, ut_stage_alter_t* stage);
	/** Destructor */
~FlushObserver();
@@ -389,8 +389,8 @@ public:
buf_pool_t* buf_pool,
buf_page_t* bpage);
private:
- /** Table space id */
- const ulint m_space_id;
+ /** Tablespace */
+ fil_space_t* m_space;
/** Trx instance */
const trx_t* const m_trx;
@@ -412,57 +412,6 @@ private:
bool m_interrupted;
};
-/******************************************************************//**
-Start a buffer flush batch for LRU or flush list */
-ibool
-buf_flush_start(
-/*============*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/******************************************************************//**
-End a buffer flush batch for LRU or flush list */
-void
-buf_flush_end(
-/*==========*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
-void
-buf_flush_common(
-/*=============*/
- buf_flush_t flush_type, /*!< in: type of flush */
- ulint page_count); /*!< in: number of pages flushed */
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-__attribute__((nonnull))
-void
-buf_flush_batch(
-/*============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
- all blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- flush_counters_t* n); /*!< out: flushed/evicted page
- counts */
-
-
#include "buf0flu.ic"
#endif
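FlushObserver is now constructed from a fil_space_t* instead of a numeric space id. A hedged sketch of the typical ALTER TABLE use; table->space being the fil_space_t* of the table and the surrounding context are assumptions, while UT_NEW_NOKEY and trx_set_flush_observer exist elsewhere in InnoDB:

	FlushObserver*	observer = UT_NEW_NOKEY(
		FlushObserver(table->space, trx, stage));
	trx_set_flush_observer(trx, observer);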
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 6c7cf75cbe8..81a257b0371 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -32,6 +32,7 @@ Created 11/5/1995 Heikki Tuuri
// Forward declaration
struct trx_t;
+struct fil_space_t;
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
diff --git a/storage/innobase/include/buf0mtflu.h b/storage/innobase/include/buf0mtflu.h
deleted file mode 100644
index 0475335bbf5..00000000000
--- a/storage/innobase/include/buf0mtflu.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
-Copyright (C) 2014 Fusion-io. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/buf0mtflu.h
-Multi-threadef flush method interface function prototypes
-
-Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
- Dhananjoy Das DDas@fusionio.com
-***********************************************************************/
-
-#ifndef buf0mtflu_h
-#define buf0mtflu_h
-
-/******************************************************************//**
-Add exit work item to work queue to signal multi-threded flush
-threads that they should exit.
-*/
-void
-buf_mtflu_io_thread_exit(void);
-/*===========================*/
-
-/******************************************************************//**
-Initialize multi-threaded flush thread syncronization data.
-@return Initialized multi-threaded flush thread syncroniztion data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
- ulint n_threads, /*!< in: Number of threads to create */
- ulint wrk_cnt); /*!< in: Number of work items */
-
-/******************************************************************//**
-Return true if multi-threaded flush is initialized
-@return true if initialized, false if not */
-bool
-buf_mtflu_init_done(void);
-/*======================*/
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void);
-/*===========================*/
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list
-*/
-bool
-buf_mtflu_flush_list(
-/*=================*/
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed); /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
-/*********************************************************************//**
-Set correct thread identifiers to io thread array based on
-information we have. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
- ulint n_threads, /*!<in: Number of threads to fill */
- void* ctx, /*!<in: thread context */
- os_thread_id_t* thread_ids); /*!<in: thread id array */
-
-#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 719699f5ee2..2847e328515 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -113,7 +113,7 @@ is_checksum_strict(ulint algo)
#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT)
/** Actual number of buddy sizes based on current page size */
-#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+#define BUF_BUDDY_SIZES (srv_page_size_shift - BUF_BUDDY_LOW_SHIFT)
/** Maximum number of buddy sizes based on the max page size */
#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \
@@ -121,7 +121,7 @@ is_checksum_strict(ulint algo)
/** twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
-this must be equal to UNIV_PAGE_SIZE */
+this must be equal to srv_page_size */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index c181aa01a38..15c7ac9866a 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -589,6 +589,22 @@ struct dfield_t{
@param[in,out] heap memory heap in which the clone will be created
@return the cloned object */
dfield_t* clone(mem_heap_t* heap) const;
+
+	/** @return whether this system field indicates a history row */
+ bool vers_history_row() const
+ {
+ ut_ad(type.vers_sys_end());
+ if (type.mtype == DATA_FIXBINARY) {
+ ut_ad(len == sizeof timestamp_max_bytes);
+ return 0 != memcmp(data, timestamp_max_bytes, len);
+ } else {
+ ut_ad(type.mtype == DATA_INT);
+ ut_ad(len == sizeof trx_id_max_bytes);
+ return 0 != memcmp(data, trx_id_max_bytes, len);
+ }
+ ut_ad(0);
+ return false;
+ }
};
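dfield_t::vers_history_row() compares a row_end system field against the maximum TIMESTAMP or TRX_ID value. A hedged usage sketch; the tuple and the field position are illustrative:

	const dfield_t*	row_end = dtuple_get_nth_field(row, row_end_pos);
	if (row_end->vers_history_row()) {
		/* not the current version: visible only to
		FOR SYSTEM_TIME queries */
	}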
/** Structure for an SQL data tuple of fields (logical record) */
@@ -617,6 +633,16 @@ struct dtuple_t {
/** Value of dtuple_t::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
+
+ /** Trim the tail of an index tuple before insert or update.
+ After instant ADD COLUMN, if the last fields of a clustered index tuple
+ match the default values that were explicitly specified or implied
+ during ADD COLUMN, there will be no need to store them.
+ NOTE: A page latch in the index must be held, so that the index
+ may not lose 'instantness' before the trimmed tuple has been
+ inserted or updated.
+ @param[in] index index possibly with instantly added columns */
+ void trim(const dict_index_t& index);
};
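A hedged sketch of where dtuple_t::trim() is meant to be called, assuming dict_index_t::is_instant() marks indexes that went through instant ADD COLUMN; the surrounding insert path is an assumption:

	if (index->is_instant()) {
		/* drop trailing fields that match the instant
		ADD COLUMN defaults before storing the entry */
		entry->trim(*index);
	}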
/** A slot for a field in a big rec vector */
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 81788885aa5..310902f5166 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -94,6 +94,7 @@ dfield_get_len(
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
|| (field->data != &data_error));
+ ut_ad(field->len != UNIV_SQL_DEFAULT);
return(field->len);
}
@@ -108,6 +109,7 @@ dfield_set_len(
ulint len) /*!< in: length or UNIV_SQL_NULL */
{
ut_ad(field);
+ ut_ad(len != UNIV_SQL_DEFAULT);
#ifdef UNIV_VALGRIND_DEBUG
if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
#endif /* UNIV_VALGRIND_DEBUG */
@@ -326,6 +328,7 @@ dfield_data_is_binary_equal(
ulint len, /*!< in: data length or UNIV_SQL_NULL */
const byte* data) /*!< in: data */
{
+ ut_ad(len != UNIV_SQL_DEFAULT);
return(len == dfield_get_len(field)
&& (len == UNIV_SQL_NULL
|| !memcmp(dfield_get_data(field), data, len)));
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index c4521d0723b..b999106fee0 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -29,6 +29,12 @@ Created 1/16/1996 Heikki Tuuri
#include "univ.i"
+/** Special length indicating a missing instantly added column */
+#define UNIV_SQL_DEFAULT (UNIV_SQL_NULL - 1)
+
+/** @return whether a length is actually stored in a field */
+#define len_is_stored(len) (len != UNIV_SQL_NULL && len != UNIV_SQL_DEFAULT)
+
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
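A worked example of the two pseudo-lengths: both are rejected by len_is_stored(), while a genuine zero-length (empty string) value counts as stored, so in a debug build the following assertions hold:

	ut_ad(!len_is_stored(UNIV_SQL_NULL));
	ut_ad(!len_is_stored(UNIV_SQL_DEFAULT));
	ut_ad(len_is_stored(0));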
@@ -183,8 +189,12 @@ be less than 256 */
for shorter VARCHARs MySQL uses only 1 byte */
#define DATA_VIRTUAL 8192U /* Virtual column */
-/** Get the number of system columns in a table. */
-#define dict_table_get_n_sys_cols(table) DATA_N_SYS_COLS
+/** System Versioning */
+#define DATA_VERS_START 16384U /* start system field */
+#define DATA_VERS_END 32768U /* end system field */
+/** system-versioned user data column */
+#define DATA_VERSIONED (DATA_VERS_START|DATA_VERS_END)
+
/** Check whether locking is disabled (never). */
#define dict_table_is_locking_disabled(table) false
@@ -355,9 +365,9 @@ dtype_form_prtype(ulint old_prtype, ulint charset_coll)
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
UNIV_INLINE
-ibool
+bool
dtype_is_utf8(
/*==========*/
ulint prtype);/*!< in: precise data type */
@@ -531,8 +541,24 @@ struct dtype_t{
in bytes */
unsigned mbmaxlen:3; /*!< maximum length of a character,
in bytes */
+
+ /** @return whether this is system versioned user field */
+ bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
+ /** @return whether this is the system field start */
+ bool vers_sys_start() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_START;
+ }
+ /** @return whether this is the system field end */
+ bool vers_sys_end() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_END;
+ }
};
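A worked example of how the two bits combine in dtype_t::prtype:

	/* prtype & DATA_VERSIONED == DATA_VERS_START -> row_start system column
	   prtype & DATA_VERSIONED == DATA_VERS_END   -> row_end system column
	   both bits set (DATA_VERSIONED)              -> system-versioned user
	                                                  column; is_versioned() */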
+/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
+extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
+
#include "data0type.ic"
#endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index 59f8c75fd65..56a588562ee 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -43,9 +43,9 @@ dtype_get_charset_coll(
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
UNIV_INLINE
-ibool
+bool
dtype_is_utf8(
/*==========*/
ulint prtype) /*!< in: precise data type */
@@ -58,10 +58,10 @@ dtype_is_utf8(
case 33: /* utf8_general_ci */
case 83: /* utf8_bin */
case 254: /* utf8_general_cs */
- return(TRUE);
+ return true;
}
- return(FALSE);
+ return false;
}
/*********************************************************************//**
@@ -235,9 +235,8 @@ dtype_new_store_for_order_and_null_size(
ulint prefix_len)/*!< in: prefix length to
replace type->len, or 0 */
{
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+ compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
ulint len;
ut_ad(type);
@@ -280,10 +279,7 @@ dtype_read_for_order_and_null_size(
dtype_t* type, /*!< in: type struct */
const byte* buf) /*!< in: buffer for stored type order info */
{
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
+ compile_time_assert(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0] & 63;
type->prtype = buf[1];
@@ -309,11 +305,7 @@ dtype_new_read_for_order_and_null_size(
dtype_t* type, /*!< in: type struct */
const byte* buf) /*!< in: buffer for stored type order info */
{
- ulint charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+ compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0] & 63;
type->prtype = buf[1];
@@ -328,7 +320,7 @@ dtype_new_read_for_order_and_null_size(
type->len = mach_read_from_2(buf + 2);
- charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
+ ulint charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
if (dtype_is_string_type(type->mtype)) {
ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index be0184820b1..d683afcdc7e 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -119,7 +119,7 @@ dict_is_sys_table(
/* The ids for the basic system tables and their indexes */
#define DICT_TABLES_ID 1
#define DICT_COLUMNS_ID 2
-#define DICT_INDEXES_ID 3
+#define DICT_INDEXES_ID dict_index_t::DICT_INDEXES_ID /* 3 */
#define DICT_FIELDS_ID 4
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID 5
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index e40c3f844e3..845a0a3888d 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -58,10 +58,7 @@ dict_sys_read_row_id(
/*=================*/
const byte* field) /*!< in: record field */
{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+ compile_time_assert(DATA_ROW_ID_LEN == 6);
return(mach_read_from_6(field));
}
@@ -74,10 +71,7 @@ dict_sys_write_row_id(
byte* field, /*!< in: record field */
row_id_t row_id) /*!< in: row id */
{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+ compile_time_assert(DATA_ROW_ID_LEN == 6);
mach_write_to_6(field, row_id);
}
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 6b0092c0127..75ef4f09dbb 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -47,6 +47,7 @@ tab_create_graph_create(
/** Creates an index create graph.
@param[in] index index to create, built as a memory data structure
+@param[in] table table name
@param[in,out] heap heap where created
@param[in] add_v new virtual columns added in the same clause with
add index
@@ -54,8 +55,9 @@ tab_create_graph_create(
ind_node_t*
ind_create_graph_create(
dict_index_t* index,
+ const char* table,
mem_heap_t* heap,
- const dict_add_v_col_t* add_v);
+ const dict_add_v_col_t* add_v = NULL);
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@@ -139,22 +141,6 @@ dict_create_index_tree_in_mem(
dict_index_t* index, /*!< in/out: index */
const trx_t* trx); /*!< in: InnoDB transaction handle */
-/*******************************************************************//**
-Truncates the index tree but don't update SYSTEM TABLES.
-@return DB_SUCCESS or error */
-dberr_t
-dict_truncate_index_tree_in_mem(
-/*============================*/
- dict_index_t* index); /*!< in/out: index */
-
-/*******************************************************************//**
-Drops the index tree but don't update SYS_INDEXES table. */
-void
-dict_drop_index_tree_in_mem(
-/*========================*/
- const dict_index_t* index, /*!< in: index */
- ulint page_no);/*!< in: index page-no */
-
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
@@ -313,6 +299,7 @@ struct ind_node_t{
dict_index_t* index; /*!< index to create, built as a
memory data structure with
dict_mem_... functions */
+ const char* table_name; /*!< table name */
ins_node_t* ind_def; /*!< child node which does the insert of
the index definition; the row to be
inserted is built by the parent node */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 26d5ed34318..bf9fcd70f18 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -178,7 +178,7 @@ dict_col_copy_type(
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -361,15 +361,6 @@ dict_table_add_system_columns(
mem_heap_t* heap) /*!< in: temporary heap */
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- bool can_be_evicted, /*!< in: whether can be evicted*/
- mem_heap_t* heap) /*!< in: temporary heap */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
Removes a table object from the dictionary cache. */
void
dict_table_remove_from_cache(
@@ -567,16 +558,6 @@ dict_foreign_find_index(
happened */
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Returns a virtual column's name.
@param[in] table table object
@@ -879,14 +860,25 @@ dict_table_get_sys_col(
ulint sys) /*!< in: DATA_ROW_ID, ... */
MY_ATTRIBUTE((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) \
- - (dict_table_get_n_sys_cols(table)))
+#define dict_table_get_nth_col(table, pos) \
+ (&(table)->cols[pos])
+#define dict_table_get_sys_col(table, sys) \
+ (&(table)->cols[(table)->n_cols + (sys) - DATA_N_SYS_COLS])
/* Get nth virtual columns */
-#define dict_table_get_nth_v_col(table, pos) ((table)->v_cols + (pos))
+#define dict_table_get_nth_v_col(table, pos) (&(table)->v_cols[pos])
#endif /* UNIV_DEBUG */
+/** Wrapper function.
+@see dict_col_t::name()
+@param[in] table table
+@param[in] col_nr column number in table
+@return column name */
+inline
+const char*
+dict_table_get_col_name(const dict_table_t* table, ulint col_nr)
+{
+ return(dict_table_get_nth_col(table, col_nr)->name(*table));
+}
+
/********************************************************************//**
Gets the given system column number of a table.
@return column number */
@@ -911,30 +903,21 @@ dict_index_get_min_size(
Check whether the table uses the compact page format.
@return TRUE if table uses the compact page format */
UNIV_INLINE
-ibool
+bool
dict_table_is_comp(
/*===============*/
const dict_table_t* table) /*!< in: table */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
- MY_ATTRIBUTE((warn_unused_result));
+/** Determine if a table uses atomic BLOBs (no locally stored prefix).
+@param[in] table InnoDB table
+@return whether BLOBs are atomic */
+inline
+bool
+dict_table_has_atomic_blobs(const dict_table_t* table)
+{
+ return(DICT_TF_HAS_ATOMIC_BLOBS(table->flags));
+}
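dict_table_has_atomic_blobs() replaces the former dict_table_get_format()/dict_tf_get_format() checks against UNIV_FORMAT_B. A hedged sketch of the typical caller; the undo-log context is an assumption:

	if (!dict_table_has_atomic_blobs(table)) {
		/* ROW_FORMAT=REDUNDANT or COMPACT: the clustered index
		record keeps a local 768-byte BLOB prefix, so the undo
		log does not need to store an extra column prefix. */
	}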
/** Set the various values in a dict_table_t::flags pointer.
@param[in,out]	flags		Pointer to a 4 byte Table Flags
@@ -942,8 +925,7 @@ dict_tf_get_format(
@param[in] zip_ssize Zip Shift Size
@param[in] use_data_dir Table uses DATA DIRECTORY
@param[in] page_compressed Table uses page compression
-@param[in] page_compression_level Page compression level
-@param[in] not_used For future */
+@param[in] page_compression_level Page compression level */
UNIV_INLINE
void
dict_tf_set(
@@ -952,8 +934,7 @@ dict_tf_set(
ulint zip_ssize,
bool use_data_dir,
bool page_compressed,
- ulint page_compression_level,
- ulint not_used);
+ ulint page_compression_level);
/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
Fsp Flags are written into the tablespace header at the offset
@@ -989,14 +970,8 @@ ulint
dict_table_extent_size(
const dict_table_t* table);
-/** Get the table page size.
-@param[in] table table
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
- const dict_table_t* table)
- MY_ATTRIBUTE((warn_unused_result));
+/** Get the table page size. */
+#define dict_table_page_size(table) page_size_t(table->space->flags)
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -1088,51 +1063,32 @@ dict_make_room_in_cache(
ulint max_tables, /*!< in: max tables allowed in cache */
ulint pct_check); /*!< in: max percent to check */
-#define BIG_ROW_SIZE 1024
-
-/** Adds an index to the dictionary cache.
-@param[in] table table on which the index is
-@param[in] index index; NOTE! The index memory
- object is freed in this function!
-@param[in] page_no root page number of the index
-@param[in] strict TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache(
- dict_table_t* table,
- dict_index_t* index,
- ulint page_no,
- ibool strict)
- MY_ATTRIBUTE((warn_unused_result));
-
/** Clears the virtual column's index list before index is being freed.
@param[in] index Index being freed */
-void
-dict_index_remove_from_v_col_list(
- dict_index_t* index);
+void dict_index_remove_from_v_col_list(dict_index_t* index);
/** Adds an index to the dictionary cache, with possible indexing newly
added column.
-@param[in] table table on which the index is
@param[in] index index; NOTE! The index memory
object is freed in this function!
-@param[in] add_v new virtual column that being added along with
- an add index call
@param[in] page_no root page number of the index
-@param[in] strict TRUE=refuse to create the index
+@param[in] strict true=refuse to create the index
if records could be too big to fit in
				a B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache_w_vcol(
- dict_table_t* table,
+@param[out] err DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION
+@param[in]	add_v		new virtual column that is being added along with
+ an add index call
+@return the added index
+@retval NULL on error */
+dict_index_t*
+dict_index_add_to_cache(
dict_index_t* index,
- const dict_add_v_col_t* add_v,
ulint page_no,
- ibool strict)
- MY_ATTRIBUTE((warn_unused_result));
+ bool strict = false,
+ dberr_t* err = NULL,
+ const dict_add_v_col_t* add_v = NULL)
+ MY_ATTRIBUTE((nonnull(1)));
+
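The two former variants are merged into one function with default arguments that returns the cached index (or NULL after freeing it on failure) instead of only an error code. A hedged sketch of the new call style; the argument values are illustrative:

	dberr_t		err;
	index = dict_index_add_to_cache(index, FIL_NULL,
					trx_is_strict(trx), &err);
	if (!index) {
		/* err is DB_TOO_BIG_RECORD or DB_CORRUPTION;
		the index object has already been freed */
	}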
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
@@ -1145,6 +1101,7 @@ dict_index_get_n_fields(
representation of index (in
the dictionary cache) */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
@@ -1271,7 +1228,7 @@ Returns TRUE if the index contains a column or a prefix of that column.
@param[in] n column number
@param[in] is_virtual whether it is a virtual col
@return TRUE if contains the column or its prefix */
-ibool
+bool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
@@ -1433,42 +1390,15 @@ dict_index_copy_rec_order_prefix(
@param[in,out] heap memory heap for allocation
@return own: data tuple */
dtuple_t*
-dict_index_build_data_tuple_func(
+dict_index_build_data_tuple(
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
- dict_index_build_data_tuple_func(rec, index, leaf, n_fields, heap)
-#else /* UNIV_DEBUG */
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
- dict_index_build_data_tuple_func(rec, index, n_fields, heap)
-#endif /* UNIV_DEBUG */
/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
UNIV_INLINE
@@ -1850,18 +1780,10 @@ dict_set_corrupted_index_cache_only(
Flags a table in the specified tablespace as corrupted in the table dictionary
cache.
@return true if successful */
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
- ulint space_id); /*!< in: space ID */
+bool dict_set_corrupted_by_space(const fil_space_t* space);
-/** Flag a table with specified space_id encrypted in the data dictionary
-cache
-@param[in] space_id Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(
- ulint space_id);
+/** Flag a table as encrypted in the data dictionary cache.
+@param[in]	space	tablespace */
+void dict_set_encrypted_by_space(const fil_space_t* space);
/** Sets merge_threshold in the SYS_INDEXES
@param[in,out] index index
@@ -1898,18 +1820,6 @@ dict_tf2_is_valid(
ulint flags,
ulint flags2);
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((warn_unused_result));
-
-#define dict_table_is_temporary(table) (table)->is_temporary()
-
/*********************************************************************//**
This function should be called whenever a page is successfully
compressed. Updates the compression padding information. */
@@ -1943,8 +1853,6 @@ dict_tf_to_row_format_string(
/*=========================*/
ulint table_flag); /*!< in: row format setting */
-#define dict_col_is_virtual(col) (col)->is_virtual()
-
/** Encode the number of columns and the number of virtual columns in one
4-byte value. We can do this because the number of columns in
InnoDB is limited to 1017
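
The hunk above folds dict_index_add_to_cache_w_vcol() into dict_index_add_to_cache(): the function now returns the added index (NULL on failure) and takes strict, err and add_v as defaulted trailing parameters, so most call sites only pass the index and the root page number. A minimal standalone C++ sketch of that calling convention follows; all names and the record-size check are illustrative stand-ins, not the real InnoDB declarations.

#include <cstdio>

// Hypothetical stand-ins; not the real InnoDB types.
enum dberr_t { DB_SUCCESS, DB_TOO_BIG_RECORD, DB_CORRUPTION };
struct dict_index_t { unsigned page; };

// Mirrors the new calling convention: return the cached index (or NULL)
// and report the reason through an optional out-parameter.
dict_index_t* index_add_to_cache(dict_index_t* index, unsigned page_no,
				 bool strict = false, dberr_t* err = NULL)
{
	bool too_big = strict && page_no == 0;	// stand-in for the record-size check
	if (err) {
		*err = too_big ? DB_TOO_BIG_RECORD : DB_SUCCESS;
	}
	if (too_big) {
		return NULL;
	}
	index->page = page_no;
	return index;
}

int main()
{
	dict_index_t idx;
	dberr_t err;
	if (!index_add_to_cache(&idx, 3, false, &err)) {
		std::printf("failed: %d\n", int(err));
	}
	return 0;
}
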
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 337ded84f5f..03b842e041d 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -264,7 +264,6 @@ dict_index_is_clust(
const dict_index_t* index) /*!< in: index */
{
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
return(index->type & DICT_CLUSTERED);
}
@@ -306,7 +305,7 @@ dict_index_is_spatial(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(index->type & DICT_SPATIAL);
+ return ulint(UNIV_EXPECT(index->type & DICT_SPATIAL, 0));
}
/********************************************************************//**
@@ -350,8 +349,10 @@ dict_table_get_n_user_cols(
const dict_table_t* table) /*!< in: table */
{
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - dict_table_get_n_sys_cols(table));
+ /* n_cols counts stored columns only. A table may contain
+ virtual columns and no user-specified stored columns at all. */
+ ut_ad(table->n_cols >= DATA_N_SYS_COLS);
+ return unsigned(table->n_cols) - DATA_N_SYS_COLS;
}
/********************************************************************//**
@@ -483,8 +484,8 @@ dict_table_get_nth_v_col(
ut_ad(table);
ut_ad(pos < table->n_v_def);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(static_cast<dict_v_col_t*>(table->v_cols) + pos);
+ ut_ad(!table->v_cols[pos].m_col.is_instant());
+ return &table->v_cols[pos];
}
/********************************************************************//**
@@ -498,14 +499,8 @@ dict_table_get_sys_col(
ulint sys) /*!< in: DATA_ROW_ID, ... */
{
dict_col_t* col;
-
- ut_ad(table);
- ut_ad(sys < dict_table_get_n_sys_cols(table));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- col = dict_table_get_nth_col(table, table->n_cols
- - dict_table_get_n_sys_cols(table)
- + sys);
+ col = dict_table_get_nth_col(table,
+ dict_table_get_sys_col_no(table, sys));
ut_ad(col->mtype == DATA_SYS);
ut_ad(col->prtype == (sys | DATA_NOT_NULL));
@@ -524,28 +519,23 @@ dict_table_get_sys_col_no(
ulint sys) /*!< in: DATA_ROW_ID, ... */
{
ut_ad(table);
- ut_ad(sys < dict_table_get_n_sys_cols(table));
+ ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(table->n_cols - dict_table_get_n_sys_cols(table) + sys);
+ return unsigned(table->n_cols) + (sys - DATA_N_SYS_COLS);
}
/********************************************************************//**
Check whether the table uses the compact page format.
@return TRUE if table uses the compact page format */
UNIV_INLINE
-ibool
+bool
dict_table_is_comp(
/*===============*/
const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
-
-#if DICT_TF_COMPACT != 1
-#error "DICT_TF_COMPACT must be 1"
-#endif
-
- return(table->flags & DICT_TF_COMPACT);
+ return (table->flags & DICT_TF_COMPACT) != 0;
}
/************************************************************************
@@ -580,8 +570,8 @@ dict_tf_is_valid_not_redundant(ulint flags)
for the uncompressed page format */
return(false);
} else if (zip_ssize > PAGE_ZIP_SSIZE_MAX
- || zip_ssize > UNIV_PAGE_SIZE_SHIFT
- || UNIV_PAGE_SIZE_SHIFT > UNIV_ZIP_SIZE_SHIFT_MAX) {
+ || zip_ssize > srv_page_size_shift
+ || srv_page_size_shift > UNIV_ZIP_SIZE_SHIFT_MAX) {
/* KEY_BLOCK_SIZE is out of bounds, or
ROW_FORMAT=COMPRESSED is not supported with this
innodb_page_size (only up to 16KiB) */
@@ -621,7 +611,7 @@ dict_tf_is_valid(
bit. For ROW_FORMAT=REDUNDANT, only the DATA_DIR flag
(which we cleared above) can be set. If any other flags
are set, the flags are invalid. */
- return(flags == 0);
+ return(flags == 0 || flags == DICT_TF_MASK_NO_ROLLBACK);
}
return(dict_tf_is_valid_not_redundant(flags));
@@ -677,44 +667,13 @@ dict_tf_get_rec_format(
return(REC_FORMAT_DYNAMIC);
}
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
-{
- if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- return(UNIV_FORMAT_B);
- }
-
- return(UNIV_FORMAT_A);
-}
-
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return(dict_tf_get_format(table->flags));
-}
-
/** Set the various values in a dict_table_t::flags pointer.
@param[in,out]	flags		Pointer to a 4-byte Table Flags value
@param[in] format File Format
@param[in] zip_ssize Zip Shift Size
@param[in] use_data_dir Table uses DATA DIRECTORY
@param[in] page_compressed Table uses page compression
-@param[in] page_compression_level Page compression level
-@param[in] not_used For future */
+@param[in] page_compression_level Page compression level */
UNIV_INLINE
void
dict_tf_set(
@@ -724,8 +683,7 @@ dict_tf_set(
ulint zip_ssize,
bool use_data_dir,
bool page_compressed,
- ulint page_compression_level,
- ulint not_used)
+ ulint page_compression_level)
{
switch (format) {
case REC_FORMAT_REDUNDANT:
@@ -842,7 +800,8 @@ dict_tf_to_sys_tables_type(
| DICT_TF_MASK_ATOMIC_BLOBS
| DICT_TF_MASK_DATA_DIR
| DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_NO_ROLLBACK);
return(type);
}
@@ -866,21 +825,7 @@ dict_tf_get_page_size(
ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- return(page_size_t(zip_size, univ_page_size.logical(), true));
-}
-
-/** Get the table page size.
-@param[in] table table
-@return a structure containing the compressed and uncompressed
-page sizes and a boolean indicating if the page is compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
- const dict_table_t* table)
-{
- ut_ad(table != NULL);
-
- return(dict_tf_get_page_size(table->flags));
+ return(page_size_t(zip_size, srv_page_size, true));
}
/*********************************************************************//**
@@ -1171,36 +1116,6 @@ dict_index_get_min_size(
}
/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->space);
-}
-
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->space = unsigned(space);
-}
-
-/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
UNIV_INLINE
@@ -1240,7 +1155,7 @@ ulint
dict_index_get_space_reserve(void)
/*==============================*/
{
- return(UNIV_PAGE_SIZE / 16);
+ return(srv_page_size / 16);
}
/********************************************************************//**
@@ -1370,7 +1285,7 @@ dict_table_is_fts_column(
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -1381,16 +1296,15 @@ dict_max_field_len_store_undo(
const dict_col_t* col) /*!< in: column which index prefix
is based on */
{
- ulint prefix_len = 0;
+ if (!dict_table_has_atomic_blobs(table)) {
+ return(0);
+ }
- if (dict_table_get_format(table) >= UNIV_FORMAT_B)
- {
- prefix_len = col->max_prefix
- ? col->max_prefix
- : DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+ if (col->max_prefix != 0) {
+ return(col->max_prefix);
}
- return(prefix_len);
+ return(REC_VERSION_56_MAX_INDEX_COL_LEN);
}
/** Determine maximum bytes of a virtual column need to be stored
@@ -1410,10 +1324,10 @@ dict_max_v_field_len_store_undo(
/* This calculation conforms to the non-virtual column
maximum log length calculation:
- 1) for UNIV_FORMAT_A, upto REC_ANTELOPE_MAX_INDEX_COL_LEN
- for UNIV_FORMAT_B, upto col->max_prefix or
- 2) REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
- if (dict_table_get_format(table) >= UNIV_FORMAT_B) {
+ 1) if No atomic BLOB, upto REC_ANTELOPE_MAX_INDEX_COL_LEN
+ 2) if atomic BLOB, upto col->max_prefix or
+ REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
+ if (dict_table_has_atomic_blobs(table)) {
if (DATA_BIG_COL(col) && col->max_prefix > 0) {
max_log_len = col->max_prefix;
} else {
@@ -1456,18 +1370,6 @@ dict_table_is_corrupted(
return(table->corrupted);
}
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
-{
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
-}
-
/** Check if the table is found is a file_per_table tablespace.
This test does not use table flags2 since some REDUNDANT tables in the
system tablespace may have garbage in the MIX_LEN field where flags2 is
@@ -1489,7 +1391,8 @@ bool
dict_table_is_file_per_table(
const dict_table_t* table) /*!< in: table to check */
{
- return !is_system_tablespace(table->space);
+ return table->space != fil_system.sys_space
+ && table->space != fil_system.temp_space;
}
/** Acquire the table handle. */
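
dict_max_field_len_store_undo() above now keys off dict_table_has_atomic_blobs() instead of the removed UNIV_FORMAT_A/UNIV_FORMAT_B constants. A standalone sketch of the same decision, using the 767/3072 limits quoted elsewhere in this patch (the function name and parameters here are stand-ins):

#include <cstdio>

// 3072 = REC_VERSION_56_MAX_INDEX_COL_LEN; 767 = REC_ANTELOPE_MAX_INDEX_COL_LEN - 1.
static const unsigned REC_VERSION_56_MAX_INDEX_COL_LEN = 3072;

// No atomic BLOBs -> no prefix goes to the undo log; otherwise the
// explicit index prefix, or the format maximum.
unsigned max_field_len_store_undo(bool has_atomic_blobs, unsigned max_prefix)
{
	if (!has_atomic_blobs) {
		return 0;
	}
	if (max_prefix != 0) {
		return max_prefix;
	}
	return REC_VERSION_56_MAX_INDEX_COL_LEN;
}

int main()
{
	std::printf("%u %u %u\n",
		    max_field_len_store_undo(false, 767),	// 0
		    max_field_len_store_undo(true, 767),	// 767
		    max_field_len_store_undo(true, 0));		// 3072
	return 0;
}
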
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index b35add02d9d..cddfbc68cb7 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -55,15 +55,6 @@ enum dict_system_id_t {
SYS_NUM_SYSTEM_TABLES
};
-/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info_t {
- DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
- structure with information from
- a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
- is in the cache, if so, return it */
-};
-
/** Check each tablespace found in the data dictionary.
Look at each table defined in SYS_TABLES that has a space_id > 0.
If the tablespace is not yet in the fil_system cache, look up the
@@ -199,10 +190,7 @@ dict_process_sys_tables_rec_and_mtr_commit(
mem_heap_t* heap, /*!< in: temporary memory heap */
const rec_t* rec, /*!< in: SYS_TABLES record */
dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status, /*!< in: status bit controls
- options such as whether we shall
- look for dict_table_t from cache
- first */
+ bool cached, /*!< in: whether to load from cache */
mtr_t* mtr); /*!< in/out: mini-transaction,
will be committed */
/********************************************************************//**
@@ -243,7 +231,6 @@ information
@return error message, or NULL on success */
const char*
dict_process_sys_virtual_rec(
- mem_heap_t* heap,
const rec_t* rec,
table_id_t* table_id,
ulint* pos,
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index b8726d3136e..63cf5c8bbec 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -46,7 +46,6 @@ Created 1/8/1996 Heikki Tuuri
#include "gis0type.h"
#include "os0once.h"
#include "fil0fil.h"
-#include <my_crypt.h>
#include "fil0crypt.h"
#include <set>
#include <algorithm>
@@ -107,7 +106,7 @@ are described in fsp0fsp.h. */
/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
-#define DICT_TF_COMPACT 1 /*!< Compact row format. */
+#define DICT_TF_COMPACT 1U /*!< Compact row format. */
/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
@@ -119,9 +118,10 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
/** Width of the ZIP_SSIZE flag */
#define DICT_TF_WIDTH_ZIP_SSIZE 4
-/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up
-BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Barracuda row formats store the whole blob or text field off-page atomically.
+/** Width of the ATOMIC_BLOBS flag. ROW_FORMAT=REDUNDANT and
+ROW_FORMAT=COMPACT break up BLOB and TEXT fields, storing the first 768 bytes
+in the clustered index. ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED
+store the whole blob or text field off-page atomically.
Secondary indexes are created from this external data using row_ext_t
to cache the BLOB prefixes. */
#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
@@ -139,10 +139,10 @@ Width of the page compression flag
#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
/**
-Width of atomic writes flag
-DEFAULT=0, ON = 1, OFF = 2
+The NO_ROLLBACK flag (3=yes; the values 1 and 2 used to stand for
+ATOMIC_WRITES=ON and ATOMIC_WRITES=OFF between MariaDB 10.1.0 and 10.2.3)
*/
-#define DICT_TF_WIDTH_ATOMIC_WRITES 2
+#define DICT_TF_WIDTH_NO_ROLLBACK 2
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
@@ -150,7 +150,8 @@ DEFAULT=0, ON = 1, OFF = 2
+ DICT_TF_WIDTH_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_DATA_DIR \
+ DICT_TF_WIDTH_PAGE_COMPRESSION \
- + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_NO_ROLLBACK)
/** Zero relative shift position of the COMPACT field */
#define DICT_TF_POS_COMPACT 0
@@ -169,11 +170,11 @@ DEFAULT=0, ON = 1, OFF = 2
/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ DICT_TF_WIDTH_PAGE_COMPRESSION)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+/** Zero relative shift position of the NO_ROLLBACK field */
+#define DICT_TF_POS_NO_ROLLBACK (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_NO_ROLLBACK \
+ + DICT_TF_WIDTH_NO_ROLLBACK)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -199,10 +200,10 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \
((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
<< DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Bit mask of the ATOMIC_WRITES field */
-#define DICT_TF_MASK_ATOMIC_WRITES \
- ((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \
- << DICT_TF_POS_ATOMIC_WRITES)
+/** Bit mask of the NO_ROLLBACK field */
+#define DICT_TF_MASK_NO_ROLLBACK \
+ ((~(~0U << DICT_TF_WIDTH_NO_ROLLBACK)) \
+ << DICT_TF_POS_NO_ROLLBACK)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -228,10 +229,6 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \
((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \
>> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Return the value of the ATOMIC_WRITES field */
-#define DICT_TF_GET_ATOMIC_WRITES(flags) \
- ((flags & DICT_TF_MASK_ATOMIC_WRITES) \
- >> DICT_TF_POS_ATOMIC_WRITES)
/* @} */
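
The NO_ROLLBACK field above reuses the two bits that previously held ATOMIC_WRITES, and its position and mask are derived the same way as for every other DICT_TF_* field. A self-contained sketch of that width/position/mask arithmetic; the 1-bit widths of DATA_DIR and PAGE_COMPRESSION are assumptions, as they are not visible in this hunk.

#include <cstdio>

static const unsigned WIDTH_COMPACT                = 1;
static const unsigned WIDTH_ZIP_SSIZE              = 4;
static const unsigned WIDTH_ATOMIC_BLOBS           = 1;
static const unsigned WIDTH_DATA_DIR               = 1;	// assumed
static const unsigned WIDTH_PAGE_COMPRESSION       = 1;	// assumed
static const unsigned WIDTH_PAGE_COMPRESSION_LEVEL = 4;
static const unsigned WIDTH_NO_ROLLBACK            = 2;

// Each field position is the sum of the widths of the fields before it.
static const unsigned POS_NO_ROLLBACK = WIDTH_COMPACT + WIDTH_ZIP_SSIZE
	+ WIDTH_ATOMIC_BLOBS + WIDTH_DATA_DIR + WIDTH_PAGE_COMPRESSION
	+ WIDTH_PAGE_COMPRESSION_LEVEL;

static const unsigned MASK_NO_ROLLBACK =
	(~(~0U << WIDTH_NO_ROLLBACK)) << POS_NO_ROLLBACK;

int main()
{
	unsigned flags = MASK_NO_ROLLBACK;	// both NO_ROLLBACK bits set (3=yes)
	// Same test as dict_table_t::no_rollback() later in this patch.
	bool no_rollback = !(~flags & MASK_NO_ROLLBACK);
	std::printf("pos=%u mask=0x%x no_rollback=%d\n",
		    POS_NO_ROLLBACK, MASK_NO_ROLLBACK, int(no_rollback));
	return 0;
}
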
@@ -306,22 +303,13 @@ dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index
- of the table is placed */
+ fil_space_t* space, /*!< in: tablespace */
ulint n_cols, /*!< in: total number of columns
including virtual and non-virtual
columns */
ulint n_v_cols, /*!< in: number of virtual columns */
ulint flags, /*!< in: table flags */
ulint flags2); /*!< in: table flags2 */
-/**********************************************************************//**
-Determines if a table belongs to a system database
-@return */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*==================*/
- char *name); /*!< in: table name */
/****************************************************************//**
Free a table memory object. */
void
@@ -405,11 +393,7 @@ dict_mem_fill_index_struct(
/*=======================*/
dict_index_t* index, /*!< out: index to be filled */
mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
@@ -419,11 +403,8 @@ Creates an index memory object.
dict_index_t*
dict_mem_index_create(
/*==================*/
- const char* table_name, /*!< in: table name */
+ dict_table_t* table, /*!< in: table */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
@@ -560,36 +541,6 @@ private:
const char* m_name;
};
-/** Table name wrapper for pretty-printing */
-struct table_name_t
-{
- /** The name in internal representation */
- char* m_name;
-
- /** @return the end of the schema name */
- const char* dbend() const
- {
- const char* sep = strchr(m_name, '/');
- ut_ad(sep);
- return sep;
- }
-
- /** @return the length of the schema name, in bytes */
- size_t dblen() const { return dbend() - m_name; }
-
- /** Determine the filename-safe encoded table name.
- @return the filename-safe encoded table name */
- const char* basename() const { return dbend() + 1; }
-
- /** The start of the table basename suffix for partitioned tables */
- static const char part_suffix[4];
-
- /** Determine the partition or subpartition name suffix.
- @return the partition name
- @retval NULL if the table is not partitioned */
- const char* part() const { return strstr(basename(), part_suffix); }
-};
-
/** Data structure for a column in a table */
struct dict_col_t{
/*----------------------*/
@@ -631,14 +582,74 @@ struct dict_col_t{
of an index */
unsigned max_prefix:12; /*!< maximum index prefix length on
this column. Our current max limit is
- 3072 for Barracuda table */
-
- /** @return whether this is a virtual column */
- bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+ 3072 (REC_VERSION_56_MAX_INDEX_COL_LEN)
+ bytes. */
/** Detach the column from an index.
@param[in] index index to be detached from */
inline void detach(const dict_index_t& index);
+
+ /** Data for instantly added columns */
+ struct def_t {
+ /** original default value of instantly added column */
+ const void* data;
+ /** len of data, or UNIV_SQL_DEFAULT if unavailable */
+ ulint len;
+ } def_val;
+
+ /** Retrieve the column name.
+ @param[in] table the table of this column */
+ const char* name(const dict_table_t& table) const;
+
+ /** @return whether this is a virtual column */
+ bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+ /** @return whether NULL is an allowed value for this column */
+ bool is_nullable() const { return !(prtype & DATA_NOT_NULL); }
+
+ /** @return whether table of this system field is TRX_ID-based */
+ bool vers_native() const
+ {
+ ut_ad(vers_sys_start() || vers_sys_end());
+ ut_ad(mtype == DATA_INT || mtype == DATA_FIXBINARY);
+ return mtype == DATA_INT;
+ }
+ /** @return whether this is system versioned */
+ bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
+ /** @return whether this is the system version start */
+ bool vers_sys_start() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_START;
+ }
+ /** @return whether this is the system version end */
+ bool vers_sys_end() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_END;
+ }
+
+ /** @return whether this is an instantly-added column */
+ bool is_instant() const
+ {
+ DBUG_ASSERT(def_val.len != UNIV_SQL_DEFAULT || !def_val.data);
+ return def_val.len != UNIV_SQL_DEFAULT;
+ }
+ /** Get the default value of an instantly-added column.
+ @param[out] len value length (in bytes), or UNIV_SQL_NULL
+ @return default value
+ @retval NULL if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+ const byte* instant_value(ulint* len) const
+ {
+ DBUG_ASSERT(is_instant());
+ *len = def_val.len;
+ return static_cast<const byte*>(def_val.data);
+ }
+
+ /** Remove the 'instant ADD' status of the column */
+ void remove_instant()
+ {
+ DBUG_ASSERT(is_instant());
+ def_val.len = UNIV_SQL_DEFAULT;
+ def_val.data = NULL;
+ }
};
/** Index information put in a list of virtual column structure. Index
@@ -650,6 +661,9 @@ struct dict_v_idx_t {
/** position in this index */
ulint nth_field;
+
+ dict_v_idx_t(dict_index_t* index, ulint nth_field)
+ : index(index), nth_field(nth_field) {}
};
/** Index list to put in dict_v_col_t */
@@ -719,17 +733,17 @@ files would be at risk! */
/** Find out maximum indexed column length by its table format.
For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For
-Barracuda row formats COMPRESSED and DYNAMIC, the length could
+ROW_FORMAT=COMPRESSED and ROW_FORMAT=DYNAMIC, the length could
be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
-#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
- ((dict_table_get_format(table) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
+ (dict_table_has_atomic_blobs(table) \
+ ? REC_VERSION_56_MAX_INDEX_COL_LEN \
+ : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
-#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
- ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
+ (DICT_TF_HAS_ATOMIC_BLOBS(flags) \
+ ? REC_VERSION_56_MAX_INDEX_COL_LEN \
+ : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
@@ -756,6 +770,15 @@ struct dict_field_t{
/** Zero-initialize all fields */
dict_field_t() : col(NULL), name(NULL), prefix_len(0), fixed_len(0) {}
+
+ /** Check whether two index fields are equivalent.
+	@param[in]	other	the other index field
+ @return whether the index fields are equivalent */
+ bool same(const dict_field_t& other) const
+ {
+ return(prefix_len == other.prefix_len
+ && fixed_len == other.fixed_len);
+ }
};
/**********************************************************************//**
@@ -831,10 +854,7 @@ struct dict_index_t{
index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
id_name_t name; /*!< index name */
- const char* table_name;/*!< table name */
dict_table_t* table; /*!< back pointer to table */
- unsigned space:32;
- /*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */
unsigned merge_threshold:6;
/*!< In the pessimistic delete, if the page
@@ -850,8 +870,8 @@ struct dict_index_t{
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
-#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
-# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#if (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
#endif
unsigned n_user_defined_cols:10;
/*!< number of columns the user defined to
@@ -874,6 +894,17 @@ struct dict_index_t{
unsigned n_def:10;/*!< number of fields defined so far */
unsigned n_fields:10;/*!< number of fields in the index */
unsigned n_nullable:10;/*!< number of nullable fields */
+ unsigned n_core_fields:10;/*!< number of fields in the index
+	(before the first instant ADD COLUMN) */
+ /** number of bytes of null bits in ROW_FORMAT!=REDUNDANT node pointer
+ records; usually equal to UT_BITS_IN_BYTES(n_nullable), but
+ can be less in clustered indexes with instant ADD COLUMN */
+ unsigned n_core_null_bytes:8;
+ /** magic value signalling that n_core_null_bytes was not
+ initialized yet */
+ static const unsigned NO_CORE_NULL_BYTES = 0xff;
+ /** The clustered index ID of the hard-coded SYS_INDEXES table. */
+ static const unsigned DICT_INDEXES_ID = 3;
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
@@ -997,6 +1028,10 @@ struct dict_index_t{
uncommitted = !committed;
}
+ /** Notify that the index pages are going to be modified.
+ @param[in,out] mtr mini-transaction */
+ inline void set_modified(mtr_t& mtr) const;
+
/** @return whether this index is readable
@retval true normally
@retval false if this is a single-table tablespace
@@ -1004,6 +1039,9 @@ struct dict_index_t{
page cannot be read or decrypted */
inline bool is_readable() const;
+ /** @return whether instant ADD COLUMN is in effect */
+ inline bool is_instant() const;
+
/** @return whether the index is the primary key index
(not the clustered index of the change buffer) */
bool is_primary() const
@@ -1028,6 +1066,69 @@ struct dict_index_t{
n_fields = 0;
}
}
+
+ /** Determine how many fields of a given prefix can be set NULL.
+ @param[in] n_prefix number of fields in the prefix
+ @return number of fields 0..n_prefix-1 that can be set NULL */
+ unsigned get_n_nullable(ulint n_prefix) const
+ {
+ DBUG_ASSERT(n_prefix > 0);
+ DBUG_ASSERT(n_prefix <= n_fields);
+ unsigned n = n_nullable;
+ for (; n_prefix < n_fields; n_prefix++) {
+ const dict_col_t* col = fields[n_prefix].col;
+ DBUG_ASSERT(!col->is_virtual());
+ n -= col->is_nullable();
+ }
+ DBUG_ASSERT(n < n_def);
+ return n;
+ }
+
+ /** Get the default value of an instantly-added clustered index field.
+ @param[in] n instantly added field position
+ @param[out] len value length (in bytes), or UNIV_SQL_NULL
+ @return default value
+ @retval NULL if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+ const byte* instant_field_value(ulint n, ulint* len) const
+ {
+ DBUG_ASSERT(is_instant() || id == DICT_INDEXES_ID);
+ DBUG_ASSERT(n + (id == DICT_INDEXES_ID) >= n_core_fields);
+ DBUG_ASSERT(n < n_fields);
+ return fields[n].col->instant_value(len);
+ }
+
+ /** Adjust clustered index metadata for instant ADD COLUMN.
+	@param[in]	instant	clustered index definition after instant ADD COLUMN */
+ void instant_add_field(const dict_index_t& instant);
+
+ /** Remove the 'instant ADD' status of a clustered index.
+ Protected by index root page x-latch or table X-lock. */
+ void remove_instant()
+ {
+ DBUG_ASSERT(is_primary());
+ if (!is_instant()) {
+ return;
+ }
+ for (unsigned i = n_core_fields; i < n_fields; i++) {
+ fields[i].col->remove_instant();
+ }
+ n_core_fields = n_fields;
+ n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable));
+ }
+
+ /** Check if record in clustered index is historical row.
+ @param[in] rec clustered row
+ @param[in] offsets offsets
+ @return true if row is historical */
+ bool
+ vers_history_row(const rec_t* rec, const ulint* offsets);
+
+ /** Check if record in secondary index is historical row.
+ @param[in] rec record in a secondary index
+ @param[out] history_row true if row is historical
+ @return true on error */
+ bool
+ vers_history_row(const rec_t* rec, bool &history_row);
};
/** Detach a column from an index.
@@ -1393,6 +1494,11 @@ struct dict_table_t {
@return whether the last handle was released */
inline bool release();
+ /** @return whether the table supports transactions */
+ bool no_rollback() const
+ {
+ return !(~unsigned(flags) & DICT_TF_MASK_NO_ROLLBACK);
+ }
/** @return whether this is a temporary table */
bool is_temporary() const
{
@@ -1406,9 +1512,66 @@ struct dict_table_t {
page cannot be read or decrypted */
bool is_readable() const
{
+ ut_ad(file_unreadable || space);
return(UNIV_LIKELY(!file_unreadable));
}
+ /** @return whether instant ADD COLUMN is in effect */
+ bool is_instant() const
+ {
+ return(UT_LIST_GET_FIRST(indexes)->is_instant());
+ }
+
+ /** @return whether the table supports instant ADD COLUMN */
+ bool supports_instant() const
+ {
+ return(!(flags & DICT_TF_MASK_ZIP_SSIZE));
+ }
+
+ /** Adjust metadata for instant ADD COLUMN.
+ @param[in] table table definition after instant ADD COLUMN */
+ void instant_add_column(const dict_table_t& table);
+
+ /** Roll back instant_add_column().
+ @param[in] old_n_cols original n_cols
+ @param[in] old_cols original cols
+ @param[in] old_col_names original col_names */
+ void rollback_instant(
+ unsigned old_n_cols,
+ dict_col_t* old_cols,
+ const char* old_col_names);
+
+ /** Trim the instantly added columns when an insert into SYS_COLUMNS
+ is rolled back during ALTER TABLE or recovery.
+ @param[in] n number of surviving non-system columns */
+ void rollback_instant(unsigned n);
+
+ /** Add the table definition to the data dictionary cache */
+ void add_to_cache();
+
+ bool versioned() const { return vers_start || vers_end; }
+ bool versioned_by_id() const
+ {
+ return vers_start && cols[vers_start].mtype == DATA_INT;
+ }
+
+ void inc_fk_checks()
+ {
+#ifdef UNIV_DEBUG
+ lint fk_checks= (lint)
+#endif
+ my_atomic_addlint(&n_foreign_key_checks_running, 1);
+ ut_ad(fk_checks >= 0);
+ }
+ void dec_fk_checks()
+ {
+#ifdef UNIV_DEBUG
+ lint fk_checks= (lint)
+#endif
+ my_atomic_addlint(&n_foreign_key_checks_running, ulint(-1));
+ ut_ad(fk_checks > 0);
+ }
+
/** Id of the table. */
table_id_t id;
@@ -1429,8 +1592,10 @@ struct dict_table_t {
/** NULL or the directory path specified by DATA DIRECTORY. */
char* data_dir_path;
- /** Space where the clustered index of the table is placed. */
- uint32_t space;
+ /** The tablespace of the table */
+ fil_space_t* space;
+ /** Tablespace ID */
+ ulint space_id;
/** Stores information about:
1 row format (redundant or compact),
@@ -1529,7 +1694,10 @@ struct dict_table_t {
/** Virtual column names */
const char* v_col_names;
-
+ unsigned vers_start:10;
+ /*!< System Versioning: row start col index */
+ unsigned vers_end:10;
+ /*!< System Versioning: row end col index */
bool is_system_db;
/*!< True if the table belongs to a system
database (mysql, information_schema or
@@ -1746,7 +1914,7 @@ struct dict_table_t {
ulong n_waiting_or_granted_auto_inc_locks;
/** The transaction that currently holds the the AUTOINC lock on this
- table. Protected by lock_sys->mutex. */
+ table. Protected by lock_sys.mutex. */
const trx_t* autoinc_trx;
/* @} */
@@ -1761,7 +1929,7 @@ struct dict_table_t {
/** Count of the number of record locks on this table. We use this to
determine whether we can evict the table from the dictionary cache.
- It is protected by lock_sys->mutex. */
+ It is protected by lock_sys.mutex. */
ulint n_rec_locks;
private:
@@ -1771,7 +1939,7 @@ private:
int32 n_ref_count;
public:
- /** List of locks on the table. Protected by lock_sys->mutex. */
+ /** List of locks on the table. Protected by lock_sys.mutex. */
table_lock_list_t locks;
/** Timestamp of the last modification of this table. */
@@ -1789,9 +1957,22 @@ public:
dict_vcol_templ_t* vc_templ;
};
-inline bool dict_index_t::is_readable() const
+inline void dict_index_t::set_modified(mtr_t& mtr) const
+{
+ mtr.set_named_space(table->space);
+}
+
+inline bool dict_index_t::is_readable() const { return table->is_readable(); }
+
+inline bool dict_index_t::is_instant() const
{
- return(UNIV_LIKELY(!table->file_unreadable));
+ ut_ad(n_core_fields > 0);
+ ut_ad(n_core_fields <= n_fields);
+ ut_ad(n_core_fields == n_fields
+ || (type & ~(DICT_UNIQUE | DICT_CORRUPT)) == DICT_CLUSTERED);
+ ut_ad(n_core_fields == n_fields || table->supports_instant());
+ ut_ad(n_core_fields == n_fields || !table->is_temporary());
+ return(n_core_fields != n_fields);
}
inline bool dict_index_t::is_corrupted() const
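
Most of the instant ADD COLUMN bookkeeping added above reduces to comparing n_core_fields with n_fields: is_instant() is true exactly when fields have been appended past the original core. A toy standalone model of that invariant (not the real dict_index_t):

#include <cassert>

struct index_model {
	unsigned n_fields;	// current number of index fields
	unsigned n_core_fields;	// fields before the first instant ADD COLUMN

	bool is_instant() const { return n_core_fields != n_fields; }

	// Instant ADD COLUMN appends a field but keeps n_core_fields.
	void instant_add_field() { n_fields++; }

	// A table rebuild (or rollback) clears the instant status.
	void remove_instant() { n_core_fields = n_fields; }
};

int main()
{
	index_model idx = { 5, 5 };
	assert(!idx.is_instant());
	idx.instant_add_field();
	assert(idx.is_instant());
	idx.remove_instant();
	assert(!idx.is_instant());
	return 0;
}
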
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index da2ac629850..70424af7347 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -37,11 +37,7 @@ dict_mem_fill_index_struct(
/*=======================*/
dict_index_t* index, /*!< out: index to be filled */
mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields) /*!< in: number of fields */
@@ -61,11 +57,10 @@ dict_mem_fill_index_struct(
/* Assign a ulint to a 4-bit-mapped field.
Only the low-order 4 bits are assigned. */
index->type = unsigned(type);
- index->space = (unsigned int) space;
index->page = FIL_NULL;
index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
- index->table_name = table_name;
index->n_fields = (unsigned int) n_fields;
+ index->n_core_fields = (unsigned int) n_fields;
/* The '1 +' above prevents allocation
of an empty mem block */
index->nulls_equal = false;
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index 22bcb1504cc..8387e3265b2 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,16 +114,9 @@ dict_stats_thread_deinit();
#ifdef UNIV_DEBUG
/** Disables dict stats thread. It's used by:
SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-dict_stats_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/*****************************************************************//**
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 27b4cc0e694..f2fcae69bd5 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -28,6 +28,7 @@ Created 1/8/1996 Heikki Tuuri
#define dict0types_h
#include <ut0mutex.h>
+#include <rem0types.h>
struct dict_sys_t;
struct dict_col_t;
@@ -52,6 +53,13 @@ DICT_IBUF_ID_MIN plus the space id */
typedef ib_id_t table_id_t;
typedef ib_id_t index_id_t;
+/** Maximum transaction identifier */
+#define TRX_ID_MAX IB_ID_MAX
+
+/** The bit pattern corresponding to TRX_ID_MAX */
+extern const byte trx_id_max_bytes[8];
+extern const byte timestamp_max_bytes[7];
+
/** Error to ignore when we load table dictionary into memory. However,
the table and index will be marked as "corrupted", and caller will
be responsible to deal with corrupted table or index.
@@ -92,6 +100,36 @@ typedef ib_mutex_t DictSysMutex;
#define TEMP_TABLE_PREFIX "#sql"
#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
+/** Table name wrapper for pretty-printing */
+struct table_name_t
+{
+ /** The name in internal representation */
+ char* m_name;
+
+ /** @return the end of the schema name */
+ const char* dbend() const
+ {
+ const char* sep = strchr(m_name, '/');
+ ut_ad(sep);
+ return sep;
+ }
+
+ /** @return the length of the schema name, in bytes */
+ size_t dblen() const { return size_t(dbend() - m_name); }
+
+ /** Determine the filename-safe encoded table name.
+ @return the filename-safe encoded table name */
+ const char* basename() const { return dbend() + 1; }
+
+ /** The start of the table basename suffix for partitioned tables */
+ static const char part_suffix[4];
+
+ /** Determine the partition or subpartition name suffix.
+ @return the partition name
+ @retval NULL if the table is not partitioned */
+ const char* part() const { return strstr(basename(), part_suffix); }
+};
+
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
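
table_name_t, moved into this header, only slices the internal "db/table" name around the '/' separator and the partition suffix. A self-contained sketch of the same parsing; the literal "#P#" suffix is an assumption, since only its 4-byte length is visible here.

#include <cstdio>
#include <cstring>

struct name_view {
	const char* m_name;	// internal "db/table" representation

	const char* dbend() const { return std::strchr(m_name, '/'); }
	size_t dblen() const { return size_t(dbend() - m_name); }
	const char* basename() const { return dbend() + 1; }
	const char* part() const { return std::strstr(basename(), "#P#"); }
};

int main()
{
	name_view n = { "test/t1#P#p0" };
	std::printf("db=%.*s table=%s partition=%s\n",
		    int(n.dblen()), n.m_name, n.basename(),
		    n.part() ? n.part() : "(none)");
	return 0;
}
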
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
index c6c5a2233ca..0954851bca4 100644
--- a/storage/innobase/include/dyn0buf.h
+++ b/storage/innobase/include/dyn0buf.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -31,14 +32,13 @@ Created 2013-03-16 Sunny Bains
#include "ut0lst.h"
/** Class that manages dynamic buffers. It uses a UT_LIST of
-dyn_buf_t::block_t instances. We don't use STL containers in
+mtr_buf_t::block_t instances. We don't use STL containers in
order to avoid the overhead of heap calls. Using a custom memory
allocator doesn't solve the problem either because we have to get
the memory from somewhere. We can't use the block_t::m_data as the
backend for the custom allocator because we would like the data in
the blocks to be contiguous. */
-template <size_t SIZE = DYN_ARRAY_DATA_SIZE>
-class dyn_buf_t {
+class mtr_buf_t {
public:
class block_t;
@@ -46,17 +46,19 @@ public:
typedef UT_LIST_NODE_T(block_t) block_node_t;
typedef UT_LIST_BASE_NODE_T(block_t) block_list_t;
+	/** DYN_ARRAY_DATA_SIZE - sizeof(m_node) + sizeof(m_used) */
+ enum { MAX_DATA_SIZE = DYN_ARRAY_DATA_SIZE
+ - sizeof(block_node_t) + sizeof(ib_uint32_t) };
+
class block_t {
public:
block_t()
{
- ut_ad(MAX_DATA_SIZE <= (2 << 15));
+ compile_time_assert(MAX_DATA_SIZE <= (2 << 15));
init();
}
- ~block_t() { }
-
/**
Gets the number of used bytes in a block.
@return number of bytes used */
@@ -111,12 +113,12 @@ public:
/**
@return pointer to start of reserved space */
template <typename Type>
- Type push(ib_uint32_t size)
+ Type push(uint32_t size)
{
Type ptr = reinterpret_cast<Type>(end());
m_used += size;
- ut_ad(m_used <= static_cast<ib_uint32_t>(MAX_DATA_SIZE));
+ ut_ad(m_used <= uint32_t(MAX_DATA_SIZE));
return(ptr);
}
@@ -130,7 +132,7 @@ public:
ut_ad(ptr <= begin() + m_buf_end);
/* We have done the boundary check above */
- m_used = static_cast<ib_uint32_t>(ptr - begin());
+ m_used = uint32_t(ptr - begin());
ut_ad(m_used <= MAX_DATA_SIZE);
ut_d(m_buf_end = 0);
@@ -153,13 +155,6 @@ public:
ulint m_magic_n;
#endif /* UNIV_DEBUG */
- /** SIZE - sizeof(m_node) + sizeof(m_used) */
- enum {
- MAX_DATA_SIZE = SIZE
- - sizeof(block_node_t)
- + sizeof(ib_uint32_t)
- };
-
/** Storage */
byte m_data[MAX_DATA_SIZE];
@@ -168,15 +163,13 @@ public:
/** number of data bytes used in this block;
DYN_BLOCK_FULL_FLAG is set when the block becomes full */
- ib_uint32_t m_used;
+ uint32_t m_used;
- friend class dyn_buf_t;
+ friend class mtr_buf_t;
};
- enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE};
-
/** Default constructor */
- dyn_buf_t()
+ mtr_buf_t()
:
m_heap(),
m_size()
@@ -186,7 +179,7 @@ public:
}
/** Destructor */
- ~dyn_buf_t()
+ ~mtr_buf_t()
{
erase();
}
@@ -251,7 +244,7 @@ public:
@param size in bytes of the element
@return pointer to the element */
template <typename Type>
- Type push(ib_uint32_t size)
+ Type push(uint32_t size)
{
ut_ad(size > 0);
ut_ad(size <= MAX_DATA_SIZE);
@@ -271,17 +264,11 @@ public:
Pushes n bytes.
	@param ptr	bytes to write
@param len string length */
- void push(const byte* ptr, ib_uint32_t len)
+ void push(const byte* ptr, uint32_t len)
{
while (len > 0) {
- ib_uint32_t n_copied;
-
- if (len >= MAX_DATA_SIZE) {
- n_copied = MAX_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
+ uint32_t n_copied = std::min(len,
+ uint32_t(MAX_DATA_SIZE));
::memmove(push<byte*>(n_copied), ptr, n_copied);
ptr += n_copied;
@@ -297,7 +284,7 @@ public:
const Type at(ulint pos) const
{
block_t* block = const_cast<block_t*>(
- const_cast<dyn_buf_t*>(this)->find(pos));
+ const_cast<mtr_buf_t*>(this)->find(pos));
return(reinterpret_cast<Type>(block->begin() + pos));
}
@@ -390,8 +377,8 @@ public:
private:
// Disable copying
- dyn_buf_t(const dyn_buf_t&);
- dyn_buf_t& operator=(const dyn_buf_t&);
+ mtr_buf_t(const mtr_buf_t&);
+ mtr_buf_t& operator=(const mtr_buf_t&);
/**
Add the block to the end of the list*/
@@ -403,7 +390,7 @@ private:
}
/** @return the last block in the list */
- block_t* back()
+ block_t* back() const
{
return(UT_LIST_GET_LAST(m_list));
}
@@ -483,8 +470,6 @@ private:
block_t m_first_block;
};
-typedef dyn_buf_t<DYN_ARRAY_DATA_SIZE> mtr_buf_t;
-
/** mtr_buf_t copier */
struct mtr_buf_copy_t {
/** The copied buffer */
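
The rewritten push(const byte*, uint32_t) above simply copies the input in chunks of at most MAX_DATA_SIZE bytes. A standalone sketch of that chunking loop, with a std::vector standing in for the block list:

#include <algorithm>
#include <cstdint>
#include <vector>

// Stand-in for block_t::MAX_DATA_SIZE; the real value is derived from
// DYN_ARRAY_DATA_SIZE as shown above.
static const uint32_t MAX_DATA_SIZE = 512;

// Copy len bytes into dst in MAX_DATA_SIZE-sized chunks, mirroring the
// mtr_buf_t::push(const byte*, uint32_t) loop.
void push(std::vector<uint8_t>& dst, const uint8_t* ptr, uint32_t len)
{
	while (len > 0) {
		uint32_t n_copied = std::min(len, MAX_DATA_SIZE);
		dst.insert(dst.end(), ptr, ptr + n_copied);
		ptr += n_copied;
		len -= n_copied;
	}
}

int main()
{
	std::vector<uint8_t> buf;
	uint8_t payload[2000] = {0};
	push(buf, payload, sizeof payload);
	return buf.size() == sizeof payload ? 0 : 1;
}
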
diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h
index 13b3ec4e37e..5238213135f 100644
--- a/storage/innobase/include/fil0crypt.h
+++ b/storage/innobase/include/fil0crypt.h
@@ -27,9 +27,9 @@ Created 04/01/2015 Jan Lindström
#define fil0crypt_h
#ifndef UNIV_INNOCHECKSUM
-
#include "os0event.h"
#include "my_crypt.h"
+#include "fil0fil.h"
#endif /*! UNIV_INNOCHECKSUM */
/**
@@ -296,7 +296,6 @@ fil_space_destroy_crypt_data(
Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
@param[in] ptr Log entry start
@param[in] end_ptr Log entry end
-@param[in] block buffer block
@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED
@return position on log buffer */
UNIV_INTERN
@@ -304,7 +303,6 @@ byte*
fil_parse_write_crypt_data(
byte* ptr,
const byte* end_ptr,
- const buf_block_t* block,
dberr_t* err)
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 46faa3e90b2..91f50d70e5d 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -32,19 +32,14 @@ Created 10/25/1995 Heikki Tuuri
#include "log0recv.h"
#include "dict0types.h"
#include "page0size.h"
-#include "ibuf0types.h"
-
-#include <list>
// Forward declaration
-extern ibool srv_use_doublewrite_buf;
+extern my_bool srv_use_doublewrite_buf;
extern struct buf_dblwr_t* buf_dblwr;
class page_id_t;
struct trx_t;
class truncate_t;
-typedef std::list<char*, ut_allocator<char*> > space_name_list_t;
-
/** Structure containing encryption specification */
struct fil_space_crypt_t;
@@ -85,7 +80,7 @@ struct fil_space_t {
/*!< LSN of the most recent
fil_names_write_if_was_clean().
Reset to 0 by fil_names_clear().
- Protected by log_sys->mutex.
+ Protected by log_sys.mutex.
If and only if this is nonzero, the
tablespace will be in named_spaces. */
bool stop_new_ops;
@@ -106,7 +101,8 @@ struct fil_space_t {
ulint redo_skipped_count;
/*!< reference count for operations who want
to skip redo log in the file space in order
- to make fsp_space_modify_check pass. */
+ to make fsp_space_modify_check pass.
+ Uses my_atomic_loadlint() and friends. */
#endif
fil_type_t purpose;/*!< purpose */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
@@ -140,14 +136,14 @@ struct fil_space_t {
dropped. An example is change buffer merge.
The tablespace cannot be dropped while this is nonzero,
or while fil_node_t::n_pending is nonzero.
- Protected by fil_system->mutex. */
+ Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
ulint n_pending_ops;
/** Number of pending block read or write operations
(when a write is imminent or a read has recently completed).
The tablespace object cannot be freed while this is nonzero,
but it can be detached from fil_system.
Note that fil_node_t::n_pending tracks actual pending I/O requests.
- Protected by fil_system->mutex. */
+ Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
ulint n_pending_ios;
hash_node_t hash; /*!< hash chain node */
hash_node_t name_hash;/*!< hash chain the name_hash table */
@@ -175,10 +171,6 @@ struct fil_space_t {
/** True if the device this filespace is on supports atomic writes */
bool atomic_write_supported;
- /** Release the reserved free extents.
- @param[in] n_reserved number of reserved extents */
- void release_free_extents(ulint n_reserved);
-
/** True if file system storing this tablespace supports
punch hole */
bool punch_hole;
@@ -207,6 +199,84 @@ struct fil_space_t {
fil_node_t* add(const char* name, pfs_os_file_t handle,
ulint size, bool is_raw, bool atomic_write,
ulint max_pages = ULINT_MAX);
+
+ /** Try to reserve free extents.
+ @param[in] n_free_now current number of free extents
+ @param[in] n_to_reserve number of extents to reserve
+ @return whether the reservation succeeded */
+ bool reserve_free_extents(ulint n_free_now, ulint n_to_reserve)
+ {
+ ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+ if (n_reserved_extents + n_to_reserve > n_free_now) {
+ return false;
+ }
+
+ n_reserved_extents += n_to_reserve;
+ return true;
+ }
+
+ /** Release the reserved free extents.
+ @param[in] n_reserved number of reserved extents */
+ void release_free_extents(ulint n_reserved)
+ {
+ if (!n_reserved) return;
+ ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+ ut_a(n_reserved_extents >= n_reserved);
+ n_reserved_extents -= n_reserved;
+ }
+
+ /** Rename a file.
+ @param[in] name table name after renaming
+ @param[in] path tablespace file name after renaming
+ @param[in] log whether to write redo log
+ @param[in] replace whether to ignore the existence of path
+ @return error code
+ @retval DB_SUCCESS on success */
+ dberr_t rename(const char* name, const char* path, bool log,
+ bool replace = false);
+
+ /** Note that the tablespace has been imported.
+ Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
+ written while the space ID is being updated in each page. */
+ void set_imported();
+
+ /** Open each file. Only invoked on fil_system.temp_space.
+ @return whether all files were opened */
+ bool open();
+ /** Close each file. Only invoked on fil_system.temp_space. */
+ void close();
+
+ /** Acquire a tablespace reference. */
+ void acquire() { my_atomic_addlint(&n_pending_ops, 1); }
+ /** Release a tablespace reference. */
+ void release()
+ {
+ ut_ad(referenced());
+ my_atomic_addlint(&n_pending_ops, ulint(-1));
+ }
+ /** @return whether references are being held */
+ bool referenced() { return my_atomic_loadlint(&n_pending_ops); }
+ /** @return whether references are being held */
+ bool referenced() const
+ {
+ return const_cast<fil_space_t*>(this)->referenced();
+ }
+
+ /** Acquire a tablespace reference for I/O. */
+ void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); }
+ /** Release a tablespace reference for I/O. */
+ void release_for_io()
+ {
+ ut_ad(pending_io());
+ my_atomic_addlint(&n_pending_ios, ulint(-1));
+ }
+ /** @return whether I/O is pending */
+ bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
+ /** @return whether I/O is pending */
+ bool pending_io() const
+ {
+ return const_cast<fil_space_t*>(this)->pending_io();
+ }
};
/** Value of fil_space_t::magic_n */
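
The new acquire()/release() and acquire_for_io()/release_for_io() members wrap the n_pending_ops and n_pending_ios counters so that callers never touch them directly, and a tablespace cannot be dropped while it is referenced. A toy model of that reference-counting pattern, using std::atomic in place of my_atomic_addlint()/my_atomic_loadlint():

#include <atomic>
#include <cassert>

struct space_model {
	std::atomic<unsigned long> n_pending_ops{0};

	void acquire() { n_pending_ops.fetch_add(1); }
	void release()
	{
		assert(referenced());
		n_pending_ops.fetch_sub(1);
	}
	bool referenced() const { return n_pending_ops.load() != 0; }
};

int main()
{
	space_model s;
	s.acquire();		// taken while an operation uses the tablespace
	assert(s.referenced());	// the tablespace must not be dropped now
	s.release();
	assert(!s.referenced());
	return 0;
}
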
@@ -216,13 +286,13 @@ struct fil_space_t {
struct fil_node_t {
/** tablespace containing this file */
fil_space_t* space;
- /** file name; protected by fil_system->mutex and log_sys->mutex. */
+ /** file name; protected by fil_system.mutex and log_sys.mutex. */
char* name;
/** file handle (valid if is_open) */
pfs_os_file_t handle;
/** event that groups and serializes calls to fsync;
os_event_set() and os_event_reset() are protected by
- fil_system_t::mutex */
+ fil_system.mutex */
os_event_t sync_event;
/** whether the file actually is a raw device or disk partition */
bool is_raw_disk;
@@ -247,7 +317,7 @@ struct fil_node_t {
int64_t flush_counter;
/** link to other files in this tablespace */
UT_LIST_NODE_T(fil_node_t) chain;
- /** link to the fil_system->LRU list (keeping track of open files) */
+ /** link to the fil_system.LRU list (keeping track of open files) */
UT_LIST_NODE_T(fil_node_t) LRU;
/** whether this file could use atomic write (data file) */
@@ -269,6 +339,9 @@ struct fil_node_t {
@param[in] first whether this is the very first read
@return whether the page was found valid */
bool read_page0(bool first);
+
+ /** Close the file handle. */
+ void close();
};
/** Value of fil_node_t::magic_n */
@@ -297,18 +370,20 @@ of the address is FIL_NULL, the address is considered undefined. */
typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
stored in a file page is a string of bytes */
+#else
+# include "univ.i"
#endif /* !UNIV_INNOCHECKSUM */
/** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
+#define FIL_IBD_FILE_INITIAL_SIZE 4U
/** 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
-#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
-#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
+#define FIL_ADDR_PAGE 0U /* first in address is the page offset */
+#define FIL_ADDR_BYTE 4U /* then comes 2-byte byte offset within page*/
+#define FIL_ADDR_SIZE 6U /* address size is 6 bytes */
#ifndef UNIV_INNOCHECKSUM
@@ -319,7 +394,7 @@ struct fil_addr_t {
};
/** The null file address */
-extern fil_addr_t fil_addr_null;
+extern const fil_addr_t fil_addr_null;
#endif /* !UNIV_INNOCHECKSUM */
@@ -328,15 +403,15 @@ extern fil_addr_t fil_addr_null;
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
-#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
-#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
+#define FIL_PAGE_OFFSET 4U /*!< page offset inside space */
+#define FIL_PAGE_PREV 8U /*!< if there is a 'natural'
predecessor of the page, its
offset. Otherwise FIL_NULL.
This field is not set on BLOB
pages, which are stored as a
singly-linked list. See also
FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
+#define FIL_PAGE_NEXT 12U /*!< if there is a 'natural' successor
of the page, its offset.
Otherwise FIL_NULL.
B-tree index pages
@@ -346,9 +421,9 @@ extern fil_addr_t fil_addr_null;
FIL_PAGE_PREV and FIL_PAGE_NEXT
in the collation order of the
smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
+#define FIL_PAGE_LSN 16U /*!< lsn of the end of the newest
modification log record to the page */
-#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
+#define FIL_PAGE_TYPE 24U /*!< file page type: FIL_PAGE_INDEX,...,
2 bytes.
The contents of this field can only
@@ -363,7 +438,7 @@ extern fil_addr_t fil_addr_null;
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page
+#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26U /*!< for the first page
in a system tablespace data file
(ibdata*, not *.ibd): the file has
been flushed to disk at least up
@@ -377,7 +452,7 @@ extern fil_addr_t fil_addr_null;
#define FIL_RTREE_SPLIT_SEQ_NUM FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
/** starting from 4.1.x this contains the space id of the page */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34U
#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
@@ -404,7 +479,7 @@ extern fil_addr_t fil_addr_null;
then encrypted */
#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
-#define FIL_PAGE_RTREE 17854 /*!< B-tree node */
+#define FIL_PAGE_RTREE 17854 /*!< R-tree node (SPATIAL INDEX) */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
@@ -427,15 +502,26 @@ extern fil_addr_t fil_addr_null;
//#define FIL_PAGE_ENCRYPTED 15
//#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16
//#define FIL_PAGE_ENCRYPTED_RTREE 17
+/** Clustered index root page after instant ADD COLUMN */
+#define FIL_PAGE_TYPE_INSTANT 18
-/** Used by i_s.cc to index into the text description. */
+/** Used by i_s.cc to index into the text description.
+Note: FIL_PAGE_TYPE_INSTANT maps to the same description as FIL_PAGE_INDEX. */
#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_UNKNOWN
/*!< Last page type */
/* @} */
-/** macro to check whether the page type is index (Btree or Rtree) type */
-#define fil_page_type_is_index(page_type) \
- (page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE)
+/** @return whether the page type is B-tree or R-tree index */
+inline bool fil_page_type_is_index(ulint page_type)
+{
+ switch (page_type) {
+ case FIL_PAGE_TYPE_INSTANT:
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
+ return(true);
+ }
+ return(false);
+}
/** Check whether the page is an index page (either a regular B-tree index
or an R-tree index) */
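
fil_page_type_is_index() is now an inline function so that it can also accept the new FIL_PAGE_TYPE_INSTANT root-page type. A standalone sketch using the constants quoted in this diff:

#include <cstdio>

static const unsigned FIL_PAGE_INDEX        = 17855;	// B-tree node
static const unsigned FIL_PAGE_RTREE        = 17854;	// R-tree node (SPATIAL INDEX)
static const unsigned FIL_PAGE_TYPE_INSTANT = 18;	// clustered root after instant ADD COLUMN
static const unsigned FIL_PAGE_UNDO_LOG     = 2;	// undo log page

inline bool page_type_is_index(unsigned page_type)
{
	switch (page_type) {
	case FIL_PAGE_TYPE_INSTANT:
	case FIL_PAGE_INDEX:
	case FIL_PAGE_RTREE:
		return true;
	}
	return false;
}

int main()
{
	std::printf("%d %d\n",
		    int(page_type_is_index(FIL_PAGE_TYPE_INSTANT)),
		    int(page_type_is_index(FIL_PAGE_UNDO_LOG)));
	return 0;
}
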
@@ -467,7 +553,7 @@ The caller should hold an InnoDB table lock or a MDL that prevents
the tablespace from being dropped during the operation,
or the caller should be in single-threaded crash recovery mode
(no user connections that could drop tablespaces).
-If this is not the case, fil_space_acquire() and fil_space_release()
+If this is not the case, fil_space_acquire() and fil_space_t::release()
should be used instead.
@param[in] id tablespace ID
@return tablespace, or NULL if not found */
@@ -480,12 +566,42 @@ fil_space_get(
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
struct fil_system_t {
+ /**
+ Constructor.
+
+  Some members may require late initialisation, thus we just mark the object as
+ uninitialised. Real initialisation happens in create().
+ */
+ fil_system_t(): m_initialised(false)
+ {
+ UT_LIST_INIT(LRU, &fil_node_t::LRU);
+ UT_LIST_INIT(space_list, &fil_space_t::space_list);
+ UT_LIST_INIT(rotation_list, &fil_space_t::rotation_list);
+ UT_LIST_INIT(unflushed_spaces, &fil_space_t::unflushed_spaces);
+ UT_LIST_INIT(named_spaces, &fil_space_t::named_spaces);
+ }
+
+ bool is_initialised() const { return m_initialised; }
+
+ /**
+ Create the file system interface at database start.
+
+ @param[in] hash_size hash table size
+ */
+ void create(ulint hash_size);
+
+ /** Close the file system interface at shutdown */
+ void close();
+
+private:
+ bool m_initialised;
+public:
ib_mutex_t mutex; /*!< The mutex protecting the cache */
+ fil_space_t* sys_space; /*!< The innodb_system tablespace */
+ fil_space_t* temp_space; /*!< The innodb_temporary tablespace */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
id */
- hash_table_t* name_hash; /*!< hash table based on the space
- name */
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
/*!< base node for the LRU list of the
most recently used open files with no
@@ -504,8 +620,6 @@ struct fil_system_t {
at least one file node where
modification_counter > flush_counter */
ulint n_open; /*!< number of files currently open */
- ulint max_n_open; /*!< n_open is not allowed to exceed
- this */
int64_t modification_counter;/*!< when we write to a file we
increment this by one */
ulint max_assigned_id;/*!< maximum space id in the existing
@@ -521,20 +635,19 @@ struct fil_system_t {
for which a MLOG_FILE_NAME
record has been written since
the latest redo log checkpoint.
- Protected only by log_sys->mutex. */
+ Protected only by log_sys.mutex. */
UT_LIST_BASE_NODE_T(fil_space_t) rotation_list;
/*!< list of all file spaces needing
key rotation.*/
- ibool space_id_reuse_warned;
- /* !< TRUE if fil_space_create()
+ bool space_id_reuse_warned;
+ /*!< whether fil_space_create()
has issued a warning about
potential space_id reuse */
};
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-extern fil_system_t* fil_system;
+/** The tablespace memory cache. */
+extern fil_system_t fil_system;
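Because fil_system is now a statically allocated object instead of a pointer, start-up and shutdown become member calls and call sites dereference with '.' rather than '->'. A hedged sketch of the intended lifecycle (the hash-size argument is illustrative only):

/* At server startup: */
fil_system.create(1000 /* illustrative hash table size */);
ut_ad(fil_system.is_initialised());

/* Call sites use the object directly: */
mutex_enter(&fil_system.mutex);
/* ... look up or modify cached tablespaces ... */
mutex_exit(&fil_system.mutex);

/* At shutdown: */
fil_system.close();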
#include "fil0crypt.h"
@@ -547,23 +660,6 @@ fil_space_get_latch(
ulint id,
ulint* flags);
-/** Gets the type of a file space.
-@param[in] id tablespace identifier
-@return file type */
-fil_type_t
-fil_space_get_type(
- ulint id);
-
-/** Note that a tablespace has been imported.
-It is initially marked as FIL_TYPE_IMPORT so that no logging is
-done during the import process when the space ID is stamped to each page.
-Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
-NOTE: temporary tablespaces are never imported.
-@param[in] id tablespace identifier */
-void
-fil_space_set_imported(
- ulint id);
-
/** Create a space memory object and put it to the fil_system hash table.
Error messages are issued to the server log.
@param[in] name tablespace name
@@ -605,16 +701,6 @@ fil_space_free(
ulint id,
bool x_latched);
-/** Returns the path from the first fil_node_t found with this space ID.
-The caller is responsible for freeing the memory allocated here for the
-value returned.
-@param[in] id Tablespace ID
-@return own: A copy of fil_node_t::path, NULL if space ID is zero
-or not found. */
-char*
-fil_space_get_first_path(
- ulint id);
-
/** Set the recovered size of a tablespace in pages.
@param id tablespace ID
@param size recovered size in pages */
@@ -638,19 +724,6 @@ fil_space_get_flags(
/*================*/
ulint id); /*!< in: space id */
-/** Open each fil_node_t of a named fil_space_t if not already open.
-@param[in] name Tablespace name
-@return true if all file nodes are opened. */
-bool
-fil_space_open(
- const char* name);
-
-/** Close each fil_node_t of a named fil_space_t if open.
-@param[in] name Tablespace name */
-void
-fil_space_close(
- const char* name);
-
/** Returns the page size of the space and whether it is compressed or not.
The tablespace must be cached in the memory cache.
@param[in] id space id
@@ -661,18 +734,6 @@ fil_space_get_page_size(
ulint id,
bool* found);
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open); /*!< in: max number of open files */
-/*******************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_close(void);
-/*===========*/
/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
@@ -750,11 +811,6 @@ fil_space_acquire_silent(ulint id)
return (fil_space_acquire_low(id, true));
}
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-void
-fil_space_release(fil_space_t* space);
-
/** Acquire a tablespace for reading or writing a block,
when it could be dropped concurrently.
@param[in] id tablespace ID
@@ -763,17 +819,12 @@ when it could be dropped concurrently.
fil_space_t*
fil_space_acquire_for_io(ulint id);
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-void
-fil_space_release_for_io(fil_space_t* space);
-
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
fil_space_t*
@@ -783,10 +834,10 @@ fil_space_next(
/** Return the next fil_space_t from key rotation list.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
fil_space_t*
@@ -794,68 +845,6 @@ fil_space_keyrotate_next(
fil_space_t* prev_space)
MY_ATTRIBUTE((warn_unused_result));
-/** Wrapper with reference-counting for a fil_space_t. */
-class FilSpace
-{
-public:
- /** Default constructor: Use this when reference counting
- is done outside this wrapper. */
- FilSpace() : m_space(NULL) {}
-
- /** Constructor: Look up the tablespace and increment the
- reference count if found.
- @param[in] space_id tablespace ID
- @param[in] silent whether not to display errors */
- explicit FilSpace(ulint space_id, bool silent = false)
- : m_space(fil_space_acquire_low(space_id, silent)) {}
-
- /** Assignment operator: This assumes that fil_space_acquire()
- has already been done for the fil_space_t. The caller must
- assign NULL if it calls fil_space_release().
- @param[in] space tablespace to assign */
- class FilSpace& operator=(fil_space_t* space)
- {
- /* fil_space_acquire() must have been invoked. */
- ut_ad(space == NULL || space->n_pending_ops > 0);
- m_space = space;
- return(*this);
- }
-
- /** Destructor - Decrement the reference count if a fil_space_t
- is still assigned. */
- ~FilSpace()
- {
- if (m_space != NULL) {
- fil_space_release(m_space);
- }
- }
-
- /** Implicit type conversion
- @return the wrapped object */
- operator const fil_space_t*() const
- {
- return(m_space);
- }
-
- /** Member accessor
- @return the wrapped object */
- const fil_space_t* operator->() const
- {
- return(m_space);
- }
-
- /** Explicit type conversion
- @return the wrapped object */
- const fil_space_t* operator()() const
- {
- return(m_space);
- }
-
-private:
- /** The wrapped pointer */
- fil_space_t* m_space;
-};
-
/********************************************************//**
Creates the database directory for a table if it does not exist yet. */
void
@@ -863,43 +852,6 @@ fil_create_directory_for_tablename(
/*===============================*/
const char* name); /*!< in: name in the standard
'databasename/tablename' format */
-/** Write redo log for renaming a file.
-@param[in] space_id tablespace id
-@param[in] old_name tablespace file name
-@param[in] new_name tablespace file name after renaming */
-void
-fil_name_write_rename(
- ulint space_id,
- const char* old_name,
- const char* new_name);
-/********************************************************//**
-Recreates table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_table(
-/*===============*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate); /*!< in/out: The information of
- TRUNCATE log record */
-/********************************************************//**
-Recreates the tablespace and table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_tablespace(
-/*====================*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate, /*!< in/out: The information of
- TRUNCATE log record */
- lsn_t recv_lsn); /*!< in: the end LSN of
- the log record */
/** Replay a file rename operation if possible.
@param[in] space_id tablespace identifier
@param[in] first_page_no first page number in the file
@@ -949,37 +901,6 @@ fil_space_t* fil_truncate_prepare(ulint space_id);
void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));
-/** Truncate the tablespace to needed size.
-@param[in] space_id id of tablespace to truncate
-@param[in] size_in_pages truncate size.
-@return true if truncate was successful. */
-bool
-fil_truncate_tablespace(
- ulint space_id,
- ulint size_in_pages);
-
-/*******************************************************************//**
-Prepare for truncating a single-table tablespace. The tablespace
-must be cached in the memory cache.
-1) Check pending operations on a tablespace;
-2) Remove all insert buffer entries for the tablespace;
-@return DB_SUCCESS or error */
-dberr_t
-fil_prepare_for_truncate(
-/*=====================*/
- ulint id); /*!< in: space id */
-
-/** Reinitialize the original tablespace header with the same space id
-for single tablespace
-@param[in] table table belongs to the tablespace
-@param[in] size size in blocks
-@param[in] trx Transaction covering truncate */
-void
-fil_reinit_space_header_for_table(
- dict_table_t* table,
- ulint size,
- trx_t* trx);
-
/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.
@@ -991,58 +912,6 @@ fil_close_tablespace(
ulint id); /*!< in: space id */
/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
- in DROP TABLE they are only removed gradually in the background;
-
- 3. When the user does IMPORT TABLESPACE, the tablespace will have the
- same id as it originally had.
-
- 4. Free all the pages in use by the tablespace if rename=true.
-@return DB_SUCCESS or error */
-dberr_t
-fil_discard_tablespace(
-/*===================*/
- ulint id) /*!< in: space id */
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Test if a tablespace file can be renamed to a new filepath by checking
-if that the old filepath exists and the new filepath does not exist.
-@param[in] space_id tablespace id
-@param[in] old_path old filepath
-@param[in] new_path new filepath
-@param[in] is_discarded whether the tablespace is discarded
-@param[in] replace_new whether to ignore the existence of new_path
-@return innodb error code */
-dberr_t
-fil_rename_tablespace_check(
- ulint space_id,
- const char* old_path,
- const char* new_path,
- bool is_discarded,
- bool replace_new = false);
-
-/** Rename a single-table tablespace.
-The tablespace must exist in the memory cache.
-@param[in] id tablespace identifier
-@param[in] old_path old file name
-@param[in] new_name new table name in the
-databasename/tablename format
-@param[in] new_path_in new file name,
-or NULL if it is located in the normal data directory
-@return true if success */
-bool
-fil_rename_tablespace(
- ulint id,
- const char* old_path,
- const char* new_name,
- const char* new_path_in);
-
-/*******************************************************************//**
Allocates and builds a file name from a path, a table or tablespace name
and a suffix. The string must be freed by caller with ut_free().
@param[in] path NULL or the direcory path or the full path and filename.
@@ -1065,8 +934,10 @@ fil_make_filepath(
must be >= FIL_IBD_FILE_INITIAL_SIZE
@param[in] mode MariaDB encryption mode
@param[in] key_id MariaDB encryption key_id
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return the created tablespace
+@retval NULL on error */
+fil_space_t*
fil_ibd_create(
ulint space_id,
const char* name,
@@ -1074,16 +945,15 @@ fil_ibd_create(
ulint flags,
ulint size,
fil_encryption_t mode,
- uint32_t key_id)
- MY_ATTRIBUTE((nonnull(2), warn_unused_result));
+ uint32_t key_id,
+ dberr_t* err)
+ MY_ATTRIBUTE((nonnull(2,8), warn_unused_result));
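With fil_ibd_create() returning the tablespace object and reporting errors through the new out parameter, a caller changes roughly as in this hedged sketch (every argument except the trailing &err is illustrative):

dberr_t		err;
fil_space_t*	space = fil_ibd_create(
	space_id, name, path, fsp_flags,
	FIL_IBD_FILE_INITIAL_SIZE,
	FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY,
	&err);

if (space == NULL) {
	/* err carries the reason, e.g. DB_OUT_OF_FILE_SPACE */
	return(err);
}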
/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in] space_id tablespace ID
+@param[in,out] space tablespace
@param[in] flags desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags);
+void fsp_flags_try_adjust(fil_space_t* space, ulint flags);
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
@@ -1110,19 +980,22 @@ statement to update the dictionary tables if they are incorrect.
@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
@param[in] id tablespace ID
@param[in] flags expected FSP_SPACE_FLAGS
-@param[in] space_name tablespace name of the datafile
+@param[in] tablename table name
If file-per-table, it is the table name in the databasename/tablename format
@param[in] path_in expected filepath, usually read from dictionary
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return tablespace
+@retval NULL if the tablespace could not be opened */
+fil_space_t*
fil_ibd_open(
- bool validate,
- bool fix_dict,
- fil_type_t purpose,
- ulint id,
- ulint flags,
- const char* tablename,
- const char* path_in)
+ bool validate,
+ bool fix_dict,
+ fil_type_t purpose,
+ ulint id,
+ ulint flags,
+ const table_name_t& tablename,
+ const char* path_in,
+ dberr_t* err = NULL)
MY_ATTRIBUTE((warn_unused_result));
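The same pattern applies to fil_ibd_open(): success is signalled by a non-NULL fil_space_t*, and the optional err argument may be omitted when the exact error code is not needed. A sketch under those assumptions (argument values are illustrative):

dberr_t	err = DB_SUCCESS;

if (fil_space_t* space = fil_ibd_open(
	    true, false, FIL_TYPE_TABLESPACE,
	    space_id, fsp_flags, table->name, filepath, &err)) {
	/* the tablespace is now cached; use space->id, space->flags, ... */
} else {
	/* err explains why the .ibd file could not be opened */
}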
enum fil_load_status {
@@ -1172,15 +1045,14 @@ startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] print_error_if_does_not_exist
Print detailed error information to the
error log if a matching tablespace is not found from memory.
-@param[in] heap Heap memory
@param[in] table_flags table flags
-@return true if a matching tablespace exists in the memory cache */
-bool
+@return the tablespace
+@retval NULL if no matching tablespace exists in the memory cache */
+fil_space_t*
fil_space_for_table_exists_in_mem(
ulint id,
const char* name,
bool print_error_if_does_not_exist,
- mem_heap_t* heap,
ulint table_flags);
/** Try to extend a tablespace if it is smaller than the specified size.
@@ -1191,29 +1063,6 @@ bool
fil_space_extend(
fil_space_t* space,
ulint size);
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return true if succeed */
-bool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve); /*!< in: how many one wants to reserve */
-/*******************************************************************//**
-Releases free extents in a file space. */
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved); /*!< in: how many one reserved */
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id); /*!< in: space id */
/** Reads or writes data. This operation could be asynchronous (aio).
@@ -1310,20 +1159,6 @@ fil_page_set_type(
byte* page, /*!< in/out: file page */
ulint type); /*!< in: type */
-#ifdef UNIV_DEBUG
-/** Increase redo skipped of a tablespace.
-@param[in] id space id */
-void
-fil_space_inc_redo_skipped_count(
- ulint id);
-
-/** Decrease redo skipped of a tablespace.
-@param[in] id space id */
-void
-fil_space_dec_redo_skipped_count(
- ulint id);
-#endif
-
/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables. */
@@ -1352,27 +1187,6 @@ char*
fil_path_to_space_name(
const char* filename);
-/** Returns the space ID based on the tablespace name.
-The tablespace must be found in the tablespace memory cache.
-This call is made from external to this module, so the mutex is not owned.
-@param[in] tablespace Tablespace name
-@return space ID if tablespace found, ULINT_UNDEFINED if space not. */
-ulint
-fil_space_get_id_by_name(
- const char* tablespace);
-
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-dberr_t
-fil_get_space_names(
-/*================*/
- space_name_list_t& space_name_list)
- /*!< in/out: Vector for collecting the names. */
- MY_ATTRIBUTE((warn_unused_result));
-
/** Generate redo log for swapping two .ibd files
@param[in] old_table old table
@param[in] new_table new table
@@ -1388,9 +1202,9 @@ fil_mtr_rename_log(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Acquire the fil_system mutex. */
-#define fil_system_enter() mutex_enter(&fil_system->mutex)
+#define fil_system_enter() mutex_enter(&fil_system.mutex)
/** Release the fil_system mutex. */
-#define fil_system_exit() mutex_exit(&fil_system->mutex)
+#define fil_system_exit() mutex_exit(&fil_system.mutex)
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
@@ -1399,14 +1213,7 @@ fil_space_get_by_id(
/*================*/
ulint id); /*!< in: space id */
-/** Look up a tablespace.
-@param[in] name tablespace name
-@return tablespace
-@retval NULL if not found */
-fil_space_t*
-fil_space_get_by_name(const char* name);
-
-/*******************************************************************//**
+/** Note that a non-predefined persistent tablespace has been modified
by redo log.
@param[in,out] space tablespace */
void
@@ -1441,8 +1248,8 @@ fil_names_write_if_was_clean(
}
const bool was_clean = space->max_lsn == 0;
- ut_ad(space->max_lsn <= log_sys->lsn);
- space->max_lsn = log_sys->lsn;
+ ut_ad(space->max_lsn <= log_sys.lsn);
+ space->max_lsn = log_sys.lsn;
if (was_clean) {
fil_names_dirty_and_write(space, mtr);
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index 023a48a5066..2a7d06e243f 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -39,6 +39,7 @@ fil_get_page_type_name(
return "PAGE_COMPRESSED_ENRYPTED";
case FIL_PAGE_PAGE_COMPRESSED:
return "PAGE_COMPRESSED";
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_INDEX:
return "INDEX";
case FIL_PAGE_RTREE:
@@ -89,6 +90,7 @@ fil_page_type_validate(
if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
page_type == FIL_PAGE_INDEX ||
+ page_type == FIL_PAGE_TYPE_INSTANT ||
page_type == FIL_PAGE_RTREE ||
page_type == FIL_PAGE_UNDO_LOG ||
page_type == FIL_PAGE_INODE ||
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
index 9bc7b4bf9c9..72810a25191 100644
--- a/storage/innobase/include/fsp0file.h
+++ b/storage/innobase/include/fsp0file.h
@@ -414,7 +414,8 @@ private:
/** Flags to use for opening the data file */
os_file_create_t m_open_flags;
- /** size in database pages */
+ /** size in megabytes or pages; converted from megabytes to
+ pages in SysTablespace::normalize_size() */
ulint m_size;
/** ordinal position of this datafile in the tablespace */
@@ -477,7 +478,7 @@ public:
/* No op - base constructor is called. */
}
- RemoteDatafile(const char* name, ulint size, ulint order)
+ RemoteDatafile(const char*, ulint, ulint)
:
m_link_filepath()
{
@@ -499,12 +500,6 @@ public:
return(m_link_filepath);
}
- /** Set the link filepath. Use default datadir, the base name of
- the path provided without its suffix, plus DOT_ISL.
- @param[in] path filepath which contains a basename to use.
- If NULL, use m_name as the basename. */
- void set_link_filepath(const char* path);
-
/** Create a link filename based on the contents of m_name,
open that file, and read the contents into m_filepath.
@retval DB_SUCCESS if remote linked tablespace file is opened and read.
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 8b9bbba5239..76c7762fac3 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -41,8 +41,8 @@ Created 12/18/1995 Heikki Tuuri
/** @return the PAGE_SSIZE flags for the current innodb_page_size */
#define FSP_FLAGS_PAGE_SSIZE() \
- ((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ? \
- 0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
+ ((srv_page_size == UNIV_PAGE_SIZE_ORIG) ? \
+ 0U : (srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
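Worked out: with the default 16 KiB page size (srv_page_size == UNIV_PAGE_SIZE_ORIG) the macro still yields 0, while for a 64 KiB page (srv_page_size_shift == 16) it yields (16 - UNIV_ZIP_SIZE_SHIFT_MIN + 1) << FSP_FLAGS_POS_PAGE_SSIZE, i.e. a PAGE_SSIZE field of 7, assuming UNIV_ZIP_SIZE_SHIFT_MIN is 10 (a 1 KiB minimum compressed page size).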
/* @defgroup Compatibility macros for MariaDB 10.1.0 through 10.1.20;
@@ -290,22 +290,6 @@ the extent are free and which contain old tuple version to clean. */
#ifndef UNIV_INNOCHECKSUM
/* @} */
-/**********************************************************************//**
-Initializes the file space system. */
-void
-fsp_init(void);
-/*==========*/
-
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-ulint
-fsp_header_get_tablespace_size(void);
-/*================================*/
-
/** Calculate the number of pages to extend a datafile.
We extend single-table tablespaces first one extent at a time,
but 4 at a time for bigger tablespaces. It is not enough to extend always
@@ -330,7 +314,7 @@ UNIV_INLINE
ulint
fsp_get_extent_size_in_pages(const page_size_t& page_size)
{
- return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical());
+ return (FSP_EXTENT_SIZE << srv_page_size_shift) / page_size.physical();
}
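Worked out: with the default 16 KiB logical page (srv_page_size_shift == 14, so FSP_EXTENT_SIZE == 64) and an 8 KiB ROW_FORMAT=COMPRESSED physical page size, this returns (64 << 14) / 8192 = 128 physical pages, the same 1 MiB extent that the old UNIV_PAGE_SIZE multiplication expressed.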
/**********************************************************************//**
@@ -393,56 +377,33 @@ fsp_header_init_fields(
ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
0, or table->flags if newer than COMPACT */
/** Initialize a tablespace header.
-@param[in] space_id space id
-@param[in] size current size in blocks
-@param[in,out] mtr mini-transaction */
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr);
+@param[in,out] space tablespace
+@param[in] size current size in blocks
+@param[in,out] mtr mini-transaction */
+void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
-Increases the space size field of a space. */
-void
-fsp_header_inc_size(
-/*================*/
- ulint space_id, /*!< in: space id */
- ulint size_inc, /*!< in: size increment in pages */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
buf_block_t*
fseg_create(
-/*========*/
- ulint space_id,/*!< in: space id */
+ fil_space_t* space, /*!< in,out: tablespace */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space_id,/*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
+ mtr_t* mtr,
+ bool has_done_reservation = false); /*!< in: whether the caller
+ has already done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+
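With fseg_create_general() folded into fseg_create() via the defaulted has_done_reservation parameter, a typical call now looks like this hedged sketch (the byte offset shown is the usual B-tree top segment header slot; the surrounding names are illustrative):

buf_block_t*	block = fseg_create(
	space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);

if (block == NULL) {
	/* the tablespace is out of space */
}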
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
@@ -504,7 +465,7 @@ fseg_alloc_free_page_general(
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
+must be released with function fil_space_t::release_free_extents()!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
@@ -530,7 +491,7 @@ free pages available.
return true and the tablespace size is <
FSP_EXTENT_SIZE pages, then this can be 0,
otherwise it is n_ext
-@param[in] space_id tablespace identifier
+@param[in,out] space tablespace
@param[in] n_ext number of extents to reserve
@param[in] alloc_type page reservation type (FSP_BLOB, etc)
@param[in,out] mtr the mini transaction
@@ -541,30 +502,12 @@ free pages available.
bool
fsp_reserve_free_extents(
ulint* n_reserved,
- ulint space_id,
+ fil_space_t* space,
ulint n_ext,
fsp_reserve_t alloc_type,
mtr_t* mtr,
ulint n_pages = 2);
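A hedged sketch of the reserve/release pairing described above, now passing the tablespace object instead of its ID (the mini-transaction and space handling around it are illustrative; the release goes through the fil_space_t::release_free_extents() member mentioned earlier in this header):

ulint	n_reserved = 0;

if (!fsp_reserve_free_extents(&n_reserved, space, 2, FSP_NORMAL, &mtr)) {
	return(DB_OUT_OF_FILE_SPACE);
}

/* ... perform the multi-page operation, e.g. a B-tree page split ... */

if (n_reserved > 0) {
	space->release_free_extents(n_reserved);
}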
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space.
-@param[in] space_id tablespace ID
-@return available space in KiB
-@retval UINTMAX_MAX if unknown */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- ulint space_id);
-
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space. Start with a space object that has
-been acquired by the caller who holds it for the calculation,
-@param[in] space tablespace object from fil_space_acquire()
-@return available space in KiB */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- const fil_space_t* space);
-
/**********************************************************************//**
Frees a single page of a segment. */
void
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 2160287bd1a..3258704615a 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -92,21 +92,15 @@ xdes_calc_descriptor_page(
const page_size_t& page_size,
ulint offset)
{
-#ifndef DOXYGEN /* Doxygen gets confused by these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
- + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
- * XDES_SIZE_MAX
-# error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
- * XDES_SIZE_MIN
-# error
-# endif
-#endif /* !DOXYGEN */
-
- ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+ compile_time_assert(UNIV_PAGE_SIZE_MAX > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX)
+ * XDES_SIZE_MAX);
+ compile_time_assert(UNIV_PAGE_SIZE_MIN > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE_MIN / FSP_EXTENT_SIZE_MIN)
+ * XDES_SIZE_MIN);
+
+ ut_ad(srv_page_size > XDES_ARR_OFFSET
+ + (srv_page_size / FSP_EXTENT_SIZE)
* XDES_SIZE);
ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+ (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
index a60891c5515..0a05e323193 100644
--- a/storage/innobase/include/fsp0sysspace.h
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -32,14 +32,6 @@ Created 2013-7-26 by Kevin Lewis
at a time. We have to make this public because it is a config variable. */
extern ulong sys_tablespace_auto_extend_increment;
-#ifdef UNIV_DEBUG
-/** Control if extra debug checks need to be done for temporary tablespace.
-Default = true that is disable such checks.
-This variable is not exposed to end-user but still kept as variable for
-developer to enable it during debug. */
-extern bool srv_skip_temp_table_checks_debug;
-#endif /* UNIV_DEBUG */
-
/** Data structure that contains the information about shared tablespaces.
Currently this can be the system tablespace or a temporary table tablespace */
class SysTablespace : public Tablespace
@@ -110,7 +102,7 @@ public:
void shutdown();
/** Normalize the file size, convert to extents. */
- void normalize();
+ void normalize_size();
/**
@return true if a new raw device was created. */
@@ -146,8 +138,8 @@ public:
@return the autoextend increment in pages. */
ulint get_autoextend_increment() const
{
- return(sys_tablespace_auto_extend_increment
- * ((1024 * 1024) / UNIV_PAGE_SIZE));
+ return sys_tablespace_auto_extend_increment
+ << (20 - srv_page_size_shift);
}
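For example, with an autoextend increment of 64 (MiB) and the default 16 KiB pages (srv_page_size_shift == 14) this returns 64 << 6 = 4096 pages, the same 64 MiB that the old multiplication by (1024 * 1024) / UNIV_PAGE_SIZE produced.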
/**
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 92ba956e30b..642bd20e67e 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -60,11 +60,8 @@ page size | file space extent size
32 KiB | 64 pages = 2 MiB
64 KiB | 64 pages = 4 MiB
*/
-#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \
- (1048576 / UNIV_PAGE_SIZE) : \
- ((UNIV_PAGE_SIZE <= (32768)) ? \
- (2097152 / UNIV_PAGE_SIZE) : \
- (4194304 / UNIV_PAGE_SIZE))))
+#define FSP_EXTENT_SIZE (srv_page_size_shift < 14 ? \
+ (1048576U >> srv_page_size_shift) : 64U)
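Worked out against the table above: with 4 KiB pages (srv_page_size_shift == 12) the new form gives 1048576U >> 12 = 256 pages (1 MiB); from 16 KiB pages upwards the shift is at least 14, so the extent is a flat 64 pages, i.e. 1, 2 or 4 MiB depending on the page size.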
/** File space extent size (four megabyte) in pages for MAX page size */
#define FSP_EXTENT_SIZE_MAX (4194304 / UNIV_PAGE_SIZE_MAX)
@@ -152,38 +149,38 @@ enum fsp_reserve_t {
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
-/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
-/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
+/* #define XDES_DESCRIBED_PER_PAGE srv_page_size */
+/* This has been replaced with either srv_page_size or page_zip->size. */
/** @name The space low address page map
The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
/* @{ */
/*--------------------------------------*/
-#define FSP_XDES_OFFSET 0 /* !< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* !< insert buffer bitmap */
+#define FSP_XDES_OFFSET 0U /* !< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET 1U /* !< insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
-#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
+#define FSP_FIRST_INODE_PAGE_NO 2U /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
+#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< insert buffer
header page, in
tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< insert buffer
B-tree root page in
tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
+#define FSP_TRX_SYS_PAGE_NO 5U /*!< transaction
system header, in
tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
+#define FSP_FIRST_RSEG_PAGE_NO 6U /*!< first rollback segment
page, in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
+#define FSP_DICT_HDR_PAGE_NO 7U /*!< data dictionary header
page, in tablespace 0 */
/*--------------------------------------*/
/* @} */
@@ -197,17 +194,6 @@ fsp_is_system_temporary(ulint space_id)
{
return(space_id == SRV_TMP_SPACE_ID);
}
-
-#ifdef UNIV_DEBUG
-/** Skip some of the sanity checks that are time consuming even in debug mode
-and can affect frequent verification runs that are done to ensure stability of
-the product.
-@return true if check should be skipped for given space. */
-bool
-fsp_skip_sanity_check(
- ulint space_id);
-#endif /* UNIV_DEBUG */
-
#endif /* !UNIV_INNOCHECKSUM */
/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
@@ -218,7 +204,7 @@ fsp_skip_sanity_check(
#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+to ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. */
#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
/** Number of flag bits used to indicate the tablespace page size */
#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 82431c76b51..4c4647dba95 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -484,47 +484,49 @@ fts_trx_free(
/*=========*/
fts_trx_t* fts_trx); /*!< in, own: FTS trx */
-/******************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
-on the given table. row_mysql_lock_data_dictionary must have been
-called before this.
-@return DB_SUCCESS or error code */
+/** Creates the common auxiliary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been called
+before this.
+The following tables are created.
+CREATE TABLE $FTS_PREFIX_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_CONFIG
+ (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
+@param[in,out] trx transaction
+@param[in] table table with FTS index
+@param[in] skip_doc_id_index Skip index on doc id
+@return DB_SUCCESS on success */
dberr_t
fts_create_common_tables(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_table_t*
- table, /*!< in: table with one FTS
- index */
- const char* name, /*!< in: table name */
- bool skip_doc_id_index) /*!< in: Skip index on doc id */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return DB_SUCCESS or error code */
-dberr_t
-fts_create_index_tables(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t* index) /*!< in: the FTS index
- instance */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
+ trx_t* trx,
+ dict_table_t* table,
+ bool skip_doc_id_index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+ word VARCHAR(FTS_MAX_WORD_LEN),
+ first_doc_id INT NOT NULL,
+ last_doc_id UNSIGNED NOT NULL,
+ doc_count UNSIGNED INT NOT NULL,
+ ilist VARBINARY NOT NULL,
+ UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out] trx dictionary transaction
+@param[in] index fulltext index
+@param[in] id table id
@return DB_SUCCESS or error code */
dberr_t
-fts_create_index_tables_low(
-/*========================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t*
- index, /*!< in: the FTS index
- instance */
- const char* table_name, /*!< in: the table name */
- table_id_t table_id) /*!< in: the table id */
- MY_ATTRIBUTE((warn_unused_result));
+fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
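A hedged one-line usage sketch of the collapsed API (the surrounding identifiers are illustrative): err = fts_create_index_tables(trx, index, table->id);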
/******************************************************************//**
Add the FTS document id hidden column. */
void
@@ -555,7 +557,7 @@ fts_commit(
MY_ATTRIBUTE((warn_unused_result));
/** FTS Query entry point.
-@param[in] trx transaction
+@param[in,out] trx transaction
@param[in] index fts index to search
@param[in] flags FTS search mode
@param[in] query_str FTS query
@@ -736,7 +738,6 @@ Take a FTS savepoint. */
void
fts_savepoint_take(
/*===============*/
- trx_t* trx, /*!< in: transaction */
fts_trx_t* fts_trx, /*!< in: fts transaction */
const char* name); /*!< in: savepoint name */
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index 59e6311f7d4..85331cbd31e 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -318,7 +318,6 @@ the dict mutex
que_t*
fts_parse_sql_no_dict_lock(
/*=======================*/
- fts_table_t* fts_table, /*!< in: table with FTS index */
pars_info_t* info, /*!< in: parser info */
const char* sql) /*!< in: SQL string to evaluate */
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h
index 15726aea1de..909d2ce07ba 100644
--- a/storage/innobase/include/fts0tokenize.h
+++ b/storage/innobase/include/fts0tokenize.h
@@ -144,7 +144,7 @@ fts_get_word(
}
}
- info->prev = *doc;
+ info->prev = char(*doc);
info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
info->weight_adjust = info->wasign = 0;
}
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index 18bc87213fc..486f8c2f109 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -169,7 +169,6 @@ fts_select_index_by_hash(
const byte* str,
ulint len)
{
- int char_len;
ulong nr1 = 1;
ulong nr2 = 4;
@@ -184,9 +183,9 @@ fts_select_index_by_hash(
char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
reinterpret_cast<const char*>(str + len));
*/
- char_len = cs->cset->charlen(cs, str, str+len);
+ size_t char_len = size_t(cs->cset->charlen(cs, str, str + len));
- ut_ad(static_cast<ulint>(char_len) <= len);
+ ut_ad(char_len <= len);
/* Get collation hash code */
cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index 6fe031876e6..56be971f233 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -48,7 +48,7 @@ fut_get_ptr(
buf_block_t* block;
byte* ptr = NULL;
- ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+ ut_ad(addr.boffset < srv_page_size);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH));
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index 128dc77ed92..5c9a9ca94c1 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -58,7 +58,7 @@ flst_write_addr(
MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_SX_FIX));
ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
@@ -83,7 +83,7 @@ flst_read_addr(
addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
mtr);
ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
return(addr);
}
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index 44e00b3abd6..7bfee9b06e5 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -70,10 +70,8 @@ rtr_index_build_node_ptr(
pointer */
ulint page_no,/*!< in: page number to put in node
pointer */
- mem_heap_t* heap, /*!< in: memory heap where pointer
+ mem_heap_t* heap); /*!< in: memory heap where pointer
created */
- ulint level); /*!< in: level of rec in tree:
- 0 means leaf level */
/*************************************************************//**
Splits an R-tree index page to halves and inserts the tuple. It is assumed
@@ -162,7 +160,6 @@ dberr_t
rtr_ins_enlarge_mbr(
/*=================*/
btr_cur_t* cursor, /*!< in: btr cursor */
- que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
@@ -421,9 +418,6 @@ rtr_merge_and_update_mbr(
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
page_t* child_page, /*!< in: the child page. */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index, /*!< in: index */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
@@ -431,10 +425,8 @@ Deletes on the upper level the node pointer to a page. */
void
rtr_node_ptr_delete(
/*================*/
- dict_index_t* index, /*!< in: index tree */
- btr_cur_t* sea_cur,/*!< in: search cursor, contains information
+ btr_cur_t* cursor, /*!< in: search cursor, contains information
about parent nodes in search */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
@@ -446,10 +438,7 @@ rtr_merge_mbr_changed(
btr_cur_t* cursor2, /*!< in: the other cursor */
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
- rtr_mbr_t* new_mbr, /*!< out: MBR to update */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index); /*!< in: index */
+ rtr_mbr_t* new_mbr); /*!< out: MBR to update */
/**************************************************************//**
@@ -526,7 +515,7 @@ rtr_info_reinit_in_cursor(
@param[in] tuple range tuple containing mbr, may also be empty tuple
@param[in] mode search mode
@return estimated number of rows */
-int64_t
+ha_rows
rtr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple,
diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic
index e852ebd8028..4dd05d3b251 100644
--- a/storage/innobase/include/gis0rtree.ic
+++ b/storage/innobase/include/gis0rtree.ic
@@ -38,7 +38,7 @@ rtr_page_cal_mbr(
{
page_t* page;
rec_t* rec;
- byte* field;
+ const byte* field;
ulint len;
ulint* offsets = NULL;
double bmin, bmax;
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 8af4d320997..15107a93807 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -70,13 +70,11 @@ innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
- const char* full_name, /*!< in: concatenation of
+ const char* full_name); /*!< in: concatenation of
database name, path separator,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
- ulint full_name_len); /*!< in: full name length where
- also the null chars count */
/** Quote a standard SQL identifier like tablespace, index or column name.
@param[in] file output stream
@@ -161,7 +159,6 @@ UNIV_INTERN
void
innobase_mysql_log_notify(
/*======================*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
/** Converts a MySQL type to an InnoDB type. Note that this function returns
@@ -243,7 +240,7 @@ wsrep_innobase_kill_one_trx(void * const thd_ptr,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal);
-int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
+ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, unsigned int str_length,
unsigned int buf_length);
#endif /* WITH_WSREP */
@@ -312,14 +309,6 @@ thd_lock_wait_timeout(
/*==================*/
THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
-/******************************************************************//**
-Add up the time waited for the lock for the current query. */
-void
-thd_set_lock_wait_time(
-/*===================*/
- THD* thd, /*!< in/out: thread handle */
- ulint value); /*!< in: time waited for the lock */
-
/** Get status of innodb_tmpdir.
@param[in] thd thread handle, or NULL to query
the global innodb_tmpdir.
@@ -456,14 +445,6 @@ const char*
server_get_hostname();
/*=================*/
-/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-const char*
-innobase_get_err_msg(
-/*=================*/
- int error_code); /*!< in: MySQL error code */
-
/*********************************************************************//**
Compute the next autoinc value.
@@ -536,7 +517,7 @@ UNIV_INTERN
void
ib_push_warning(
trx_t* trx, /*!< in: trx */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...);
@@ -546,7 +527,7 @@ UNIV_INTERN
void
ib_push_warning(
void* ithd, /*!< in: thd */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...);
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 1c690839449..81c0fd18a29 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -53,14 +53,6 @@ innobase_row_to_mysql(
const dtuple_t* row) /*!< in: InnoDB row */
MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Resets table->record[0]. */
-void
-innobase_rec_reset(
-/*===============*/
- struct TABLE* table) /*!< in/out: MySQL table */
- MY_ATTRIBUTE((nonnull));
-
/** Generate the next autoinc based on a snapshot of the session
auto_increment_increment and auto_increment_offset variables. */
struct ib_sequence_t {
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
index 7b289c7a98c..eaf391be09b 100644
--- a/storage/innobase/include/ib0mutex.h
+++ b/storage/innobase/include/ib0mutex.h
@@ -53,15 +53,8 @@ struct OSTrackMutex {
ut_ad(!m_destroy_at_exit || !m_locked);
}
- /** Initialise the mutex.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ /** Initialise the mutex. */
+ void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
{
ut_ad(m_freed);
ut_ad(!m_locked);
@@ -92,16 +85,8 @@ struct OSTrackMutex {
m_mutex.exit();
}
- /** Acquire the mutex.
- @param[in] max_spins max number of spins
- @param[in] max_delay max delay per spin
- @param[in] filename from where called
- @param[in] line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line)
+ /** Acquire the mutex. */
+ void enter(uint32_t, uint32_t, const char*, uint32_t)
UNIV_NOTHROW
{
ut_ad(!m_freed);
@@ -186,15 +171,8 @@ struct TTASFutexMutex {
}
/** Called when the mutex is "created". Note: Not from the constructor
- but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ but when the mutex is initialised. */
+ void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
{
ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
}
@@ -208,14 +186,9 @@ struct TTASFutexMutex {
/** Acquire the mutex.
@param[in] max_spins max number of spins
- @param[in] max_delay max delay per spin
- @param[in] filename from where called
- @param[in] line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line) UNIV_NOTHROW
+ @param[in] max_delay max delay per spin */
+ void enter(uint32_t max_spins, uint32_t max_delay,
+ const char*, uint32_t) UNIV_NOTHROW
{
uint32_t n_spins, n_waits;
@@ -225,7 +198,7 @@ struct TTASFutexMutex {
return;
}
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
}
for (n_waits= 0;; n_waits++) {
@@ -308,15 +281,8 @@ struct TTASMutex {
}
/** Called when the mutex is "created". Note: Not from the constructor
- but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ but when the mutex is initialised. */
+ void init(latch_id_t) UNIV_NOTHROW
{
ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
}
@@ -349,20 +315,15 @@ struct TTASMutex {
/** Acquire the mutex.
@param max_spins max number of spins
- @param max_delay max delay per spin
- @param filename from where called
- @param line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line) UNIV_NOTHROW
+ @param max_delay max delay per spin */
+ void enter(uint32_t max_spins, uint32_t max_delay,
+ const char*, uint32_t) UNIV_NOTHROW
{
const uint32_t step = max_spins;
uint32_t n_spins = 0;
while (!try_lock()) {
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
if (++n_spins == max_spins) {
os_thread_yield();
max_spins+= step;
@@ -420,14 +381,8 @@ struct TTASEventMutex {
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ @param[in] id Mutex ID */
+ void init(latch_id_t id, const char*, uint32_t) UNIV_NOTHROW
{
ut_a(m_event == 0);
ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
@@ -516,7 +471,7 @@ struct TTASEventMutex {
sync_array_wait_event(sync_arr, cell);
}
} else {
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
}
}
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index ef72081c7cd..72b9e291fca 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -47,22 +47,19 @@ typedef enum {
IBUF_OP_COUNT = 3
} ibuf_op_t;
-/** Combinations of operations that can be buffered. Because the enum
-values are used for indexing innobase_change_buffering_values[], they
-should start at 0 and there should not be any gaps. */
-typedef enum {
+/** Combinations of operations that can be buffered.
+@see innodb_change_buffering_names */
+enum ibuf_use_t {
IBUF_USE_NONE = 0,
IBUF_USE_INSERT, /* insert */
IBUF_USE_DELETE_MARK, /* delete */
IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
IBUF_USE_DELETE, /* delete+purge */
- IBUF_USE_ALL, /* insert+delete+purge */
-
- IBUF_USE_COUNT /* number of entries in ibuf_use_t */
-} ibuf_use_t;
+ IBUF_USE_ALL /* insert+delete+purge */
+};
/** Operations that can currently be buffered. */
-extern ibuf_use_t ibuf_use;
+extern ulong innodb_change_buffering;
/** The insert buffer control structure */
extern ibuf_t* ibuf;
@@ -417,14 +414,11 @@ void
ibuf_close(void);
/*============*/
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
+@param[in] trx transaction
+@param[in,out] space tablespace being imported
@return DB_SUCCESS or error code */
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
- const trx_t* trx, /*!< in: transaction */
- ulint space_id) /*!< in: tablespace identifier */
+dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Updates free bits and buffered bits for bulk loaded page.
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 829e7d5a74f..f6ff6f2a7fd 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -28,7 +28,7 @@ Created 7/19/1997 Heikki Tuuri
#include "fsp0types.h"
#include "buf0lru.h"
-/** An index page must contain at least UNIV_PAGE_SIZE /
+/** An index page must contain at least srv_page_size /
IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
buffer inserts to this page. If there is this much of free space, the
corresponding bits are set in the ibuf bitmap. */
@@ -124,7 +124,7 @@ ibuf_should_try(
a secondary index when we
decide */
{
- return(ibuf_use != IBUF_USE_NONE
+ return(innodb_change_buffering
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
&& !dict_index_is_spatial(index)
@@ -314,9 +314,7 @@ ibuf_update_free_bits_if_full(
block->page.size.physical(), max_ins_size);
if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
-#endif
+ compile_time_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX);
after = ibuf_index_page_calc_free_bits(
block->page.size.physical(), max_ins_size - increase);
#ifdef UNIV_IBUF_DEBUG
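The preprocessor #if/#error guard in ibuf_update_free_bits_if_full() becomes a
compile_time_assert(), so the sanity check fails the build as a regular compile-time assertion.
Roughly what the check amounts to, sketched with plain C++11 static_assert (the real macro comes
from my_global.h and may expand differently):

	static_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX,
		      "the ULINT32_UNDEFINED sentinel must exceed any page size");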
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 79fb30eb3f5..701c79b8727 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -63,23 +63,6 @@ ulint
lock_get_size(void);
/*===============*/
/*********************************************************************//**
-Creates the lock system at database start. */
-void
-lock_sys_create(
-/*============*/
- ulint n_cells); /*!< in: number of slots in lock hash table */
-/** Resize the lock hash table.
-@param[in] n_cells number of slots in lock hash table */
-void
-lock_sys_resize(
- ulint n_cells);
-
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-void
-lock_sys_close(void);
-/*================*/
-/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
@@ -294,7 +277,7 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
+ bool* inherit)/*!< out: set to true if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
@@ -507,18 +490,6 @@ void
lock_trx_release_locks(
/*===================*/
trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Removes locks on a table to be dropped or discarded.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or discarded */
- ibool remove_also_table_sx_locks);/*!< in: also removes
- table S and X locks */
/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
@@ -563,8 +534,8 @@ lock_rec_find_set_bit(
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-ibool
+@return whether lock1 has to wait for lock2 to be removed */
+bool
lock_has_to_wait(
/*=============*/
const lock_t* lock1, /*!< in: waiting lock */
@@ -581,7 +552,7 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id); /*!< in: trx_sys_get_max_trx_id() */
+ trx_id_t max_trx_id); /*!< in: trx_sys.get_max_trx_id() */
/*********************************************************************//**
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex and exits without
@@ -613,7 +584,7 @@ lock_print_info_all_transactions(
Return approximate number or record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
@@ -622,7 +593,7 @@ lock_number_of_rows_locked(
/*********************************************************************//**
Return the number of table locks for a transaction.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
@@ -797,7 +768,6 @@ Set the lock system timeout event. */
void
lock_set_timeout_event();
/*====================*/
-#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
@@ -807,8 +777,8 @@ lock_check_trx_id_sanity(
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((warn_unused_result));
+ const ulint* offsets); /*!< in: rec_get_offsets(rec, index) */
+#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
@@ -817,19 +787,21 @@ const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
const trx_t* trx) /*!< in: transaction to check */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
+/** Check if the transaction holds an explicit exclusive lock on a record.
+@param[in] trx transaction
+@param[in] table table
+@param[in] block leaf page
+@param[in] heap_no heap number identifying the record
+@return whether an explicit X-lock is held */
bool
-lock_trx_has_rec_x_lock(
-/*====================*/
+lock_trx_has_expl_x_lock(
const trx_t* trx, /*!< in: transaction to check */
const dict_table_t* table, /*!< in: table to check */
const buf_block_t* block, /*!< in: buffer block of the record */
ulint heap_no)/*!< in: record heap number */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/** Lock operation struct */
@@ -841,11 +813,12 @@ struct lock_op_t{
typedef ib_mutex_t LockMutex;
/** The lock system struct */
-struct lock_sys_t{
- char pad1[CACHE_LINE_SIZE]; /*!< padding to prevent other
- memory update hotspots from
- residing on the same memory
- cache line */
+class lock_sys_t
+{
+ bool m_initialised;
+
+public:
+ MY_ALIGNED(CACHE_LINE_SIZE)
LockMutex mutex; /*!< Mutex protecting the
locks */
hash_table_t* rec_hash; /*!< hash table of the record
@@ -855,13 +828,13 @@ struct lock_sys_t{
hash_table_t* prdt_page_hash; /*!< hash table of the page
lock */
- char pad2[CACHE_LINE_SIZE]; /*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
LockMutex wait_mutex; /*!< Mutex protecting the
next two fields */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
locks within InnoDB, protected
- by the lock_sys->wait_mutex;
+ by the lock_sys.wait_mutex;
os_event_set() and
os_event_reset() on
waiting_threads[]->event
@@ -870,12 +843,7 @@ struct lock_sys_t{
srv_slot_t* last_slot; /*!< highest slot ever used
in the waiting_threads array,
protected by
- lock_sys->wait_mutex */
- ibool rollback_complete;
- /*!< TRUE if rollback of all
- recovered transactions is
- complete. Protected by
- lock_sys->mutex */
+ lock_sys.wait_mutex */
ulint n_lock_max_wait_time; /*!< Max wait time */
@@ -887,6 +855,38 @@ struct lock_sys_t{
bool timeout_thread_active; /*!< True if the timeout thread
is running */
+
+
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+ lock_sys_t(): m_initialised(false) {}
+
+
+ bool is_initialised() { return m_initialised; }
+
+
+ /**
+ Creates the lock system at database start.
+
+ @param[in] n_cells number of slots in lock hash table
+ */
+ void create(ulint n_cells);
+
+
+ /**
+ Resize the lock hash table.
+
+ @param[in] n_cells number of slots in lock hash table
+ */
+ void resize(ulint n_cells);
+
+
+ /** Closes the lock system at database shutdown. */
+ void close();
};
/*********************************************************************//**
@@ -1000,36 +1000,36 @@ lock_rec_free_all_from_discard_page(
const buf_block_t* block); /*!< in: page to be discarded */
/** The lock system */
-extern lock_sys_t* lock_sys;
+extern lock_sys_t lock_sys;
-/** Test if lock_sys->mutex can be acquired without waiting. */
+/** Test if lock_sys.mutex can be acquired without waiting. */
#define lock_mutex_enter_nowait() \
- (lock_sys->mutex.trylock(__FILE__, __LINE__))
+ (lock_sys.mutex.trylock(__FILE__, __LINE__))
-/** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() (lock_sys->mutex.is_owned())
+/** Test if lock_sys.mutex is owned. */
+#define lock_mutex_own() (lock_sys.mutex.is_owned())
-/** Acquire the lock_sys->mutex. */
+/** Acquire the lock_sys.mutex. */
#define lock_mutex_enter() do { \
- mutex_enter(&lock_sys->mutex); \
+ mutex_enter(&lock_sys.mutex); \
} while (0)
-/** Release the lock_sys->mutex. */
+/** Release the lock_sys.mutex. */
#define lock_mutex_exit() do { \
- lock_sys->mutex.exit(); \
+ lock_sys.mutex.exit(); \
} while (0)
-/** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() (lock_sys->wait_mutex.is_owned())
+/** Test if lock_sys.wait_mutex is owned. */
+#define lock_wait_mutex_own() (lock_sys.wait_mutex.is_owned())
-/** Acquire the lock_sys->wait_mutex. */
+/** Acquire the lock_sys.wait_mutex. */
#define lock_wait_mutex_enter() do { \
- mutex_enter(&lock_sys->wait_mutex); \
+ mutex_enter(&lock_sys.wait_mutex); \
} while (0)
-/** Release the lock_sys->wait_mutex. */
+/** Release the lock_sys.wait_mutex. */
#define lock_wait_mutex_exit() do { \
- lock_sys->wait_mutex.exit(); \
+ lock_sys.wait_mutex.exit(); \
} while (0)
#ifdef WITH_WSREP
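With lock_sys turned from a heap-allocated pointer into a statically allocated object, the former
free functions become member calls. A call-site sketch (illustration only; the sizing argument is
assumed, not part of this hunk):

	/* startup */
	lock_sys.create(srv_lock_table_size);	/* was lock_sys_create() */

	/* shutdown */
	if (lock_sys.is_initialised()) {
		lock_sys.close();		/* was lock_sys_close() */
	}

The mutex wrappers above change only in that they address the object directly (lock_sys.mutex,
lock_sys.wait_mutex) instead of following a pointer.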
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index 475f2ccedf1..c1c886f6832 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -54,7 +54,7 @@ lock_rec_hash(
ulint page_no)/*!< in: page number */
{
return(unsigned(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash)));
+ lock_sys.rec_hash)));
}
/*********************************************************************//**
@@ -90,11 +90,11 @@ lock_hash_get(
ulint mode) /*!< in: lock mode */
{
if (mode & LOCK_PREDICATE) {
- return(lock_sys->prdt_hash);
+ return(lock_sys.prdt_hash);
} else if (mode & LOCK_PRDT_PAGE) {
- return(lock_sys->prdt_page_hash);
+ return(lock_sys.prdt_page_hash);
} else {
- return(lock_sys->rec_hash);
+ return(lock_sys.rec_hash);
}
}
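lock_hash_get() dispatches on the mode bits over the three hash tables now owned by the lock_sys
object. A small usage sketch (illustration only), using the fold/hash helpers visible in the hunk
above:

	hash_table_t*	hash = lock_hash_get(mode);	/* rec, prdt or prdt_page */
	ulint		fold = lock_rec_fold(space, page_no);
	ulint		cell = hash_calc_hash(fold, hash);
	/* record, predicate and predicate-page locks live in separate
	tables, so their cells never collide with one another */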
diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h
index 1fa7796794a..629cd05122a 100644
--- a/storage/innobase/include/lock0prdt.h
+++ b/storage/innobase/include/lock0prdt.h
@@ -50,9 +50,8 @@ lock_prdt_lock(
SELECT FOR UPDATE */
ulint type_mode,
/*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
- que_thr_t* thr, /*!< in: query thread
+ que_thr_t* thr); /*!< in: query thread
(can be NULL if BTR_NO_LOCKING_FLAG) */
- mtr_t* mtr); /*!< in/out: mini-transaction */
/*********************************************************************//**
Acquire a "Page" lock on a block
@@ -106,7 +105,6 @@ Update predicate lock when page splits */
void
lock_prdt_update_split(
/*===================*/
- buf_block_t* block, /*!< in/out: page to be split */
buf_block_t* new_block, /*!< in/out: the new half page */
lock_prdt_t* prdt, /*!< in: MBR on the old page */
lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
@@ -122,7 +120,6 @@ lock_prdt_update_parent(
buf_block_t* right_block, /*!< in/out: the new half page */
lock_prdt_t* left_prdt, /*!< in: MBR on the old page */
lock_prdt_t* right_prdt, /*!< in: MBR on the new page */
- lock_prdt_t* parent_prdt, /*!< in: original parent MBR */
ulint space, /*!< in: space id */
ulint page_no); /*!< in: page number */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index f32cb9d4815..6157f095c5b 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -33,7 +33,6 @@ Created 5/7/1996 Heikki Tuuri
#define lock_t ib_lock_t
struct lock_t;
-struct lock_sys_t;
struct lock_table_t;
/* Basic lock modes */
@@ -176,7 +175,7 @@ operator<<(std::ostream& out, const lock_rec_t& lock)
#endif
/* @} */
-/** Lock struct; protected by lock_sys->mutex */
+/** Lock struct; protected by lock_sys.mutex */
struct ib_lock_t
{
trx_t* trx; /*!< transaction owning the
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 336e33fccad..d213a6c0884 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -40,8 +40,8 @@ Created 12/9/1995 Heikki Tuuri
#include "os0event.h"
#include "os0file.h"
-/** Redo log group */
-struct log_group_t;
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
/** Magic value to use instead of log checksums when they are disabled */
#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
@@ -49,13 +49,13 @@ struct log_group_t;
/* Margin for the free space in the smallest log group, before a new query
step which modifies the database, is started */
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_FREE_PER_THREAD (4U << srv_page_size_shift)
+#define LOG_CHECKPOINT_EXTRA_FREE (8U << srv_page_size_shift)
typedef ulint (*log_checksum_func_t)(const byte* log_block);
/** Pointer to the log checksum calculation function. Protected with
-log_sys->mutex. */
+log_sys.mutex. */
extern log_checksum_func_t log_checksum_algorithm_ptr;
/** Append a string to the log.
@@ -81,9 +81,7 @@ log_free_check(void);
/** Extends the log buffer.
@param[in] len requested minimum size in bytes */
-void
-log_buffer_extend(
- ulint len);
+void log_buffer_extend(ulong len);
/** Check margin not to overwrite transaction log from the last checkpoint.
If would estimate the log write to exceed the log_group_capacity,
@@ -137,7 +135,7 @@ log_get_flush_lsn(void);
/*=============*/
/****************************************************************
Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
@return log group capacity */
UNIV_INLINE
lsn_t
@@ -151,14 +149,7 @@ UNIV_INLINE
lsn_t
log_get_max_modified_age_async(void);
/*================================*/
-/** Initializes the redo logging subsystem. */
-void
-log_sys_init();
-/** Initialize the redo log.
-@param[in] n_files number of files */
-void
-log_init(ulint n_files);
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -170,12 +161,6 @@ log_set_capacity(ulonglong file_size)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
-Completes an i/o to a log file. */
-void
-log_io_complete(
-/*============*/
- log_group_t* group); /*!< in: log group */
-/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
@@ -234,13 +219,9 @@ shutdown. This function also writes all log in log files to the log archive. */
void
logs_empty_and_mark_files_at_shutdown(void);
/*=======================================*/
-/** Read a log group header page to log_sys->checkpoint_buf.
-@param[in] group log group
-@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
-void
-log_group_header_read(
- const log_group_t* group,
- ulint header);
+/** Read a log group header page to log_sys.checkpoint_buf.
+@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+void log_header_read(ulint header);
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] sync whether to wait for the write to complete
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
@@ -261,16 +242,6 @@ objects! */
void
log_check_margins(void);
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- lsn_t lsn); /*!< in: lsn for which the values should be
- set */
/************************************************************//**
Gets a log block flush bit.
@return TRUE if this block was the first to be written in a log flush */
@@ -321,11 +292,10 @@ log_block_calc_checksum_crc32(
const byte* block);
/** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in] block the redo log block
@return the calculated checksum value */
UNIV_INLINE
ulint
-log_block_calc_checksum_none(const byte* block);
+log_block_calc_checksum_none(const byte*);
/************************************************************//**
Gets a log block checksum field value.
@@ -402,14 +372,6 @@ Refreshes the statistics used to print per-second averages. */
void
log_refresh_stats(void);
/*===================*/
-/********************************************************//**
-Closes all log groups. */
-void
-log_group_close_all(void);
-/*=====================*/
-/** Shut down the redo log subsystem. */
-void
-log_shutdown();
/** Whether to generate and require checksums on the redo log pages */
extern my_bool innodb_log_checksums;
@@ -421,8 +383,6 @@ extern my_bool innodb_log_checksums;
/* The counting of lsn's starts from this value: this must be non-zero */
#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-
/* Offsets of a log block header */
#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
is allowed to wrap around at 2G; the
@@ -446,7 +406,7 @@ extern my_bool innodb_log_checksums;
from this offset in this log block,
if value not 0 */
#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys->next_checkpoint_no when the
+ log_sys.next_checkpoint_no when the
log block was last written to: if the
block has not yet been written full,
this value is only updated before a
@@ -469,7 +429,7 @@ extern my_bool innodb_log_checksums;
#define LOG_CHECKPOINT_LSN 8
/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
#define LOG_CHECKPOINT_OFFSET 16
-/** log_sys_t::buf_size at the time of the checkpoint (not used) */
+/** srv_log_buffer_size at the time of the checkpoint (not used) */
#define LOG_CHECKPOINT_LOG_BUF_SIZE 24
/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
#define LOG_CHECKPOINT_CRYPT_KEY 32
@@ -511,16 +471,20 @@ or the MySQL version that created the redo log file. */
IB_TO_STR(MYSQL_VERSION_MINOR) "." \
IB_TO_STR(MYSQL_VERSION_PATCH)
-/** The redo log format identifier corresponding to the current format version.
-Stored in LOG_HEADER_FORMAT.
+/** The original (not version-tagged) InnoDB redo log format */
+#define LOG_HEADER_FORMAT_3_23 0
+/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
+#define LOG_HEADER_FORMAT_10_2 1
+/** The MariaDB 10.3.2 log format.
To prevent crash-downgrade to earlier 10.2 due to the inability to
roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
(MDEV-13564 backup-friendly TRUNCATE). */
#define LOG_HEADER_FORMAT_10_3 103
-/** The old MariaDB 10.2.2..10.2.17 log format */
-#define LOG_HEADER_FORMAT_10_2 1
+/** The redo log format identifier corresponding to the current format version.
+Stored in LOG_HEADER_FORMAT. */
+#define LOG_HEADER_FORMAT_CURRENT LOG_HEADER_FORMAT_10_3
/** Future MariaDB 10.4 log format */
#define LOG_HEADER_FORMAT_10_4 104
/** Encrypted MariaDB redo log */
@@ -539,102 +503,43 @@ MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
header */
#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-/** The state of a log group */
-enum log_group_state_t {
- /** No corruption detected */
- LOG_GROUP_OK,
- /** Corrupted */
- LOG_GROUP_CORRUPTED
-};
-
typedef ib_mutex_t LogSysMutex;
typedef ib_mutex_t FlushOrderMutex;
-/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil.
-Currently, this is only protected by log_sys->mutex. However, in the case
-of log_write_up_to(), we will access some members only with the protection
-of log_sys->write_mutex, which should affect nothing for now. */
-struct log_group_t{
- /** number of files in the group */
- ulint n_files;
- /** format of the redo log: e.g., LOG_HEADER_FORMAT_10_3 */
- uint32_t format;
- /** redo log subformat: 0 with separately logged TRUNCATE,
- 1 with fully redo-logged TRUNCATE */
- uint32_t subformat;
- /** individual log file size in bytes, including the header */
- lsn_t file_size;
- /** corruption status */
- log_group_state_t state;
- /** lsn used to fix coordinates within the log group */
- lsn_t lsn;
- /** the byte offset of the above lsn */
- lsn_t lsn_offset;
- /** unaligned buffers */
- byte** file_header_bufs_ptr;
- /** buffers for each file header in the group */
- byte** file_header_bufs;
-
- /** used only in recovery: recovery scan succeeded up to this
- lsn in this log group */
- lsn_t scanned_lsn;
- /** unaligned checkpoint header */
- byte* checkpoint_buf_ptr;
- /** buffer for writing a checkpoint header */
- byte* checkpoint_buf;
-
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const
- {
- return((format & LOG_HEADER_FORMAT_ENCRYPTED) != 0);
- }
-
- /** @return capacity in bytes */
- inline lsn_t capacity() const
- {
- return((file_size - LOG_FILE_HDR_SIZE) * n_files);
- }
-};
-
/** Redo log buffer */
struct log_t{
- char pad1[CACHE_LINE_SIZE];
- /*!< Padding to prevent other memory
- update hotspots from residing on the
- same memory cache line */
+ MY_ALIGNED(CACHE_LINE_SIZE)
lsn_t lsn; /*!< log sequence number */
- ulint buf_free; /*!< first free offset within the log
+ ulong buf_free; /*!< first free offset within the log
buffer in use */
- char pad2[CACHE_LINE_SIZE];/*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
LogSysMutex mutex; /*!< mutex protecting the log */
- char pad3[CACHE_LINE_SIZE]; /*!< Padding */
- LogSysMutex write_mutex; /*!< mutex protecting writing to log
- file and accessing to log_group_t */
- char pad4[CACHE_LINE_SIZE];/*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ LogSysMutex write_mutex; /*!< mutex protecting writing to log */
+ MY_ALIGNED(CACHE_LINE_SIZE)
FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
- to release log_sys->mutex during
+ to release log_sys.mutex during
mtr_commit and still ensure that
insertions in the flush_list happen
in the LSN order. */
- byte* buf_ptr; /*!< unaligned log buffer, which should
- be of double of buf_size */
- byte* buf; /*!< log buffer currently in use;
- this could point to either the first
- half of the aligned(buf_ptr) or the
+ byte* buf; /*!< Memory of double the
+ srv_log_buffer_size is
+ allocated here. This pointer will change
+ however to either the first half or the
second half in turns, so that log
write/flush to disk don't block
concurrent mtrs which will write
- log to this buffer */
+					log to this buffer. The buffer must be
+					switched back to the first half before
+					it is freed or resized. */
bool first_in_use; /*!< true if buf points to the first
half of the aligned(buf_ptr), false
if the second half */
- ulint buf_size; /*!< log buffer size of each in bytes */
- ulint max_buf_free; /*!< recommended maximum value of
+ ulong max_buf_free; /*!< recommended maximum value of
buf_free for the buffer in use, after
which the buffer is flushed */
bool check_flush_or_checkpoint;
@@ -646,12 +551,72 @@ struct log_t{
max_checkpoint_age; this flag is
peeked at by log_free_check(), which
does not reserve the log mutex */
- /** the redo log */
- log_group_t log;
+
+ /** Log files. Protected by mutex or write_mutex. */
+ struct files {
+ /** number of files */
+ ulint n_files;
+ /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
+ uint32_t format;
+ /** redo log subformat: 0 with separately logged TRUNCATE,
+ 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
+ uint32_t subformat;
+ /** individual log file size in bytes, including the header */
+ lsn_t file_size;
+ /** lsn used to fix coordinates within the log group */
+ lsn_t lsn;
+ /** the byte offset of the above lsn */
+ lsn_t lsn_offset;
+
+ /** unaligned buffers */
+ byte* file_header_bufs_ptr;
+ /** buffers for each file header in the group */
+ byte* file_header_bufs[SRV_N_LOG_FILES_MAX];
+
+ /** used only in recovery: recovery scan succeeded up to this
+ lsn in this log group */
+ lsn_t scanned_lsn;
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; }
+ /** @return capacity in bytes */
+ lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
+ /** Calculate the offset of a log sequence number.
+ @param[in] lsn log sequence number
+ @return offset within the log */
+ inline lsn_t calc_lsn_offset(lsn_t lsn) const;
+
+ /** Set the field values to correspond to a given lsn. */
+ void set_fields(lsn_t lsn)
+ {
+ lsn_offset = calc_lsn_offset(lsn);
+ this->lsn = lsn;
+ }
+
+ /** Read a log segment to log_sys.buf.
+ @param[in,out] start_lsn in: read area start,
+ out: the last read valid lsn
+ @param[in] end_lsn read area end
+	@return whether no invalid blocks (e.g. checksum mismatch) were found */
+ bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
+
+ /** Initialize the redo log buffer.
+ @param[in] n_files number of files */
+ void create(ulint n_files);
+
+ /** Close the redo log buffer. */
+ void close()
+ {
+ ut_free(file_header_bufs_ptr);
+ n_files = 0;
+ file_header_bufs_ptr = NULL;
+ memset(file_header_bufs, 0, sizeof file_header_bufs);
+ }
+ } log;
/** The fields involved in the log buffer flush @{ */
- ulint buf_next_to_write;/*!< first offset in the log buffer
+ ulong buf_next_to_write;/*!< first offset in the log buffer
where the byte content may not exist
written to file, e.g., the start
offset of a log record catenated
@@ -668,11 +633,11 @@ struct log_t{
AND flushed to disk */
ulint n_pending_flushes;/*!< number of currently
pending flushes; protected by
- log_sys_t::mutex */
+ log_sys.mutex */
os_event_t flush_event; /*!< this event is in the reset state
when a flush is running;
os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
+ are protected by log_sys.mutex */
ulint n_log_ios; /*!< number of log i/os initiated thus
far */
ulint n_log_ios_old; /*!< number of log i/o's at the
@@ -718,7 +683,7 @@ struct log_t{
/*!< extra redo log records to write
during a checkpoint, or NULL if none.
The pointer is protected by
- log_sys->mutex, and the data must
+ log_sys.mutex, and the data must
remain constant as long as this
pointer is not NULL. */
ulint n_pending_checkpoint_writes;
@@ -728,73 +693,105 @@ struct log_t{
checkpoint write is running; a thread
should wait for this without owning
the log mutex */
- byte* checkpoint_buf_ptr;/* unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is read to this
- buffer */
+
+ /** buffer for checkpoint header */
+ MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
+ byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
/* @} */
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const
- {
- return(log.is_encrypted());
- }
+private:
+ bool m_initialised;
+public:
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+ log_t(): m_initialised(false) {}
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const { return(log.is_encrypted()); }
+
+ bool is_initialised() { return m_initialised; }
+
+ /** Complete an asynchronous checkpoint write. */
+ void complete_checkpoint();
+
+ /** Initialise the redo log subsystem. */
+ void create();
+
+ /** Shut down the redo log subsystem. */
+ void close();
};
/** Redo log system */
-extern log_t* log_sys;
+extern log_t log_sys;
+
+/** Calculate the offset of a log sequence number.
+@param[in] lsn log sequence number
+@return offset within the log */
+inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
+{
+ ut_ad(this == &log_sys.log);
+ /* The lsn parameters are updated while holding both the mutexes
+ and it is ok to have either of them while reading */
+ ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
+ const lsn_t group_size= capacity();
+ lsn_t l= lsn - this->lsn;
+ if (longlong(l) < 0) {
+ l= lsn_t(-longlong(l)) % group_size;
+ l= group_size - l;
+ }
+
+ l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
+ l%= group_size;
+ return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
+}
/** Test if flush order mutex is owned. */
#define log_flush_order_mutex_own() \
- mutex_own(&log_sys->log_flush_order_mutex)
+ mutex_own(&log_sys.log_flush_order_mutex)
/** Acquire the flush order mutex. */
#define log_flush_order_mutex_enter() do { \
- mutex_enter(&log_sys->log_flush_order_mutex); \
+ mutex_enter(&log_sys.log_flush_order_mutex); \
} while (0)
/** Release the flush order mutex. */
# define log_flush_order_mutex_exit() do { \
- mutex_exit(&log_sys->log_flush_order_mutex); \
+ mutex_exit(&log_sys.log_flush_order_mutex); \
} while (0)
/** Test if log sys mutex is owned. */
-#define log_mutex_own() mutex_own(&log_sys->mutex)
+#define log_mutex_own() mutex_own(&log_sys.mutex)
/** Test if log sys write mutex is owned. */
-#define log_write_mutex_own() mutex_own(&log_sys->write_mutex)
+#define log_write_mutex_own() mutex_own(&log_sys.write_mutex)
/** Acquire the log sys mutex. */
-#define log_mutex_enter() mutex_enter(&log_sys->mutex)
+#define log_mutex_enter() mutex_enter(&log_sys.mutex)
/** Acquire the log sys write mutex. */
-#define log_write_mutex_enter() mutex_enter(&log_sys->write_mutex)
+#define log_write_mutex_enter() mutex_enter(&log_sys.write_mutex)
/** Acquire all the log sys mutexes. */
#define log_mutex_enter_all() do { \
- mutex_enter(&log_sys->write_mutex); \
- mutex_enter(&log_sys->mutex); \
+ mutex_enter(&log_sys.write_mutex); \
+ mutex_enter(&log_sys.mutex); \
} while (0)
/** Release the log sys mutex. */
-#define log_mutex_exit() mutex_exit(&log_sys->mutex)
+#define log_mutex_exit() mutex_exit(&log_sys.mutex)
/** Release the log sys write mutex.*/
-#define log_write_mutex_exit() mutex_exit(&log_sys->write_mutex)
+#define log_write_mutex_exit() mutex_exit(&log_sys.write_mutex)
/** Release all the log sys mutexes. */
#define log_mutex_exit_all() do { \
- mutex_exit(&log_sys->mutex); \
- mutex_exit(&log_sys->write_mutex); \
+ mutex_exit(&log_sys.mutex); \
+ mutex_exit(&log_sys.write_mutex); \
} while (0)
-/** Calculate the offset of an lsn within a log group.
-@param[in] lsn log sequence number
-@param[in] group log group
-@return offset within the log group */
-lsn_t
-log_group_calc_lsn_offset(
- lsn_t lsn,
- const log_group_t* group);
-
/* log scrubbing speed, in bytes/sec */
extern ulonglong innodb_scrub_log_speed;
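log_t::files::calc_lsn_offset() above takes over from log_group_calc_lsn_offset(): it maps an LSN
to a byte offset inside the circular set of log files, skipping a LOG_FILE_HDR_SIZE header at the
start of each file. A worked example with deliberately tiny, hypothetical numbers (real
innodb_log_file_size values are far larger):

	/* Assume, for illustration only:
	n_files = 2, file_size = 10240, LOG_FILE_HDR_SIZE = 2048,
	so capacity() = (10240 - 2048) * 2 = 16384 payload bytes,
	and the stored reference point is lsn = 100000 at lsn_offset = 3000
	(file 0, 952 payload bytes past its header).

	calc_lsn_offset(101000):
	l  = 101000 - 100000                    = 1000
	l += 3000 - 2048 * (1 + 3000 / 10240)   -> 1952
	l %= 16384                              -> 1952
	return 1952 + 2048 * (1 + 1952 / 8192)  =  4000
	i.e. still file 0, 1952 payload bytes past its header.

	calc_lsn_offset(108000) = 8952 + 2048 * 2 = 13048,
	i.e. 760 payload bytes into file 1 (10240 + 2048 + 760). */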
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 58da7bacc6f..87d55f9e01d 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,12 @@ Created 12/9/1995 Heikki Tuuri
#include "mach0data.h"
#include "srv0mon.h"
-#include "srv0srv.h"
#include "ut0crc32.h"
#ifdef UNIV_LOG_LSN_DEBUG
#include "mtr0types.h"
#endif /* UNIV_LOG_LSN_DEBUG */
+extern ulong srv_log_buffer_size;
/************************************************************//**
Gets a log block flush bit.
@@ -241,12 +241,10 @@ log_block_calc_checksum_crc32(
}
/** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in] block log block
@return checksum */
UNIV_INLINE
ulint
-log_block_calc_checksum_none(
- const byte* block)
+log_block_calc_checksum_none(const byte*)
{
return(LOG_NO_CHECKSUM_MAGIC);
}
@@ -330,15 +328,15 @@ log_reserve_and_write_fast(
len - SIZE_OF_MLOG_CHECKPOINT]
? 0
: 1
- + mach_get_compressed_size(log_sys->lsn >> 32)
- + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+ + mach_get_compressed_size(log_sys.lsn >> 32)
+ + mach_get_compressed_size(log_sys.lsn & 0xFFFFFFFFUL);
#endif /* UNIV_LOG_LSN_DEBUG */
const ulint data_len = len
#ifdef UNIV_LOG_LSN_DEBUG
+ lsn_len
#endif /* UNIV_LOG_LSN_DEBUG */
- + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+ + log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
@@ -348,44 +346,44 @@ log_reserve_and_write_fast(
return(0);
}
- *start_lsn = log_sys->lsn;
+ *start_lsn = log_sys.lsn;
#ifdef UNIV_LOG_LSN_DEBUG
if (lsn_len) {
/* Write the LSN pseudo-record. */
- byte* b = &log_sys->buf[log_sys->buf_free];
+ byte* b = &log_sys.buf[log_sys.buf_free];
*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
/* Write the LSN in two parts,
as a pseudo page number and space id. */
- b += mach_write_compressed(b, log_sys->lsn >> 32);
- b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
- ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
+ b += mach_write_compressed(b, log_sys.lsn >> 32);
+ b += mach_write_compressed(b, log_sys.lsn & 0xFFFFFFFFUL);
+ ut_a(b - lsn_len == &log_sys.buf[log_sys.buf_free]);
::memcpy(b, str, len);
len += lsn_len;
} else
#endif /* UNIV_LOG_LSN_DEBUG */
- memcpy(log_sys->buf + log_sys->buf_free, str, len);
+ memcpy(log_sys.buf + log_sys.buf_free, str, len);
log_block_set_data_len(
reinterpret_cast<byte*>(ut_align_down(
- log_sys->buf + log_sys->buf_free,
+ log_sys.buf + log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE)),
data_len);
- log_sys->buf_free += len;
+ log_sys.buf_free += ulong(len);
- ut_ad(log_sys->buf_free <= log_sys->buf_size);
+ ut_ad(log_sys.buf_free <= srv_log_buffer_size);
- log_sys->lsn += len;
+ log_sys.lsn += len;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
- return(log_sys->lsn);
+ return(log_sys.lsn);
}
/************************************************************//**
@@ -400,7 +398,7 @@ log_get_lsn(void)
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
log_mutex_exit();
@@ -418,7 +416,7 @@ log_get_flush_lsn(void)
log_mutex_enter();
- lsn = log_sys->flushed_to_disk_lsn;
+ lsn = log_sys.flushed_to_disk_lsn;
log_mutex_exit();
@@ -435,11 +433,11 @@ log_get_lsn_nowait(void)
{
lsn_t lsn=0;
- if (!mutex_enter_nowait(&(log_sys->mutex))) {
+ if (!mutex_enter_nowait(&(log_sys.mutex))) {
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
- mutex_exit(&(log_sys->mutex));
+ mutex_exit(&(log_sys.mutex));
}
return(lsn);
@@ -447,14 +445,14 @@ log_get_lsn_nowait(void)
/****************************************************************
Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
@return log group capacity */
UNIV_INLINE
lsn_t
log_get_capacity(void)
/*==================*/
{
- return(log_sys->log_group_capacity);
+ return(log_sys.log_group_capacity);
}
/****************************************************************
@@ -466,7 +464,7 @@ lsn_t
log_get_max_modified_age_async(void)
/*================================*/
{
- return(log_sys->max_modified_age_async);
+ return(log_sys.max_modified_age_async);
}
/***********************************************************************//**
@@ -498,7 +496,7 @@ log_free_check(void)
sync_allowed_latches(latches,
latches + UT_ARR_SIZE(latches))));
- if (log_sys->check_flush_or_checkpoint) {
+ if (log_sys.check_flush_or_checkpoint) {
log_check_margins();
}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 267f8f6778d..89485b7f31d 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -94,20 +94,6 @@ void
recv_sys_debug_free(void);
/*=====================*/
-/** Read a log segment to a buffer.
-@param[out] buf buffer
-@param[in] group redo log files
-@param[in, out] start_lsn in : read area start, out: the last read valid lsn
-@param[in] end_lsn read area end
-@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
-@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
-bool
-log_group_read_log_seg(
- byte* buf,
- const log_group_t* group,
- lsn_t* start_lsn,
- lsn_t end_lsn);
-
/********************************************************//**
Reset the state of the recovery system variables. */
void
@@ -225,7 +211,7 @@ struct recv_sys_t{
ib_mutex_t writer_mutex;/*!< mutex coordinating
flushing between recv_writer_thread and
the recovery thread. */
- os_event_t flush_start;/*!< event to acticate
+ os_event_t flush_start;/*!< event to activate
page cleaner threads */
os_event_t flush_end;/*!< event to signal that the page
cleaner has finished the request */
@@ -241,6 +227,7 @@ struct recv_sys_t{
/*!< this is TRUE when a log rec application
batch is running */
byte* buf; /*!< buffer for parsing log records */
+ size_t buf_size; /*!< size of buf */
ulint len; /*!< amount of data in buf */
lsn_t parse_start_lsn;
/*!< this is the lsn from which we were able to
@@ -328,7 +315,7 @@ extern bool recv_no_ibuf_operations;
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
+Protected by log_sys.mutex. */
extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
@@ -339,11 +326,11 @@ extern bool recv_lsn_checks_on;
/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! */
-#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
+#define RECV_PARSING_BUF_SIZE (2U << 20)
/** Size of block reads when the log groups are scanned forward to do a
roll-forward */
-#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+#define RECV_SCAN_SIZE (4U << srv_page_size_shift)
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
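Both recovery buffer sizes now follow the runtime page size rather than the compile-time
UNIV_PAGE_SIZE. For illustration, with the default srv_page_size_shift of 14 (16 KiB pages),
RECV_SCAN_SIZE is 4 << 14 = 64 KiB while RECV_PARSING_BUF_SIZE stays at 2 << 20 = 2 MiB, so the
parsing buffer still accommodates RECV_SCAN_SIZE several times over; a hypothetical sanity check
one could add:

	ut_ad(RECV_PARSING_BUF_SIZE >= 4 * RECV_SCAN_SIZE);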
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index 0ae784a58d0..a2d676e7f2a 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -69,11 +69,11 @@ allocations of small buffers. */
#define MEM_BLOCK_START_SIZE 64
#define MEM_BLOCK_STANDARD_SIZE \
- (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
+ (srv_page_size >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
/** If a memory heap is allowed to grow into the buffer pool, the following
is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
+#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200)
/** Space needed when allocating for a user a field of length N.
The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */
@@ -292,26 +292,42 @@ mem_strdupl(
const char* str, /*!< in: string to be copied */
ulint len); /*!< in: length of str, in bytes */
-/** Duplicates a NUL-terminated string, allocated from a memory heap.
+/** Duplicate a block of data, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] data block of data to be copied
+@param[in] len length of data, in bytes
+@return own: a copy of data */
+inline
+void*
+mem_heap_dup(mem_heap_t* heap, const void* data, size_t len)
+{
+ return(memcpy(mem_heap_alloc(heap, len), data, len));
+}
+
+/** Duplicate a NUL-terminated string, allocated from a memory heap.
@param[in] heap memory heap where string is allocated
@param[in] str string to be copied
@return own: a copy of the string */
+inline
char*
-mem_heap_strdup(
- mem_heap_t* heap,
- const char* str);
+mem_heap_strdup(mem_heap_t* heap, const char* str)
+{
+ return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
+}
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
+/** Duplicate a string, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] str string to be copied
+@param[in] len length of str, in bytes
+@return own: a NUL-terminated copy of str */
+inline
char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
+mem_heap_strdupl(mem_heap_t* heap, const char* str, size_t len)
+{
+ char* s = static_cast<char*>(mem_heap_alloc(heap, len + 1));
+ s[len] = 0;
+ return(static_cast<char*>(memcpy(s, str, len)));
+}
/**********************************************************************//**
Concatenate two strings and return the result, using a memory heap.
@@ -323,16 +339,6 @@ mem_heap_strcat(
const char* s1, /*!< in: string 1 */
const char* s2); /*!< in: string 2 */
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len); /*!< in: length of data, in bytes */
-
/****************************************************************//**
A simple sprintf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
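mem_heap_dup(), mem_heap_strdup() and mem_heap_strdupl() are now ordinary inline functions defined
right in the header. A short usage sketch (illustration only, using the standard heap create/free
API):

	mem_heap_t*	heap = mem_heap_create(1024);
	char*		copy = mem_heap_strdup(heap, "t1");		/* NUL-terminated copy */
	char*		pref = mem_heap_strdupl(heap, "t1#P#p0", 2);	/* "t1", NUL added */
	void*		raw  = mem_heap_dup(heap, copy, 3);		/* 3 bytes, verbatim */
	mem_heap_free(heap);	/* releases all of the above at once */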
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 42dda9e0bb9..8a8d141ce11 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -275,7 +275,8 @@ mem_heap_free_heap_top(
ut_ad(block);
/* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*) block);
+ mem_block_set_free(block,
+ ulint(old_top - reinterpret_cast<byte*>(block)));
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
UNIV_MEM_FREE(old_top, (byte*) block + block->len - old_top);
@@ -545,7 +546,7 @@ mem_heap_get_size(
size = heap->total_size;
if (heap->free_block) {
- size += UNIV_PAGE_SIZE;
+ size += srv_page_size;
}
return(size);
@@ -578,20 +579,3 @@ mem_strdupl(
s[len] = 0;
return(static_cast<char*>(memcpy(s, str, len)));
}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_heap_alloc(heap, len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index dd68ea25613..5c72c7cb5da 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -225,7 +225,7 @@ mlog_write_initial_log_record_fast(
ut_ad(log_ptr);
ut_d(mtr->memo_modify_page(ptr));
- page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+ page = (const byte*) ut_align_down(ptr, srv_page_size);
space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 5a7df35c372..fe4c36f0010 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -28,9 +28,7 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0mtr_h
#define mtr0mtr_h
-#include "log0types.h"
-#include "mtr0types.h"
-#include "buf0types.h"
+#include "fil0fil.h"
#include "dyn0buf.h"
/** Start a mini-transaction. */
@@ -66,13 +64,6 @@ savepoint. */
(m)->memo_release((o), (t))
#ifdef UNIV_DEBUG
-
-/** Check if memo contains the given item. */
-#define mtr_is_block_fix(m, o, t, table) mtr_memo_contains(m, o, t)
-
-/** Check if memo contains the given page. */
-#define mtr_is_page_fix(m, p, t, table) mtr_memo_contains_page(m, p, t)
-
/** Check if memo contains the given item.
@return TRUE if contains */
#define mtr_memo_contains(m, o, t) \
@@ -126,9 +117,6 @@ savepoint. */
@return true if the mtr is dirtying a clean page. */
#define mtr_block_dirtied(b) mtr_t::is_block_dirtied((b))
-/** Forward declaration of a tablespace object */
-struct fil_space_t;
-
/** Append records to the system-wide redo log buffer.
@param[in] log redo log records */
void
@@ -180,12 +168,6 @@ struct mtr_t {
/** User tablespace that is being modified by the
mini-transaction */
fil_space_t* m_user_space;
- /** Undo tablespace that is being modified by the
- mini-transaction */
- fil_space_t* m_undo_space;
- /** System tablespace if it is being modified by the
- mini-transaction */
- fil_space_t* m_sys_space;
/** State of the transaction */
mtr_state_t m_state;
@@ -209,13 +191,6 @@ struct mtr_t {
~mtr_t() { }
- /** Release the free extents that was reserved using
- fsp_reserve_free_extents(). This is equivalent to calling
- fil_space_release_free_extents(). This is intended for use
- with index pages.
- @param[in] n_reserved number of reserved extents */
- void release_free_extents(ulint n_reserved);
-
/** Start a mini-transaction. */
void start();
@@ -274,17 +249,6 @@ struct mtr_t {
@return old mode */
inline mtr_log_t set_log_mode(mtr_log_t mode);
- /** Note that the mini-transaction is modifying the system tablespace
- (for example, for the change buffer or for undo logs)
- @return the system tablespace */
- fil_space_t* set_sys_modified()
- {
- if (!m_impl.m_sys_space) {
- lookup_sys_space();
- }
- return(m_impl.m_sys_space);
- }
-
/** Copy the tablespaces associated with the mini-transaction
(needed for generating MLOG_FILE_NAME records)
@param[in] mtr mini-transaction that may modify
@@ -293,35 +257,41 @@ struct mtr_t {
{
ut_ad(!m_impl.m_user_space_id);
ut_ad(!m_impl.m_user_space);
- ut_ad(!m_impl.m_undo_space);
- ut_ad(!m_impl.m_sys_space);
ut_d(m_impl.m_user_space_id = mtr.m_impl.m_user_space_id);
m_impl.m_user_space = mtr.m_impl.m_user_space;
- m_impl.m_undo_space = mtr.m_impl.m_undo_space;
- m_impl.m_sys_space = mtr.m_impl.m_sys_space;
}
/** Set the tablespace associated with the mini-transaction
(needed for generating a MLOG_FILE_NAME record)
@param[in] space_id user or system tablespace ID
@return the tablespace */
- fil_space_t* set_named_space(ulint space_id)
+ fil_space_t* set_named_space_id(ulint space_id)
{
ut_ad(!m_impl.m_user_space_id);
ut_d(m_impl.m_user_space_id = space_id);
if (!space_id) {
- return(set_sys_modified());
+ return fil_system.sys_space;
} else {
- lookup_user_space(space_id);
- return(m_impl.m_user_space);
+ ut_ad(m_impl.m_user_space_id == space_id);
+ ut_ad(!m_impl.m_user_space);
+ m_impl.m_user_space = fil_space_get(space_id);
+ ut_ad(m_impl.m_user_space);
+ return m_impl.m_user_space;
}
}
/** Set the tablespace associated with the mini-transaction
(needed for generating a MLOG_FILE_NAME record)
@param[in] space user or system tablespace */
- void set_named_space(fil_space_t* space);
+ void set_named_space(fil_space_t* space)
+ {
+ ut_ad(!m_impl.m_user_space_id);
+ ut_d(m_impl.m_user_space_id = space->id);
+ if (space->id) {
+ m_impl.m_user_space = space;
+ }
+ }
#ifdef UNIV_DEBUG
/** Check the tablespace associated with the mini-transaction
@@ -329,6 +299,11 @@ struct mtr_t {
@param[in] space tablespace
@return whether the mini-transaction is associated with the space */
bool is_named_space(ulint space) const;
+ /** Check the tablespace associated with the mini-transaction
+ (needed for generating a MLOG_FILE_NAME record)
+ @param[in] space tablespace
+ @return whether the mini-transaction is associated with the space */
+ bool is_named_space(const fil_space_t* space) const;
#endif /* UNIV_DEBUG */
/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
@@ -554,12 +529,6 @@ struct mtr_t {
MY_ATTRIBUTE((warn_unused_result));
private:
- /** Look up the system tablespace. */
- void lookup_sys_space();
- /** Look up the user tablespace.
- @param[in] space_id tablespace ID */
- void lookup_user_space(ulint space_id);
-
class Command;
friend class Command;
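set_named_space_id() now resolves space 0 to fil_system.sys_space directly and looks up any other
tablespace via fil_space_get(), so the out-of-line lookup helpers could be dropped. A call-site
sketch (illustration only):

	mtr_t	mtr;
	mtr.start();
	fil_space_t*	space = mtr.set_named_space_id(space_id);
	/* ... modify pages of "space"; an MLOG_FILE_NAME record can be
	generated for it when the mini-transaction commits ... */
	mtr.commit();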
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index af8f1d2c7db..eaf838aaa76 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -100,16 +100,16 @@ enum mlog_id_t {
/** Create an index page */
MLOG_PAGE_CREATE = 19,
- /** Insert entry in an undo log */
+ /** insert an undo log record */
MLOG_UNDO_INSERT = 20,
- /** erase an undo log page end */
+ /** erase an undo log page end (used in MariaDB 10.2) */
MLOG_UNDO_ERASE_END = 21,
/** initialize a page in an undo log */
MLOG_UNDO_INIT = 22,
- /** reuse an insert undo log header */
+ /** reuse an insert undo log header (used in MariaDB 10.2) */
MLOG_UNDO_HDR_REUSE = 24,
/** create an undo log header */
@@ -223,8 +223,12 @@ enum mlog_id_t {
redo log about individual pages */
MLOG_INDEX_LOAD = 61,
+ /** write DB_TRX_ID,DB_ROLL_PTR to a clustered index leaf page
+ of a ROW_FORMAT=COMPRESSED table */
+ MLOG_ZIP_WRITE_TRX_ID = 62,
+
/** biggest value (used in assertions) */
- MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD,
+ MLOG_BIGGEST_TYPE = MLOG_ZIP_WRITE_TRX_ID,
/** log record for writing/updating crypt data of
a tablespace */
diff --git a/storage/innobase/include/os0event.h b/storage/innobase/include/os0event.h
index d5fdc6ba080..f8227235211 100644
--- a/storage/innobase/include/os0event.h
+++ b/storage/innobase/include/os0event.h
@@ -42,11 +42,7 @@ Creates an event semaphore, i.e., a semaphore which may just have two states:
signaled and nonsignaled. The created event is manual reset: it must be reset
explicitly by calling os_event_reset().
@return the event handle */
-os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_t os_event_create(const char*);
/**
Sets an event semaphore to the signaled state: lets waiting threads
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c19079e1f9e..71da751ad25 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -50,7 +50,6 @@ struct fil_node_t;
struct fil_space_t;
extern bool os_has_said_disk_full;
-extern my_bool srv_use_trim;
/** File offset in bytes */
typedef ib_uint64_t os_offset_t;
@@ -69,10 +68,6 @@ the OS actually supports it: Win 95 does not, NT does. */
/** File handle */
typedef HANDLE os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
#else /* _WIN32 */
@@ -81,14 +76,9 @@ typedef DIR* os_file_dir_t; /*!< directory stream */
/** File handle */
typedef int os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) fd
-
#endif /* _WIN32 */
-static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
+static const os_file_t OS_FILE_CLOSED = IF_WIN(os_file_t(INVALID_HANDLE_VALUE),-1);
/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */
struct pfs_os_file_t
@@ -251,7 +241,7 @@ public:
m_fil_node(NULL),
m_type(static_cast<uint16_t>(type))
{
- if (!is_punch_hole_supported() || !srv_use_trim) {
+ if (!is_punch_hole_supported()) {
clear_punch_hole();
}
}
@@ -270,7 +260,7 @@ public:
set_punch_hole();
}
- if (!is_punch_hole_supported() || !srv_use_trim) {
+ if (!is_punch_hole_supported()) {
clear_punch_hole();
}
}
@@ -357,7 +347,7 @@ public:
/** Set the punch hole flag */
void set_punch_hole()
{
- if (is_punch_hole_supported() && srv_use_trim) {
+ if (is_punch_hole_supported()) {
m_type |= PUNCH_HOLE;
}
}
@@ -372,8 +362,7 @@ public:
@param[in] node File node */
void set_fil_node(fil_node_t* node)
{
- if (!srv_use_trim ||
- (node && !fil_node_should_punch_hole(node))) {
+ if (node && !fil_node_should_punch_hole(node)) {
clear_punch_hole();
}
@@ -537,14 +526,11 @@ struct os_file_stat_t {
};
/** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
+the temporary file is created in the directory set by the mysql server configuration
parameter (--tmpdir).
-@param[in] path location for creating temporary file
@return temporary file handle, or NULL on error */
FILE*
-os_file_create_tmpfile(
- const char* path);
+os_file_create_tmpfile();
/** The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
@@ -848,18 +834,10 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_read_no_error_handling_func( \
type, file, buf, offset, n, o, __FILE__, __LINE__)
-# define os_file_read_no_error_handling_int_fd(type, file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_int_fd_func( \
- type, file, buf, offset, n, __FILE__, __LINE__)
-
# define os_file_write(type, name, file, buf, offset, n) \
pfs_os_file_write_func(type, name, file, buf, offset, \
n, __FILE__, __LINE__)
-# define os_file_write_int_fd(type, name, file, buf, offset, n) \
- pfs_os_file_write_int_fd_func(type, name, file, buf, offset, \
- n, __FILE__, __LINE__)
-
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
@@ -1570,7 +1548,7 @@ path. If the path is NULL then it will be created on --tmpdir location.
This function is defined in ha_innodb.cc.
@param[in] path location for creating temporary file
@return temporary file descriptor, or < 0 on error */
-int
+os_file_t
innobase_mysql_tmpfile(
const char* path);
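With srv_use_trim removed, punch-hole behaviour now depends only on what the file node and the
operating system support. A sketch of the intended call pattern (illustration only; node stands
for an open fil_node_t*):

	IORequest	request(IORequest::WRITE);
	request.set_fil_node(node);	/* clears the punch-hole flag if
					the node does not support it */
	if (request.punch_hole()) {
		/* the write may be followed by a hole-punching trim */
	}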
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index 9e79267ab2e..533afb88e3d 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -338,49 +338,6 @@ pfs_os_file_read_no_error_handling_func(
return(result);
}
-/** NOTE! Please use the corresponding macro
-os_file_read_no_error_handling_int_fd() to request
-a synchronous read operation.
-@param[in] type read request
-@param[in] file file handle
-@param[out] buf buffer where to read
-@param[in] offset file offset where to read
-@param[in] n number of bytes to read
-@param[in] src_file caller file name
-@param[in] src_line caller line number
-@return whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_read_no_error_handling_int_fd_func(
- const IORequest& type,
- int file,
- void* buf,
- os_offset_t offset,
- ulint n,
- const char* src_file,
- uint src_line)
-{
- PSI_file_locker_state state;
-
- PSI_file_locker* locker = PSI_FILE_CALL(
- get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_READ);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
-
- bool success = DB_SUCCESS == os_file_read_no_error_handling_func(
- type, OS_FILE_FROM_FD(file), buf, offset, n, NULL);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(success);
-}
-
/** NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
@@ -423,51 +380,6 @@ pfs_os_file_write_func(
return(result);
}
-/** NOTE! Please use the corresponding macro os_file_write_int_fd(),
-not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_write_int_fd() which requests a synchronous write operation.
-@param[in] type write request
-@param[in] name file name
-@param[in] file file handle
-@param[in] buf buffer to write
-@param[in] offset file offset
-@param[in] n number of bytes
-@param[in] src_file file name where func invoked
-@param[in] src_line line where the func invoked
-@return whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_write_int_fd_func(
- const IORequest& type,
- const char* name,
- int file,
- const void* buf,
- os_offset_t offset,
- ulint n,
- const char* src_file,
- uint src_line)
-{
- PSI_file_locker_state state;
- struct PSI_file_locker* locker;
-
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_WRITE);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
-
- bool success = DB_SUCCESS == os_file_write_func(
- type, name, OS_FILE_FROM_FD(file), buf, offset, n);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(success);
-}
/** NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index dda16af605e..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -28,7 +28,9 @@ Created Feb 20, 2014 Vasil Dimov
#define os0once_h
#include "univ.i"
+
#include "ut0ut.h"
+#include "my_cpu.h"
/** Execute a given function exactly once in a multi-threaded environment
or wait for the function to be executed by another thread.
@@ -109,7 +111,7 @@ public:
ut_error;
}
- UT_RELAX_CPU();
+ MY_RELAX_CPU();
}
}
}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index c240f5dacdd..b6838c919a0 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -30,12 +30,6 @@ Created 9/8/1995 Heikki Tuuri
#include "univ.i"
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define OS_THREAD_MAX_N srv_max_n_threads
-
/* Possible fixed priorities for threads */
#define OS_THREAD_PRIORITY_NONE 100
#define OS_THREAD_PRIORITY_BACKGROUND 1
@@ -53,12 +47,8 @@ typedef LPTHREAD_START_ROUTINE os_thread_func_t;
/** Macro for specifying a Windows thread start function. */
#define DECLARE_THREAD(func) WINAPI func
-/** Required to get around a build error on Windows. Even though our functions
-are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
-are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
-don't access the arguments and don't return any value, we should be safe. */
#define os_thread_create(f,a,i) \
- os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
+ os_thread_create_func(f, a, i)
#else
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 3adbfeb93ec..3ab310f97f2 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -154,10 +154,7 @@ page_cur_tuple_insert(
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
- bool use_cache = false)
- /*!< in: if true, then use record cache to
- hold the tuple converted record. */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 3e6d40cba4a..86e560395f3 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -262,10 +262,7 @@ page_cur_tuple_insert(
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
- bool use_cache)
- /*!< in: if true, then use record cache to
- hold the tuple converted record. */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
rec_t* rec;
ulint size = rec_get_converted_size(index, tuple, n_ext);
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index d0c1737b16b..0ff63f8047f 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -61,9 +61,42 @@ typedef byte page_header_t;
#define PAGE_FREE 6 /* pointer to start of page free record list */
#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
- NULL if this info has been reset by a delete,
+ 0 if this info has been reset by a delete,
for example */
-#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
+
+/** This 10-bit field is usually 0. In B-tree index pages of
+ROW_FORMAT=REDUNDANT tables, this byte can contain garbage if the .ibd
+file was created in MySQL 4.1.0 or if the table resides in the system
+tablespace and was created before MySQL 4.1.1 or MySQL 4.0.14.
+In this case, the FIL_PAGE_TYPE would be FIL_PAGE_INDEX.
+
+In ROW_FORMAT=COMPRESSED tables, this field is always 0, because
+instant ADD COLUMN is not supported.
+
+In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC tables, this field is
+always 0, except in the root page of the clustered index after instant
+ADD COLUMN.
+
+Instant ADD COLUMN will change FIL_PAGE_TYPE to FIL_PAGE_TYPE_INSTANT
+and initialize the PAGE_INSTANT field to the original number of
+fields in the clustered index (dict_index_t::n_core_fields). The most
+significant bits are in the first byte, and the least significant 5
+bits are stored in the most significant 5 bits of PAGE_DIRECTION_B.
+
+These FIL_PAGE_TYPE_INSTANT and PAGE_INSTANT may be assigned even if
+instant ADD COLUMN was not committed. Changes to these page header fields
+are not undo-logged, but changes to the hidden metadata record are.
+If the server is killed and restarted, the page header fields could
+remain set even though no metadata record is present.
+
+When the table becomes empty, the PAGE_INSTANT field and the
+FIL_PAGE_TYPE can be reset and any metadata record be removed. */
+#define PAGE_INSTANT 12
+
+/** last insert direction: PAGE_LEFT, ....
+In ROW_FORMAT=REDUNDANT tables created before MySQL 4.1.1 or MySQL 4.0.14,
+this byte can be garbage. */
+#define PAGE_DIRECTION_B 13
#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
@@ -123,9 +156,9 @@ Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
/*-----------------------------*/
/* Heap numbers */
-#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
-#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
-#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in
+#define PAGE_HEAP_NO_INFIMUM 0U /* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM 1U /* page supremum */
+#define PAGE_HEAP_NO_USER_LOW 2U /* first user record in
creation (insertion) order,
not necessarily collation order;
this record may have been deleted */
@@ -175,7 +208,7 @@ inline
page_t*
page_align(const void* ptr)
{
- return(static_cast<page_t*>(ut_align_down(ptr, UNIV_PAGE_SIZE)));
+ return(static_cast<page_t*>(ut_align_down(ptr, srv_page_size)));
}
/** Gets the byte offset within a page frame.
@@ -186,7 +219,7 @@ inline
ulint
page_offset(const void* ptr)
{
- return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
+ return(ut_align_offset(ptr, srv_page_size));
}
/** Determine whether an index page is not in ROW_FORMAT=REDUNDANT.
@@ -249,6 +282,18 @@ page_rec_is_comp(const byte* rec)
return(page_is_comp(page_align(rec)));
}
+# ifdef UNIV_DEBUG
+/** Determine if the record is the metadata pseudo-record
+in the clustered index.
+@param[in] rec leaf page record on an index page
+@return whether the record is the metadata pseudo-record */
+inline bool page_rec_is_metadata(const rec_t* rec)
+{
+ return rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG;
+}
+# endif /* UNIV_DEBUG */
+
/** Determine the offset of the infimum record on the page.
@param[in] page index page
@return offset of the infimum record in record list, relative from page */
@@ -286,7 +331,7 @@ page_rec_is_user_rec_low(ulint offset)
compile_time_assert(PAGE_NEW_SUPREMUM < PAGE_OLD_SUPREMUM_END);
compile_time_assert(PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM_END);
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset != PAGE_NEW_SUPREMUM
&& offset != PAGE_NEW_INFIMUM
@@ -302,7 +347,7 @@ bool
page_rec_is_supremum_low(ulint offset)
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset == PAGE_NEW_SUPREMUM || offset == PAGE_OLD_SUPREMUM);
}
@@ -314,7 +359,7 @@ bool
page_rec_is_infimum_low(ulint offset)
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
}
@@ -455,7 +500,7 @@ page_header_set_field(
Returns the offset stored in the given header field.
@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
@@ -549,7 +594,7 @@ Gets the number of user records on page (the infimum and supremum records
are not user records).
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page); /*!< in: index page */
@@ -567,7 +612,7 @@ page_rec_get_n_recs_before(
Gets the number of records in the heap.
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page); /*!< in: index page */
@@ -588,7 +633,7 @@ page_dir_set_n_heap(
Gets the number of dir slots in directory.
@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page); /*!< in: index page */
@@ -614,7 +659,7 @@ page_dir_get_nth_slot(
ulint n); /*!< in: position */
#else /* UNIV_DEBUG */
# define page_dir_get_nth_slot(page, n) \
- ((page) + (UNIV_PAGE_SIZE - PAGE_DIR \
+ ((page) + (srv_page_size - PAGE_DIR \
- (n + 1) * PAGE_DIR_SLOT_SIZE))
#endif /* UNIV_DEBUG */
/**************************************************************//**
@@ -684,14 +729,52 @@ ulint
page_rec_get_heap_no(
/*=================*/
const rec_t* rec); /*!< in: the physical record */
+/** Determine whether a page has any siblings.
+@param[in] page page frame
+@return true if the page has any siblings */
+inline
+bool
+page_has_siblings(const page_t* page)
+{
+ compile_time_assert(!(FIL_PAGE_PREV % 8));
+ compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
+ compile_time_assert(FIL_NULL == 0xffffffff);
+ return *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV)
+ != ~uint64_t(0);
+}
+
/** Determine whether a page is an index root page.
@param[in] page page frame
@return true if the page is a root page of an index */
-UNIV_INLINE
+inline
bool
-page_is_root(
- const page_t* page)
- MY_ATTRIBUTE((warn_unused_result));
+page_is_root(const page_t* page)
+{
+ return fil_page_index_page_check(page) && !page_has_siblings(page);
+}
+
+/** Determine whether a page has a predecessor.
+@param[in] page page frame
+@return true if the page has a predecessor */
+inline
+bool
+page_has_prev(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_PREV)
+ != FIL_NULL;
+}
+
+/** Determine whether a page has a successor.
+@param[in] page page frame
+@return true if the page has a successor */
+inline
+bool
+page_has_next(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_NEXT)
+ != FIL_NULL;
+}
+
/************************************************************//**
Gets the pointer to the next record on the page.
@return pointer to next record */
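/* Editor's note -- a stand-alone sketch, not part of the patch: it shows why
page_has_siblings() above may test FIL_PAGE_PREV and FIL_PAGE_NEXT with a
single 8-byte comparison. The two 4-byte fields are adjacent and a page with
no siblings stores FIL_NULL (0xffffffff) in both, so the combined word is all
one-bits exactly in that case, regardless of byte order. memcpy() is used here
instead of the aligned cast; the offsets and buffer are illustrative only. */
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
	unsigned char frame[16];
	std::memset(frame, 0xff, sizeof frame);	/* PREV == NEXT == FIL_NULL */

	uint64_t both;
	std::memcpy(&both, frame + 8, 8);	/* pretend FIL_PAGE_PREV starts at 8 */
	assert(both == ~uint64_t(0));		/* no siblings */

	const uint32_t next = 42;		/* give the page a successor */
	std::memcpy(frame + 12, &next, 4);
	std::memcpy(&both, frame + 8, 8);
	assert(both != ~uint64_t(0));		/* now the page has a sibling */
}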
@@ -863,7 +946,7 @@ Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records.
@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page); /*!< in: index page */
@@ -909,6 +992,45 @@ page_mem_free(
const dict_index_t* index, /*!< in: index of rec */
const ulint* offsets);/*!< in: array returned by
rec_get_offsets() */
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr);
+
+/** Set the PAGE_DIRECTION field.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@param[in] dir the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir);
+
+/** Read the PAGE_DIRECTION field.
+@param[in] page index page
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_get_direction(const page_t* page)
+{
+ return page_ptr_get_direction(PAGE_HEADER + PAGE_DIRECTION_B + page);
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in] page index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page);
+/** Assign the PAGE_INSTANT field.
+@param[in,out] page clustered index root page
+@param[in] n original number of clustered index fields
+@param[in,out] mtr mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr);
+
/**********************************************************//**
Create an uncompressed B-tree index page.
@return pointer to the page */
@@ -1249,5 +1371,4 @@ page_warn_strict_checksum(
#include "page0page.ic"
-
#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 0062db56bfa..307803367c0 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -172,8 +172,8 @@ page_header_set_field(
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
- ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
- ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+ ut_ad(field == PAGE_N_HEAP || val < srv_page_size);
+ ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < srv_page_size);
mach_write_to_2(page + PAGE_HEADER + field, val);
if (page_zip) {
@@ -186,19 +186,17 @@ page_header_set_field(
Returns the offset stored in the given header field.
@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
ulint field) /*!< in: PAGE_FREE, ... */
{
- ulint offs;
-
ut_ad((field == PAGE_FREE)
|| (field == PAGE_LAST_INSERT)
|| (field == PAGE_HEAP_TOP));
- offs = page_header_get_field(page, field);
+ uint16_t offs = page_header_get_field(page, field);
ut_ad((field != PAGE_HEAP_TOP) || offs);
@@ -277,31 +275,6 @@ page_rec_get_heap_no(
}
}
-/** Determine whether a page is an index root page.
-@param[in] page page frame
-@return true if the page is a root page of an index */
-UNIV_INLINE
-bool
-page_is_root(
- const page_t* page)
-{
-#if FIL_PAGE_PREV % 8
-# error FIL_PAGE_PREV must be 64-bit aligned
-#endif
-#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
-# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
-#endif
-#if FIL_NULL != 0xffffffff
-# error FIL_NULL != 0xffffffff
-#endif
- /* Check that this is an index page and both the PREV and NEXT
- pointers are FIL_NULL, because the root page does not have any
- siblings. */
- return(fil_page_index_page_check(page)
- && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV)
- == IB_UINT64_MAX);
-}
-
/** Determine whether an index page record is a user record.
@param[in] rec record in an index page
@return true if a user record */
@@ -423,7 +396,8 @@ page_get_middle_rec(
/*================*/
page_t* page) /*!< in: page */
{
- ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+ ulint middle = (ulint(page_get_n_recs(page))
+ + PAGE_HEAP_NO_USER_LOW) / 2;
return(page_rec_get_nth(page, middle));
}
@@ -464,7 +438,7 @@ Gets the number of user records on page (infimum and supremum records
are not user records).
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page) /*!< in: index page */
@@ -477,7 +451,7 @@ page_get_n_recs(
Gets the number of dir slots in directory.
@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page) /*!< in: index page */
@@ -502,7 +476,7 @@ page_dir_set_n_slots(
Gets the number of records in the heap.
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page) /*!< in: index page */
@@ -547,7 +521,7 @@ page_dir_get_nth_slot(
ut_ad(page_dir_get_n_slots(page) > n);
return((page_dir_slot_t*)
- page + UNIV_PAGE_SIZE - PAGE_DIR
+ page + srv_page_size - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
}
#endif /* UNIV_DEBUG */
@@ -666,7 +640,7 @@ page_rec_get_next_low(
offs = rec_get_next_offs(rec, comp);
- if (offs >= UNIV_PAGE_SIZE) {
+ if (offs >= srv_page_size) {
fprintf(stderr,
"InnoDB: Next record offset is nonsensical %lu"
" in record at offset %lu\n"
@@ -855,9 +829,8 @@ page_rec_get_base_extra_size(
/*=========================*/
const rec_t* rec) /*!< in: physical record */
{
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
+ compile_time_assert(REC_N_NEW_EXTRA_BYTES + 1
+ == REC_N_OLD_EXTRA_BYTES);
return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
}
@@ -868,21 +841,17 @@ Returns the sum of the sizes of the records in the record list, excluding
the infimum and supremum records.
@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page) /*!< in: index page */
{
- ulint ret;
-
- ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - (page_is_comp(page)
- ? PAGE_NEW_SUPREMUM_END
- : PAGE_OLD_SUPREMUM_END)
- - page_header_get_field(page, PAGE_GARBAGE));
-
- ut_ad(ret < UNIV_PAGE_SIZE);
-
+ uint16_t ret = page_header_get_field(page, PAGE_HEAP_TOP)
+ - (page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END)
+ - page_header_get_field(page, PAGE_GARBAGE);
+ ut_ad(ret < srv_page_size);
return(ret);
}
@@ -930,13 +899,13 @@ page_get_free_space_of_empty(
ulint comp) /*!< in: nonzero=compact page layout */
{
if (comp) {
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_NEW_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_OLD_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
@@ -1074,10 +1043,79 @@ page_mem_free(
page_zip_dir_delete(page_zip, rec, index, offsets, free);
} else {
page_header_set_field(page, page_zip, PAGE_N_RECS,
- page_get_n_recs(page) - 1);
+ ulint(page_get_n_recs(page)) - 1);
+ }
+}
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr)
+{
+ ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+ return *ptr & ((1U << 3) - 1);
+}
+
+/** Set the PAGE_DIRECTION field.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@param[in] dir the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir)
+{
+ ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+ ut_ad(dir >= PAGE_LEFT);
+ ut_ad(dir <= PAGE_NO_DIRECTION);
+ *ptr = (*ptr & ~((1U << 3) - 1)) | dir;
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in] page index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page)
+{
+ uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+#ifdef UNIV_DEBUG
+ switch (fil_page_get_type(page)) {
+ case FIL_PAGE_TYPE_INSTANT:
+ ut_ad(page_get_direction(page) <= PAGE_NO_DIRECTION);
+ ut_ad(i >> 3);
+ break;
+ case FIL_PAGE_INDEX:
+ ut_ad(i <= PAGE_NO_DIRECTION || !page_is_comp(page));
+ break;
+ case FIL_PAGE_RTREE:
+ ut_ad(i <= PAGE_NO_DIRECTION);
+ break;
+ default:
+ ut_ad(!"invalid page type");
+ break;
}
+#endif /* UNIV_DEBUG */
+ return(i >> 3);
}
+/** Assign the PAGE_INSTANT field.
+@param[in,out] page clustered index root page
+@param[in] n original number of clustered index fields
+@param[in,out] mtr mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr)
+{
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_INSTANT);
+ ut_ad(n > 0);
+ ut_ad(n < REC_MAX_N_FIELDS);
+ uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+ ut_ad(i <= PAGE_NO_DIRECTION);
+ i |= n << 3;
+ mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + page, i,
+ MLOG_2BYTES, mtr);
+}
#endif /* !UNIV_INNOCHECKSUM */
#ifdef UNIV_MATERIALIZE
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
index 24b70d13bd0..981f8743960 100644
--- a/storage/innobase/include/page0size.h
+++ b/storage/innobase/include/page0size.h
@@ -29,7 +29,7 @@ Created Nov 14, 2013 Vasil Dimov
#include "fsp0types.h"
-#define FIELD_REF_SIZE 20
+#define FIELD_REF_SIZE 20U
/** A BLOB field reference full of zero, for use in assertions and
tests.Initially, BLOB field references are set to zero, in
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index d72d5662f78..a2910a73634 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -339,18 +339,39 @@ page_zip_write_node_ptr(
ulint ptr, /*!< in: node pointer */
mtr_t* mtr); /*!< in: mini-transaction, or NULL */
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
+@param[in,out] page_zip compressed page
+@param[in,out] rec record
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
+@param[in] trx_id DB_TRX_ID value (transaction identifier)
+@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer)
+@param[in,out] mtr mini-transaction, or NULL to skip logging */
void
page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
- trx_id_t trx_id, /*!< in: transaction identifier */
- roll_ptr_t roll_ptr)/*!< in: roll_ptr */
- MY_ATTRIBUTE((nonnull));
+ page_zip_des_t* page_zip,
+ byte* rec,
+ const ulint* offsets,
+ ulint trx_id_col,
+ trx_id_t trx_id,
+ roll_ptr_t roll_ptr,
+ mtr_t* mtr = NULL)
+ MY_ATTRIBUTE((nonnull(1,2,3)));
+
+/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
+@param[in] ptr redo log buffer
+@param[in] end_ptr end of redo log buffer
+@param[in,out] page uncompressed page
+@param[in,out] page_zip compressed page
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+page_zip_parse_write_trx_id(
+ byte* ptr,
+ byte* end_ptr,
+ page_t* page,
+ page_zip_des_t* page_zip)
+ MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index b471e2cf64e..b3ebc5dcf51 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -120,7 +120,7 @@ page_zip_get_size(
size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
return(size);
}
@@ -242,9 +242,9 @@ page_zip_get_trailer_len(
ut_ad(!page_zip->n_blobs);
}
- return((page_dir_get_n_heap(page_zip->data) - 2)
- * uncompressed_size
- + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
+ return (ulint(page_dir_get_n_heap(page_zip->data)) - 2)
+ * uncompressed_size
+ + ulint(page_zip->n_blobs) * BTR_EXTERN_FIELD_REF_SIZE;
}
/**********************************************************************//**
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index a476d7d1d45..fba42be413f 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -538,7 +538,7 @@ pars_info_add_int4_literal(
/*=======================*/
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
- lint val); /*!< in: value */
+ ulint val); /*!< in: value */
/****************************************************************//**
Equivalent to:
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index cf8e48be0af..ffca9034b38 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -333,13 +333,6 @@ enum que_thr_lock_t {
QUE_THR_LOCK_TABLE
};
-/** From where the cursor position is counted */
-enum que_cur_t {
- QUE_CUR_NOT_DEFINED,
- QUE_CUR_START,
- QUE_CUR_END
-};
-
/* Query graph query thread node: the fields are protected by the
trx_t::mutex with the exceptions named below */
@@ -413,18 +406,7 @@ struct que_fork_t{
generated by the parser, or NULL
if the graph was created 'by hand' */
pars_info_t* info; /*!< info struct, or NULL */
- /* The following cur_... fields are relevant only in a select graph */
- ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
- QUE_CUR_END */
- ulint cur_pos; /*!< if there are n rows in the result
- set, values 0 and n + 1 mean before
- first row, or after last row, depending
- on cur_end; values 1...n mean a row
- index */
- ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
- it is not before the first row or
- after the last row */
sel_node_t* last_sel_node; /*!< last executed select node, or NULL
if none */
UT_LIST_NODE_T(que_fork_t)
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
deleted file mode 100644
index 770d444e69a..00000000000
--- a/storage/innobase/include/read0read.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "read0types.h"
-
-#include <algorithm>
-
-/** The MVCC read view manager */
-class MVCC {
-public:
- /** Constructor
- @param size Number of views to pre-allocate */
- explicit MVCC(ulint size);
-
- /** Destructor.
- Free all the views in the m_free list */
- ~MVCC();
-
- /**
- Allocate and create a view.
- @param view view owned by this class created for the
- caller. Must be freed by calling close()
- @param trx transaction creating the view */
- void view_open(ReadView*& view, trx_t* trx);
-
- /**
- Close a view created by the above function.
- @para view view allocated by trx_open.
- @param own_mutex true if caller owns trx_sys_t::mutex */
- void view_close(ReadView*& view, bool own_mutex);
-
- /**
- Release a view that is inactive but not closed. Caller must own
- the trx_sys_t::mutex.
- @param view View to release */
- void view_release(ReadView*& view);
-
- /** Clones the oldest view and stores it in view. No need to
- call view_close(). The caller owns the view that is passed in.
- It will also move the closed views from the m_views list to the
- m_free list. This function is called by Purge to create it view.
- @param view Preallocated view, owned by the caller */
- void clone_oldest_view(ReadView* view);
-
- /**
- @return the number of active views */
- ulint size() const;
-
- /**
- @return true if the view is active and valid */
- static bool is_view_active(ReadView* view)
- {
- ut_a(view != reinterpret_cast<ReadView*>(0x1));
-
- return(view != NULL && !(intptr_t(view) & 0x1));
- }
-
- /**
- Set the view creator transaction id. Note: This shouldbe set only
- for views created by RW transactions. */
- static void set_view_creator_trx_id(ReadView* view, trx_id_t id);
-
-private:
-
- /**
- Validates a read view list. */
- bool validate() const;
-
- /**
- Find a free view from the active list, if none found then allocate
- a new view. This function will also attempt to move delete marked
- views from the active list to the freed list.
- @return a view to use */
- inline ReadView* get_view();
-
- /**
- Get the oldest view in the system. It will also move the delete
- marked read views from the views list to the freed list.
- @return oldest view if found or NULL */
- inline ReadView* get_oldest_view() const;
-
-private:
- // Prevent copying
- MVCC(const MVCC&);
- MVCC& operator=(const MVCC&);
-
-private:
- typedef UT_LIST_BASE_NODE_T(ReadView) view_list_t;
-
- /** Free views ready for reuse. */
- view_list_t m_free;
-
- /** Active and closed views, the closed views will have the
- creator trx id set to TRX_ID_MAX */
- view_list_t m_views;
-};
-
-#endif /* read0read_h */
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index c51a3744398..52338889c47 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,122 +31,163 @@ Created 2/16/1997 Heikki Tuuri
#include "trx0types.h"
#include <algorithm>
-// Friend declaration
-class MVCC;
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
+/** View is not visible to purge thread. */
+#define READ_VIEW_STATE_CLOSED 0
-class ReadView {
- /** This is similar to a std::vector but it is not a drop
- in replacement. It is specific to ReadView. */
- class ids_t {
- typedef trx_ids_t::value_type value_type;
+/** View is being opened, purge thread must wait for state change. */
+#define READ_VIEW_STATE_SNAPSHOT 1
- /**
- Constructor */
- ids_t() : m_ptr(), m_size(), m_reserved() { }
+/** View is visible to purge thread. */
+#define READ_VIEW_STATE_OPEN 2
- /**
- Destructor */
- ~ids_t() { UT_DELETE_ARRAY(m_ptr); }
- /**
- Try and increase the size of the array. Old elements are
- copied across. It is a no-op if n is < current size.
+/**
+ Read view lists the trx ids of those transactions for which a consistent read
+ should not see the modifications to the database.
+*/
+class ReadView
+{
+ /**
+ View state.
- @param n Make space for n elements */
- void reserve(ulint n);
+  It is not defined as an enum because it has to be updated using atomic operations.
+ Possible values are READ_VIEW_STATE_CLOSED, READ_VIEW_STATE_SNAPSHOT and
+ READ_VIEW_STATE_OPEN.
- /**
- Resize the array, sets the current element count.
- @param n new size of the array, in elements */
- void resize(ulint n)
- {
- ut_ad(n <= capacity());
+  Possible state transitions...
- m_size = n;
- }
-
- /**
- Reset the size to 0 */
- void clear() { resize(0); }
-
- /**
- @return the capacity of the array in elements */
- ulint capacity() const { return(m_reserved); }
-
- /**
- Copy and overwrite the current array contents
-
- @param start Source array
- @param end Pointer to end of array */
- void assign(const value_type* start, const value_type* end);
-
- /**
- Insert the value in the correct slot, preserving the order.
- Doesn't check for duplicates. */
- void insert(value_type value);
-
- /**
- @return the value of the first element in the array */
- value_type front() const
- {
- ut_ad(!empty());
-
- return(m_ptr[0]);
- }
-
- /**
- @return the value of the last element in the array */
- value_type back() const
- {
- ut_ad(!empty());
-
- return(m_ptr[m_size - 1]);
- }
-
- /**
- Append a value to the array.
- @param value the value to append */
- void push_back(value_type value);
-
- /**
- @return a pointer to the start of the array */
- trx_id_t* data() { return(m_ptr); };
-
- /**
- @return a const pointer to the start of the array */
- const trx_id_t* data() const { return(m_ptr); };
+ Start view open:
+ READ_VIEW_STATE_CLOSED -> READ_VIEW_STATE_SNAPSHOT
- /**
- @return the number of elements in the array */
- ulint size() const { return(m_size); }
+ Complete view open:
+ READ_VIEW_STATE_SNAPSHOT -> READ_VIEW_STATE_OPEN
- /**
- @return true if size() == 0 */
- bool empty() const { return(size() == 0); }
+ Close view:
+ READ_VIEW_STATE_OPEN -> READ_VIEW_STATE_CLOSED
+ */
+ int32_t m_state;
- private:
- // Prevent copying
- ids_t(const ids_t&);
- ids_t& operator=(const ids_t&);
- private:
- /** Memory for the array */
- value_type* m_ptr;
-
- /** Number of active elements in the array */
- ulint m_size;
+public:
+ ReadView(): m_state(READ_VIEW_STATE_CLOSED), m_low_limit_id(0) {}
+
+
+ /**
+ Copy state from another view.
+
+ This method is used to find min(m_low_limit_no), min(m_low_limit_id) and
+ all transaction ids below min(m_low_limit_id). These values effectively
+ form oldest view.
+
+ @param other view to copy from
+ */
+ void copy(const ReadView &other)
+ {
+ ut_ad(&other != this);
+ if (m_low_limit_no > other.m_low_limit_no)
+ m_low_limit_no= other.m_low_limit_no;
+ if (m_low_limit_id > other.m_low_limit_id)
+ m_low_limit_id= other.m_low_limit_id;
+
+ trx_ids_t::iterator dst= m_ids.begin();
+ for (trx_ids_t::const_iterator src= other.m_ids.begin();
+ src != other.m_ids.end(); src++)
+ {
+ if (*src >= m_low_limit_id)
+ break;
+loop:
+ if (dst == m_ids.end())
+ {
+ m_ids.push_back(*src);
+ dst= m_ids.end();
+ continue;
+ }
+ if (*dst < *src)
+ {
+ dst++;
+ goto loop;
+ }
+ else if (*dst > *src)
+ dst= m_ids.insert(dst, *src) + 1;
+ }
+ m_ids.erase(std::lower_bound(dst, m_ids.end(), m_low_limit_id),
+ m_ids.end());
+
+ m_up_limit_id= m_ids.empty() ? m_low_limit_id : m_ids.front();
+ ut_ad(m_up_limit_id <= m_low_limit_id);
+ }
+
+
+ /**
+ Opens a read view where exactly the transactions serialized before this
+ point in time are seen in the view.
+
+ View becomes visible to purge thread.
+
+ @param[in,out] trx transaction
+ */
+ void open(trx_t *trx);
+
+
+ /**
+ Closes the view.
+
+ View becomes not visible to purge thread.
+ */
+ void close()
+ {
+ ut_ad(m_state == READ_VIEW_STATE_CLOSED ||
+ m_state == READ_VIEW_STATE_OPEN);
+ if (m_state == READ_VIEW_STATE_OPEN)
+ my_atomic_store32_explicit(&m_state, READ_VIEW_STATE_CLOSED,
+ MY_MEMORY_ORDER_RELAXED);
+ }
+
+
+  /** m_state getter for trx_sys::clone_oldest_view() and trx_sys::size(). */
+ int32_t get_state() const
+ {
+ return my_atomic_load32_explicit(const_cast<int32*>(&m_state),
+ MY_MEMORY_ORDER_ACQUIRE);
+ }
+
+
+ /**
+ Returns true if view is open.
+
+ Only used by view owner thread, thus we can omit atomic operations.
+ */
+ bool is_open() const
+ {
+ ut_ad(m_state == READ_VIEW_STATE_OPEN ||
+ m_state == READ_VIEW_STATE_CLOSED);
+ return m_state == READ_VIEW_STATE_OPEN;
+ }
+
+
+ /**
+ Creates a snapshot where exactly the transactions serialized before this
+ point in time are seen in the view.
+
+ @param[in,out] trx transaction
+ */
+ inline void snapshot(trx_t *trx);
+
+
+ /**
+ Sets the creator transaction id.
+
+ This should be set only for views created by RW transactions.
+ */
+ void set_creator_trx_id(trx_id_t id)
+ {
+ ut_ad(id > 0);
+ ut_ad(m_creator_trx_id == 0);
+ m_creator_trx_id= id;
+ }
- /** Size of m_ptr in elements */
- ulint m_reserved;
- friend class ReadView;
- };
-public:
- ReadView();
- ~ReadView();
/** Check whether transaction id is valid.
@param[in] id transaction id to check
@param[in] name table name */
@@ -162,8 +204,6 @@ public:
const table_name_t& name) const
MY_ATTRIBUTE((warn_unused_result))
{
- ut_ad(id > 0);
-
if (id < m_up_limit_id || id == m_creator_trx_id) {
return(true);
@@ -180,9 +220,7 @@ public:
return(true);
}
- const ids_t::value_type* p = m_ids.data();
-
- return(!std::binary_search(p, p + m_ids.size(), id));
+ return(!std::binary_search(m_ids.begin(), m_ids.end(), id));
}
/**
@@ -194,21 +232,6 @@ public:
}
/**
- Mark the view as closed */
- void close()
- {
- ut_ad(m_creator_trx_id != TRX_ID_MAX);
- m_creator_trx_id = TRX_ID_MAX;
- }
-
- /**
- @return true if the view is closed */
- bool is_closed() const
- {
- return(m_closed);
- }
-
- /**
Write the limits to the file.
@param file file to write to */
void print_limits(FILE* file) const
@@ -233,66 +256,6 @@ public:
return(m_low_limit_id);
}
- /**
- @return true if there are no transaction ids in the snapshot */
- bool empty() const
- {
- return(m_ids.empty());
- }
-
-#ifdef UNIV_DEBUG
- /**
- @param rhs view to compare with
- @return truen if this view is less than or equal rhs */
- bool le(const ReadView* rhs) const
- {
- return(m_low_limit_no <= rhs->m_low_limit_no);
- }
-
- trx_id_t up_limit_id() const
- {
- return(m_up_limit_id);
- }
-#endif /* UNIV_DEBUG */
-private:
- /**
- Copy the transaction ids from the source vector */
- inline void copy_trx_ids(const trx_ids_t& trx_ids);
-
- /**
- Opens a read view where exactly the transactions serialized before this
- point in time are seen in the view.
- @param id Creator transaction id */
- inline void prepare(trx_id_t id);
-
- /**
- Complete the read view creation */
- inline void complete();
-
- /**
- Copy state from another view. Must call copy_complete() to finish.
- @param other view to copy from */
- inline void copy_prepare(const ReadView& other);
-
- /**
- Complete the copy, insert the creator transaction id into the
- m_trx_ids too and adjust the m_up_limit_id *, if required */
- inline void copy_complete();
-
- /**
- Set the creator transaction id, existing id must be 0 */
- void creator_trx_id(trx_id_t id)
- {
- ut_ad(m_creator_trx_id == 0);
- m_creator_trx_id = id;
- }
-
- friend class MVCC;
-
-private:
- // Disable copying
- ReadView(const ReadView&);
- ReadView& operator=(const ReadView&);
private:
/** The read should not see any transaction with trx id >= this
@@ -310,21 +273,12 @@ private:
/** Set of RW transactions that was active when this snapshot
was taken */
- ids_t m_ids;
+ trx_ids_t m_ids;
/** The view does not need to see the undo logs for transactions
whose transaction number is strictly smaller (<) than this value:
they can be removed in purge if not needed by other views */
trx_id_t m_low_limit_no;
-
- /** AC-NL-RO transaction view that has been "closed". */
- bool m_closed;
-
- typedef UT_LIST_NODE_T(ReadView) node_t;
-
- /** List of read views in trx_sys */
- byte pad1[64 - sizeof(node_t)];
- node_t m_view_list;
};
#endif
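/* Editor's note -- a stand-alone sketch, not part of the patch: it restates
the ReadView ideas above in plain C++ -- the CLOSED -> SNAPSHOT -> OPEN ->
CLOSED lifecycle and the visibility rule of changes_visible(). std::atomic
stands in for the my_atomic_* calls; all names below are local to the sketch. */
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <vector>

enum { VIEW_CLOSED, VIEW_SNAPSHOT, VIEW_OPEN };

struct view_sketch {
	std::atomic<int>	state{VIEW_CLOSED};
	uint64_t		up_limit_id = 0;	/* smallest id in ids, or low_limit_id */
	uint64_t		low_limit_id = 0;	/* "next trx id" at snapshot time */
	uint64_t		creator_trx_id = 0;
	std::vector<uint64_t>	ids;			/* sorted active rw trx ids */

	void open(uint64_t low, std::vector<uint64_t> active, uint64_t creator)
	{
		/* purge must wait while the snapshot is being taken */
		state.store(VIEW_SNAPSHOT, std::memory_order_relaxed);
		low_limit_id = low;
		ids = std::move(active);	/* sorted, creator excluded */
		up_limit_id = ids.empty() ? low_limit_id : ids.front();
		creator_trx_id = creator;
		/* now the view becomes visible to the purge thread */
		state.store(VIEW_OPEN, std::memory_order_release);
	}

	void close() { state.store(VIEW_CLOSED, std::memory_order_relaxed); }

	/* same shape as ReadView::changes_visible() above */
	bool changes_visible(uint64_t id) const
	{
		if (id < up_limit_id || id == creator_trx_id) return true;
		if (id >= low_limit_id) return false;
		return !std::binary_search(ids.begin(), ids.end(), id);
	}
};

int main()
{
	view_sketch v;
	v.open(100, {90, 95}, 0);	/* trx 90 and 95 were still active */
	return !(v.changes_visible(80)
		 && !v.changes_visible(95)
		 && !v.changes_visible(100));
}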
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 5c571df9563..3a541289a61 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,6 +32,7 @@ Created 5/30/1994 Heikki Tuuri
#include "rem0types.h"
#include "mtr0types.h"
#include "page0types.h"
+#include "dict0dict.h"
#include "trx0types.h"
#endif /*! UNIV_INNOCHECKSUM */
#include <ostream>
@@ -53,11 +54,29 @@ in addition to the data and the offsets */
in addition to the data and the offsets */
#define REC_N_NEW_EXTRA_BYTES 5
-/* Record status values */
-#define REC_STATUS_ORDINARY 0
-#define REC_STATUS_NODE_PTR 1
-#define REC_STATUS_INFIMUM 2
-#define REC_STATUS_SUPREMUM 3
+/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED */
+enum rec_comp_status_t {
+ /** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+ REC_STATUS_ORDINARY = 0,
+ /** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+ REC_STATUS_NODE_PTR = 1,
+ /** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */
+ REC_STATUS_INFIMUM = 2,
+ /** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */
+ REC_STATUS_SUPREMUM = 3,
+ /** Clustered index record that has been inserted or updated
+ after instant ADD COLUMN (more than dict_index_t::n_core_fields) */
+ REC_STATUS_COLUMNS_ADDED = 4
+};
+
+/** The dtuple_t::info_bits of the metadata pseudo-record.
+@see rec_is_metadata() */
+static const byte REC_INFO_METADATA
+ = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED;
+
+#define REC_NEW_STATUS	3	/* This is a single-byte bit-field */
+#define REC_NEW_STATUS_MASK 0x7UL
+#define REC_NEW_STATUS_SHIFT 0
/* The following four constants are needed in page0zip.cc in order to
efficiently compress and decompress pages. */
@@ -93,6 +112,22 @@ offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE OFFS_IN_REC_NORMAL_SIZE
#define REC_OFFS_SMALL_SIZE 10
+/** Get the base address of offsets. The extra_size is stored at
+this position, and following positions hold the end offsets of
+the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/** Compact flag ORed to the extra size returned by rec_get_offsets() */
+const ulint REC_OFFS_COMPACT = ~(ulint(~0) >> 1);
+/** SQL NULL flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_SQL_NULL = REC_OFFS_COMPACT;
+/** External flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_EXTERNAL = REC_OFFS_COMPACT >> 1;
+/** Default value flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_DEFAULT = REC_OFFS_COMPACT >> 2;
+/** Mask for offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_MASK = REC_OFFS_DEFAULT - 1;
+
#ifndef UNIV_INNOCHECKSUM
/******************************************************//**
The following function is used to get the pointer of the next chained record
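/* Editor's note -- a stand-alone sketch, not part of the patch: it spells out
how the REC_OFFS_* constants above appear to be used in the words returned by
rec_get_offsets(): each per-field word keeps the field end offset in the low
REC_OFFS_MASK bits and at most one of the SQL_NULL / EXTERNAL / DEFAULT flags
in the top bits (the top bit doubles as the COMPACT flag in the header word).
All names below are local to the sketch. */
#include <cassert>

typedef unsigned long offs_t;

const offs_t OFFS_COMPACT  = ~(offs_t(~0UL) >> 1);	/* top bit */
const offs_t OFFS_SQL_NULL = OFFS_COMPACT;		/* reused in field words */
const offs_t OFFS_EXTERNAL = OFFS_COMPACT >> 1;
const offs_t OFFS_DEFAULT  = OFFS_COMPACT >> 2;
const offs_t OFFS_MASK     = OFFS_DEFAULT - 1;

int main()
{
	/* "field 0 ends at byte 8 of the record and is stored off-page" */
	offs_t field = 8 | OFFS_EXTERNAL;

	assert((field & OFFS_MASK) == 8);	/* end offset of the field */
	assert(field & OFFS_EXTERNAL);		/* off-page (BLOB) flag */
	assert(!(field & OFFS_DEFAULT));	/* the flags are mutually exclusive */
	assert(!(field & OFFS_SQL_NULL));	/* not SQL NULL */
}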
@@ -251,25 +286,55 @@ rec_set_info_bits_new(
rec_t* rec, /*!< in/out: new-style physical record */
ulint bits) /*!< in: info bits */
MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
+
+/** Determine the status bits of a non-REDUNDANT record.
+@param[in] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((warn_unused_result));
+inline
+rec_comp_status_t
+rec_get_status(const rec_t* rec)
+{
+ byte bits = rec[-REC_NEW_STATUS] & REC_NEW_STATUS_MASK;
+ ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+ return static_cast<rec_comp_status_t>(bits);
+}
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
+/** Set the status bits of a non-REDUNDANT record.
+@param[in,out] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
+@param[in] bits status bits */
+inline
void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
+rec_set_status(rec_t* rec, byte bits)
+{
+ ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+ rec[-REC_NEW_STATUS] = (rec[-REC_NEW_STATUS] & ~REC_NEW_STATUS_MASK)
+ | bits;
+}
+
+/** Get the length of added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in] n_add_field number of added fields, minus one
+@return storage size of the field count, in bytes */
+inline unsigned rec_get_n_add_field_len(ulint n_add_field)
+{
+ ut_ad(n_add_field < REC_MAX_N_FIELDS);
+ return n_add_field < 0x80 ? 1 : 2;
+}
+
+/** Set the added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in,out] header variable header of a REC_STATUS_COLUMNS_ADDED record
+@param[in] n_add number of added fields, minus 1
+The header pointer is moved below the stored field count. */
+inline void rec_set_n_add_field(byte*& header, ulint n_add)
+{
+ ut_ad(n_add < REC_MAX_N_FIELDS);
+
+ if (n_add < 0x80) {
+ *header-- = byte(n_add);
+ } else {
+ *header-- = byte(n_add) | 0x80;
+ *header-- = byte(n_add >> 7);
+ }
+}
/******************************************************//**
The following function is used to retrieve the info and status
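/* Editor's note -- a stand-alone sketch, not part of the patch: it replays
the variable-length encoding used by rec_set_n_add_field() above (one header
byte when the added-field count minus one is below 0x80, otherwise two bytes
with the 0x80 marker in the byte that is read back first) and pairs it with a
decoder reconstructed by the editor from that setter. The record header grows
toward lower addresses, hence the decrementing pointers. Names are local to
the sketch. */
#include <cassert>
#include <cstdint>

static void encode_n_add(uint8_t*& header, unsigned n_add)
{
	if (n_add < 0x80) {
		*header-- = uint8_t(n_add);
	} else {
		*header-- = uint8_t(n_add & 0x7f) | 0x80;	/* low 7 bits + marker */
		*header-- = uint8_t(n_add >> 7);		/* remaining high bits */
	}
}

static unsigned decode_n_add(const uint8_t*& header)
{
	unsigned n = *--header;
	if (n < 0x80) {
		return n;
	}
	n &= 0x7f;
	n |= unsigned(*--header) << 7;
	return n;
}

int main()
{
	uint8_t buf[4] = {0, 0, 0, 0};

	uint8_t* w = buf + 3;		/* write downwards, like the record header */
	encode_n_add(w, 300);		/* needs the two-byte form */

	const uint8_t* r = buf + 4;	/* decode by walking back down */
	assert(decode_n_add(r) == 300);
}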
@@ -326,7 +391,7 @@ rec_set_deleted_flag_new(
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
UNIV_INLINE
-ibool
+bool
rec_get_node_ptr_flag(
/*==================*/
const rec_t* rec) /*!< in: physical record */
@@ -458,9 +523,7 @@ rec_get_offsets_func(
const rec_t* rec,
const dict_index_t* index,
ulint* offsets,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
#ifdef UNIV_DEBUG
const char* file, /*!< in: file name where called */
@@ -470,7 +533,7 @@ rec_get_offsets_func(
#ifdef UNIV_DEBUG
MY_ATTRIBUTE((nonnull(1,2,6,8),warn_unused_result));
#else /* UNIV_DEBUG */
- MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result));
+ MY_ATTRIBUTE((nonnull(1,2,6),warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_DEBUG
@@ -478,7 +541,7 @@ rec_get_offsets_func(
rec_get_offsets_func(rec,index,offsets,leaf,n,__FILE__,__LINE__,heap)
#else /* UNIV_DEBUG */
# define rec_get_offsets(rec, index, offsets, leaf, n, heap) \
- rec_get_offsets_func(rec, index, offsets, n, heap)
+ rec_get_offsets_func(rec, index, offsets, leaf, n, heap)
#endif /* UNIV_DEBUG */
/******************************************************//**
@@ -498,32 +561,31 @@ rec_get_offsets_reverse(
offsets[0] allocated elements */
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
+/** Validate offsets returned by rec_get_offsets().
+@param[in] rec record, or NULL
+@param[in] index the index that the record belongs in, or NULL
+@param[in,out] offsets the offsets of the record
+@return true */
+bool
rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets)
MY_ATTRIBUTE((nonnull(3), warn_unused_result));
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
+/** Update debug data in offsets, in order to tame rec_offs_validate().
+@param[in] rec record
+@param[in] index the index that the record belongs in
+@param[in] leaf whether the record resides in a leaf page
+@param[in,out] offsets offsets from rec_get_offsets() to adjust */
void
rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
+ const rec_t* rec,
+ const dict_index_t* index,
+ bool leaf,
+ ulint* offsets)
MY_ATTRIBUTE((nonnull));
#else
-# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+# define rec_offs_make_valid(rec, index, leaf, offsets)
#endif /* UNIV_DEBUG */
/************************************************************//**
@@ -567,26 +629,7 @@ rec_get_nth_field_offs(
MY_ATTRIBUTE((nonnull));
#define rec_get_nth_field(rec, offsets, n, len) \
((rec) + rec_get_nth_field_offs(offsets, n, len))
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
+
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -597,16 +640,6 @@ rec_offs_any_null_extern(
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
MY_ATTRIBUTE((warn_unused_result));
-/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((warn_unused_result));
/** Mark the nth field as externally stored.
@param[in] offsets array returned by rec_get_offsets()
@@ -615,16 +648,175 @@ void
rec_offs_make_nth_extern(
ulint* offsets,
const ulint n);
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
+
+/** Determine the number of allocated elements for an array of offsets.
+@param[in] offsets offsets after rec_offs_set_n_alloc()
+@return number of elements */
+inline
ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((warn_unused_result));
+rec_offs_get_n_alloc(const ulint* offsets)
+{
+ ulint n_alloc;
+ ut_ad(offsets);
+ n_alloc = offsets[0];
+ ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+ UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
+ return(n_alloc);
+}
+
+/** Determine the number of fields for which offsets have been initialized.
+@param[in] offsets rec_get_offsets()
+@return number of fields */
+inline
+ulint
+rec_offs_n_fields(const ulint* offsets)
+{
+ ulint n_fields;
+ ut_ad(offsets);
+ n_fields = offsets[1];
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ return(n_fields);
+}
+
+/** Get a flag of a record field.
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@param[in] flag flag to extract
+@return the flag of the record field */
+inline
+ulint
+rec_offs_nth_flag(const ulint* offsets, ulint n, ulint flag)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ /* The DEFAULT, NULL, EXTERNAL flags are mutually exclusive. */
+ ut_ad(ut_is_2pow(rec_offs_base(offsets)[1 + n]
+ & (REC_OFFS_DEFAULT
+ | REC_OFFS_SQL_NULL
+ | REC_OFFS_EXTERNAL)));
+ return rec_offs_base(offsets)[1 + n] & flag;
+}
+
+/** Determine if a record field is missing
+(should be replaced by dict_index_t::instant_field_value()).
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@return nonzero if default bit is set */
+inline
+ulint
+rec_offs_nth_default(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_DEFAULT);
+}
+
+/** Determine if a record field is SQL NULL
+(should be replaced by dict_index_t::instant_field_value()).
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@return nonzero if SQL NULL set */
+inline
+ulint
+rec_offs_nth_sql_null(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_SQL_NULL);
+}
+
+/** Determine if a record field is stored off-page.
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@return nonzero if the nth field is stored externally (extern bit set) */
+inline
+ulint
+rec_offs_nth_extern(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_EXTERNAL);
+}
+
+/** Get a global flag of a record.
+@param[in] offsets rec_get_offsets()
+@param[in] flag flag to extract
+@return the flag of the record field */
+inline
+ulint
+rec_offs_any_flag(const ulint* offsets, ulint flag)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return *rec_offs_base(offsets) & flag;
+}
+
+/** Determine if the offsets are for a record containing off-page columns.
+@param[in] offsets rec_get_offsets()
+@return nonzero if any off-page columns exist */
+inline bool rec_offs_any_extern(const ulint* offsets)
+{
+ return rec_offs_any_flag(offsets, REC_OFFS_EXTERNAL);
+}
+
+/** Determine if the offsets are for a record that is missing fields.
+@param[in] offsets rec_get_offsets()
+@return nonzero if any fields need to be replaced with
+ dict_index_t::instant_field_value() */
+inline
+ulint
+rec_offs_any_default(const ulint* offsets)
+{
+ return rec_offs_any_flag(offsets, REC_OFFS_DEFAULT);
+}
+
+/** Determine if the offsets are for other than ROW_FORMAT=REDUNDANT.
+@param[in] offsets rec_get_offsets()
+@return nonzero if ROW_FORMAT is COMPACT,DYNAMIC or COMPRESSED
+@retval 0 if ROW_FORMAT=REDUNDANT */
+inline
+ulint
+rec_offs_comp(const ulint* offsets)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
+}
+
+/** Determine if the record is the metadata pseudo-record
+in the clustered index.
+@param[in] rec leaf page record
+@param[in] index index of the record
+@return whether the record is the metadata pseudo-record */
+inline bool rec_is_metadata(const rec_t* rec, const dict_index_t* index)
+{
+ bool is = rec_get_info_bits(rec, dict_table_is_comp(index->table))
+ & REC_INFO_MIN_REC_FLAG;
+ ut_ad(!is || index->is_instant());
+ ut_ad(!is || !dict_table_is_comp(index->table)
+ || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
+ return is;
+}
+
+/** Get the nth field from an index.
+@param[in] rec index record
+@param[in] index index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in] n field number
+@param[out] len length of the field in bytes, or UNIV_SQL_NULL
+@return a read-only copy of the index field */
+inline
+const byte*
+rec_get_nth_cfield(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets,
+ ulint n,
+ ulint* len)
+{
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ if (!rec_offs_nth_default(offsets, n)) {
+ return rec_get_nth_field(rec, offsets, n, len);
+ }
+ return index->instant_field_value(n, len);
+}
+
/******************************************************//**
Gets the physical size of a field.
@return length of field */
@@ -678,16 +870,6 @@ rec_get_data_size_old(
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
UNIV_INLINE
@@ -701,15 +883,6 @@ rec_offs_set_n_alloc(
#define rec_offs_init(offsets) \
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
@@ -784,37 +957,60 @@ rec_copy(
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[out] extra record header size
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
@return total size, in bytes */
ulint
rec_get_converted_size_temp(
const dict_index_t* index,
const dfield_t* fields,
ulint n_fields,
- ulint* extra)
- MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
+ ulint* extra,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((warn_unused_result, nonnull));
-/******************************************************//**
-Determine the offset to each field in temporary file.
-@see rec_convert_dtuple_to_temp() */
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in] index index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+@param[in] n_core number of core fields (index->n_core_fields)
+@param[in] def_val default values for non-core fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */
void
rec_init_offsets_temp(
-/*==================*/
- const rec_t* rec, /*!< in: temporary file record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets,
+ ulint n_core,
+ const dict_col_t::def_t* def_val,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((nonnull));
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in] index index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+*/
+void
+rec_init_offsets_temp(
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets)
MY_ATTRIBUTE((nonnull));
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-@see rec_init_offsets_temp() */
+/** Convert a data tuple prefix to the temporary file format.
+@param[out] rec record in temporary file format
+@param[in] index clustered or secondary index
+@param[in] fields data fields
+@param[in] n_fields number of data fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
+*/
void
rec_convert_dtuple_to_temp(
-/*=======================*/
- rec_t* rec, /*!< out: record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields); /*!< in: number of fields */
+ rec_t* rec,
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((nonnull));
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
@@ -832,22 +1028,6 @@ rec_copy_prefix_to_buf(
or NULL */
ulint* buf_size) /*!< in/out: buffer size */
MY_ATTRIBUTE((nonnull));
-/** Fold a prefix of a physical record.
-@param[in] rec index record
-@param[in] offsets return value of rec_get_offsets()
-@param[in] n_fields number of complete fields to fold
-@param[in] n_bytes number of bytes to fold in the last field
-@param[in] index_id index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
- const rec_t* rec,
- const ulint* offsets,
- ulint n_fields,
- ulint n_bytes,
- index_id_t tree_id)
- MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@@ -895,7 +1075,7 @@ rec_get_converted_size_comp(
dict_table_is_comp() is
assumed to hold, even if
it does not */
- ulint status, /*!< in: status bits of the record */
+ rec_comp_status_t status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
@@ -920,23 +1100,14 @@ The fields are copied into the memory heap.
@param[in] n_fields number of fields to copy
@param[in,out] heap memory heap */
void
-rec_copy_prefix_to_dtuple_func(
+rec_copy_prefix_to_dtuple(
dtuple_t* tuple,
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool is_leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
- rec_copy_prefix_to_dtuple_func(tuple,rec,index,leaf,n_fields,heap)
-#else /* UNIV_DEBUG */
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
- rec_copy_prefix_to_dtuple_func(tuple,rec,index,n_fields,heap)
-#endif /* UNIV_DEBUG */
/***************************************************************//**
Validates the consistency of a physical record.
@return TRUE if ok */
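For illustration only (not part of the patch): the new per-field accessors above encode the mutually exclusive REC_OFFS_DEFAULT, REC_OFFS_SQL_NULL and REC_OFFS_EXTERNAL flags in the offsets array, and rec_get_nth_cfield() substitutes dict_index_t::instant_field_value() for instantly added columns. A minimal sketch of a reader, assuming the InnoDB source tree; count_field_kinds() is a hypothetical helper:

#include "rem0rec.h"

static void
count_field_kinds(const rec_t* rec, const dict_index_t* index,
		  const ulint* offsets,
		  ulint* n_default, ulint* n_null, ulint* n_extern)
{
	*n_default = *n_null = *n_extern = 0;

	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint		len;
		/* Substitutes the instant-ADD default value when the
		REC_OFFS_DEFAULT flag is set for field i. */
		const byte*	field = rec_get_nth_cfield(
			rec, index, offsets, i, &len);

		if (rec_offs_nth_default(offsets, i)) {
			++*n_default;	/* not stored in rec */
		} else if (len == UNIV_SQL_NULL) {
			++*n_null;	/* SQL NULL */
		} else if (rec_offs_nth_extern(offsets, i)) {
			++*n_extern;	/* stored off-page */
		}

		(void) field;
	}
}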
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index cb1f0d9836f..41794582f37 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,19 +26,9 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
-#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0types.h"
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
most significant bytes and bits are written below less significant.
@@ -71,12 +61,13 @@ most significant bytes and bits are written below less significant.
we can calculate the offset of the next
record with the formula:
relative_offset + offset_of_this_record
- mod UNIV_PAGE_SIZE
+ mod srv_page_size
3 3 bits status:
- 000=conventional record
- 001=node pointer record (inside B-tree)
- 010=infimum record
- 011=supremum record
+ 000=REC_STATUS_ORDINARY
+ 001=REC_STATUS_NODE_PTR
+ 010=REC_STATUS_INFIMUM
+ 011=REC_STATUS_SUPREMUM
+ 100=REC_STATUS_COLUMNS_ADDED
1xx=reserved
5 bits heap number
4 8 bits heap number
@@ -99,10 +90,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_OLD_N_FIELDS_MASK 0x7FEUL
#define REC_OLD_N_FIELDS_SHIFT 1
-#define REC_NEW_STATUS 3 /* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK 0x7UL
-#define REC_NEW_STATUS_SHIFT 0
-
#define REC_OLD_HEAP_NO 5
#define REC_HEAP_NO_MASK 0xFFF8UL
#if 0 /* defined in rem0rec.h for use of page0zip.cc */
@@ -248,8 +235,8 @@ rec_get_next_ptr_const(
{
ulint field_value;
- ut_ad(REC_NEXT_MASK == 0xFFFFUL);
- ut_ad(REC_NEXT_SHIFT == 0);
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
field_value = mach_read_from_2(rec - REC_NEXT);
@@ -267,13 +254,13 @@ rec_get_next_ptr_const(
as signed 16-bit integer in 2's complement arithmetics.
If all platforms defined int16_t in the standard headers,
the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ (int16_t) field_value + ut_align_offset(...) < srv_page_size
*/
ut_ad((field_value >= 32768
? field_value - 65536
: field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
+ + ut_align_offset(rec, srv_page_size)
+ < srv_page_size);
#endif
/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
between each record. */
@@ -281,12 +268,12 @@ rec_get_next_ptr_const(
&& field_value < 32768)
|| field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ return((byte*) ut_align_down(rec, srv_page_size)
+ + ut_align_offset(rec + field_value, srv_page_size));
} else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
+ ut_ad(field_value < srv_page_size);
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+ return((byte*) ut_align_down(rec, srv_page_size)
+ field_value);
}
}
@@ -317,12 +304,8 @@ rec_get_next_offs(
ulint comp) /*!< in: nonzero=compact page format */
{
ulint field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
field_value = mach_read_from_2(rec - REC_NEXT);
@@ -335,13 +318,13 @@ rec_get_next_offs(
as signed 16-bit integer in 2's complement arithmetics.
If all platforms defined int16_t in the standard headers,
the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ (int16_t) field_value + ut_align_offset(...) < srv_page_size
*/
ut_ad((field_value >= 32768
? field_value - 65536
: field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
+ + ut_align_offset(rec, srv_page_size)
+ < srv_page_size);
#endif
if (field_value == 0) {
@@ -354,9 +337,9 @@ rec_get_next_offs(
&& field_value < 32768)
|| field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
- return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ return(ut_align_offset(rec + field_value, srv_page_size));
} else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
+ ut_ad(field_value < srv_page_size);
return(field_value);
}
@@ -373,14 +356,9 @@ rec_set_next_offs_old(
ulint next) /*!< in: offset of the next record */
{
ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
+ ut_ad(srv_page_size > next);
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
mach_write_to_2(rec - REC_NEXT, next);
}
@@ -397,7 +375,7 @@ rec_set_next_offs_new(
ulint field_value;
ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
+ ut_ad(srv_page_size > next);
if (!next) {
field_value = 0;
@@ -408,7 +386,7 @@ rec_set_next_offs_new(
field_value = (ulint)
((lint) next
- - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+ - (lint) ut_align_offset(rec, srv_page_size));
field_value &= REC_NEXT_MASK;
}
@@ -457,26 +435,6 @@ rec_set_n_fields_old(
}
/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
- ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
- return(ret);
-}
-
-/******************************************************//**
The following function is used to get the number of fields
in a record.
@return number of data fields */
@@ -495,6 +453,7 @@ rec_get_n_fields(
}
switch (rec_get_status(rec)) {
+ case REC_STATUS_COLUMNS_ADDED:
case REC_STATUS_ORDINARY:
return(dict_index_get_n_fields(index));
case REC_STATUS_NODE_PTR:
@@ -502,10 +461,10 @@ rec_get_n_fields(
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
return(1);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
}
+
+ ut_error;
+ return(ULINT_UNDEFINED);
}
/** Confirms the n_fields of the entry is sane with comparing the other
@@ -521,13 +480,15 @@ rec_n_fields_is_sane(
const rec_t* rec,
const dtuple_t* entry)
{
- return(rec_get_n_fields(rec, index)
- == dtuple_get_n_fields(entry)
+ const ulint n_fields = rec_get_n_fields(rec, index);
+
+ return(n_fields == dtuple_get_n_fields(entry)
+ || (index->is_instant()
+ && n_fields >= index->n_core_fields)
/* a record for older SYS_INDEXES table
(missing merge_threshold column) is acceptable. */
|| (index->table->id == DICT_INDEXES_ID
- && rec_get_n_fields(rec, index)
- == dtuple_get_n_fields(entry) - 1));
+ && n_fields == dtuple_get_n_fields(entry) - 1));
}
/******************************************************//**
@@ -646,19 +607,6 @@ rec_set_info_bits_new(
}
/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/******************************************************//**
The following function is used to retrieve the info and status
bits of a record. (Only compact records have status bits.)
@return info bits */
@@ -670,12 +618,11 @@ rec_get_info_and_status_bits(
ulint comp) /*!< in: nonzero=compact page format */
{
ulint bits;
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+ compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+ & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
if (comp) {
- bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
+ bits = rec_get_info_bits(rec, TRUE)
+ | ulint(rec_get_status(rec));
} else {
bits = rec_get_info_bits(rec, FALSE);
ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
@@ -692,10 +639,8 @@ rec_set_info_and_status_bits(
rec_t* rec, /*!< in/out: physical record */
ulint bits) /*!< in: info bits */
{
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+ compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+ & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
}
@@ -774,7 +719,7 @@ rec_set_deleted_flag_new(
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
UNIV_INLINE
-ibool
+bool
rec_get_node_ptr_flag(
/*==================*/
const rec_t* rec) /*!< in: physical record */
@@ -848,10 +793,6 @@ rec_get_1byte_offs_flag(
/*====================*/
const rec_t* rec) /*!< in: physical record */
{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
REC_OLD_SHORT_SHIFT));
}
@@ -865,10 +806,7 @@ rec_set_1byte_offs_flag(
rec_t* rec, /*!< in: physical record */
ibool flag) /*!< in: TRUE if 1byte form */
{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
- ut_ad(flag <= TRUE);
+ ut_ad(flag <= 1);
rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
REC_OLD_SHORT_SHIFT);
@@ -925,29 +863,6 @@ rec_2_is_field_extern(
return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
}
-/* Get the base address of offsets. The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
-{
- ulint n_alloc;
- ut_ad(offsets);
- n_alloc = offsets[0];
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
- return(n_alloc);
-}
-
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -965,102 +880,6 @@ rec_offs_set_n_alloc(
offsets[0] = n_alloc;
}
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ut_ad(offsets);
- n_fields = offsets[1];
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- return(n_fields);
-}
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ulint i = rec_offs_n_fields(offsets);
- ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
- if (rec) {
- ut_ad((ulint) rec == offsets[2]);
- if (!comp) {
- ut_a(rec_get_n_fields_old(rec) >= i);
- }
- }
- if (index) {
- ulint max_n_fields;
- ut_ad((ulint) index == offsets[3]);
- max_n_fields = ut_max(
- dict_index_get_n_fields(index),
- dict_index_get_n_unique_in_tree(index) + 1);
- if (comp && rec) {
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- break;
- case REC_STATUS_NODE_PTR:
- max_n_fields = dict_index_get_n_unique_in_tree(
- index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- max_n_fields = 1;
- break;
- default:
- ut_error;
- }
- }
- /* index->n_def == 0 for dummy indexes if !comp */
- ut_a(!comp || index->n_def);
- ut_a(!index->n_def || i <= max_n_fields);
- }
- while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
- ut_a(curr <= last);
- last = curr;
- }
- return(TRUE);
-}
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-}
-#endif /* UNIV_DEBUG */
-
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
@@ -1072,7 +891,7 @@ rec_get_nth_field_offs(
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
+ if SQL null; UNIV_SQL_DEFAULT is default value */
{
ulint offs;
ulint length;
@@ -1089,6 +908,8 @@ rec_get_nth_field_offs(
if (length & REC_OFFS_SQL_NULL) {
length = UNIV_SQL_NULL;
+ } else if (length & REC_OFFS_DEFAULT) {
+ length = UNIV_SQL_DEFAULT;
} else {
length &= REC_OFFS_MASK;
length -= offs;
@@ -1099,34 +920,6 @@ rec_get_nth_field_offs(
}
/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
@@ -1163,36 +956,6 @@ rec_offs_any_null_extern(
}
/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
-}
-
-/******************************************************//**
Gets the physical size of a field.
@return length of field */
UNIV_INLINE
@@ -1400,7 +1163,7 @@ rec_get_nth_field_size(
os = rec_get_field_start_offs(rec, n);
next_os = rec_get_field_start_offs(rec, n + 1);
- ut_ad(next_os - os < UNIV_PAGE_SIZE);
+ ut_ad(next_os - os < srv_page_size);
return(next_os - os);
}
@@ -1427,6 +1190,7 @@ rec_set_nth_field(
ut_ad(rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_offs_nth_default(offsets, n));
if (len == UNIV_SQL_NULL) {
if (!rec_offs_nth_sql_null(offsets, n)) {
@@ -1437,7 +1201,7 @@ rec_set_nth_field(
return;
}
- data2 = rec_get_nth_field(rec, offsets, n, &len2);
+ data2 = (byte*)rec_get_nth_field(rec, offsets, n, &len2);
if (len2 == UNIV_SQL_NULL) {
ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_null_bit(rec, n, FALSE);
@@ -1501,7 +1265,7 @@ rec_offs_data_size(
ut_ad(rec_offs_validate(NULL, NULL, offsets));
size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
& REC_OFFS_MASK;
- ut_ad(size < UNIV_PAGE_SIZE);
+ ut_ad(size < srv_page_size);
return(size);
}
@@ -1518,8 +1282,8 @@ rec_offs_extra_size(
{
ulint size;
ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
- ut_ad(size < UNIV_PAGE_SIZE);
+ size = *rec_offs_base(offsets) & REC_OFFS_MASK;
+ ut_ad(size < srv_page_size);
return(size);
}
@@ -1631,27 +1395,34 @@ rec_get_converted_size(
ut_ad(index);
ut_ad(dtuple);
ut_ad(dtuple_check_typed(dtuple));
-
- ut_ad(dict_index_is_ibuf(index)
-
- || dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree_nonleaf(index) + 1
- : dict_index_get_n_fields(index))
-
- /* a record for older SYS_INDEXES table
- (missing merge_threshold column) is acceptable. */
- || (index->table->id == DICT_INDEXES_ID
- && dtuple_get_n_fields(dtuple)
- == dict_index_get_n_fields(index) - 1));
+#ifdef UNIV_DEBUG
+ if (dict_index_is_ibuf(index)) {
+ ut_ad(dtuple->n_fields > 1);
+ } else if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ == REC_STATUS_NODE_PTR) {
+ ut_ad(dtuple->n_fields
+ == dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+ } else if (index->table->id == DICT_INDEXES_ID) {
+ /* The column SYS_INDEXES.MERGE_THRESHOLD was
+ instantly added in MariaDB 10.2.2 (MySQL 5.7). */
+ ut_ad(index->n_fields == DICT_NUM_FIELDS__SYS_INDEXES);
+ ut_ad(dtuple->n_fields == DICT_NUM_FIELDS__SYS_INDEXES
+ || dtuple->n_fields
+ == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD);
+ } else {
+ ut_ad(dtuple->n_fields >= index->n_core_fields);
+ ut_ad(dtuple->n_fields <= index->n_fields);
+ }
+#endif
if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_comp(index,
- dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK,
- dtuple->fields,
- dtuple->n_fields, NULL));
+ return(rec_get_converted_size_comp(
+ index,
+ static_cast<rec_comp_status_t>(
+ dtuple->info_bits
+ & REC_NEW_STATUS_MASK),
+ dtuple->fields,
+ dtuple->n_fields, NULL));
}
data_size = dtuple_get_data_size(dtuple, 0);
@@ -1659,105 +1430,5 @@ rec_get_converted_size(
extra_size = rec_get_converted_extra_size(
data_size, dtuple_get_n_fields(dtuple), n_ext);
-#if 0
- /* This code is inactive since it may be the wrong place to add
- in the size of node pointers used in parent pages AND it is not
- currently needed since ha_innobase::max_supported_key_length()
- ensures that the key size limit for each page size is well below
- the actual limit ((free space on page / 4) - record overhead).
- But those limits will need to be raised when InnoDB can
- support multiple page sizes. At that time, we will need
- to consider the node pointer on these universal btrees. */
-
- if (dict_index_is_ibuf(index)) {
- /* This is for the insert buffer B-tree.
- All fields in the leaf tuple ascend to the
- parent node plus the child page pointer. */
-
- /* ibuf cannot contain externally stored fields */
- ut_ad(n_ext == 0);
-
- /* Add the data pointer and recompute extra_size
- based on one more field. */
- data_size += REC_NODE_PTR_SIZE;
- extra_size = rec_get_converted_extra_size(
- data_size,
- dtuple_get_n_fields(dtuple) + 1,
- 0);
-
- /* Be sure dtuple->n_fields has this node ptr
- accounted for. This function should correspond to
- what rec_convert_dtuple_to_rec() needs in storage.
- In optimistic insert or update-not-in-place, we will
- have to ensure that if the record is converted to a
- node pointer, it will not become too large.*/
- }
-#endif
-
return(data_size + extra_size);
}
-
-/** Fold a prefix of a physical record.
-@param[in] rec index record
-@param[in] offsets return value of rec_get_offsets()
-@param[in] n_fields number of complete fields to fold
-@param[in] n_bytes number of bytes to fold in the last field
-@param[in] index_id index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
- const rec_t* rec,
- const ulint* offsets,
- ulint n_fields,
- ulint n_bytes,
- index_id_t tree_id)
-{
- ulint i;
- const byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
- ut_ad(n_fields > 0 || n_bytes > 0);
-
- n_fields_rec = rec_offs_n_fields(offsets);
- ut_ad(n_fields <= n_fields_rec);
- ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
- if (n_fields > n_fields_rec) {
- n_fields = n_fields_rec;
- }
-
- if (n_fields == n_fields_rec) {
- n_bytes = 0;
- }
-
- fold = ut_fold_ull(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
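For illustration only (not part of the patch): with the new REC_STATUS_COLUMNS_ADDED value, code that switches on rec_get_status() is expected to treat it like REC_STATUS_ORDINARY, exactly as the rec_get_n_fields() hunk above does. A hedged sketch; rec_status_is_leaf_data() is a hypothetical helper:

#include "rem0rec.h"

static bool
rec_status_is_leaf_data(const rec_t* rec)
{
	switch (rec_get_status(rec)) {
	case REC_STATUS_COLUMNS_ADDED:	/* leaf record with instantly
					added columns */
	case REC_STATUS_ORDINARY:	/* plain leaf record */
		return(true);
	case REC_STATUS_NODE_PTR:
	case REC_STATUS_INFIMUM:
	case REC_STATUS_SUPREMUM:
		return(false);
	}

	ut_error;
	return(false);
}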
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index f8133f77466..ac78a3c6748 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,8 +54,7 @@ This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
-/** Maximum indexed field length for table format UNIV_FORMAT_B and
-beyond.
+/** Maximum indexed field length for tables that have atomic BLOBs.
This (3072) is the maximum index row length allowed, so we cannot create index
prefix column longer than that. */
#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index 58b4c817acf..0c2ffdb46ea 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -180,15 +180,15 @@ tokenized doc string. The index has three "fields":
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
- dict_index_t* index, /*!< in: Original FTS index
- based on which this sort index
- is created */
- const dict_table_t* table, /*!< in: table that FTS index
- is being created on */
- ibool* opt_doc_id_size);
- /*!< out: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
+ dict_index_t* index, /*!< in: Original FTS index
+ based on which this sort index
+ is created */
+ dict_table_t* table, /*!< in,out: table that FTS index
+ is being created on */
+ ibool* opt_doc_id_size);
+ /*!< out: whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort */
/********************************************************************//**
Initialize FTS parallel sort structures.
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
index 12fe6b4f5d2..94df1a61341 100644
--- a/storage/innobase/include/row0import.h
+++ b/storage/innobase/include/row0import.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,21 +46,13 @@ row_import_for_mysql(
in MySQL */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool discarded, /*!< in: set MIX_LEN column bit
- to discarded, if true */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
+/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
+@param[in,out] trx dictionary transaction
+@param[in] table_id table identifier
+@param[in] discarded whether to set or clear the flag
+@return DB_SUCCESS or error code */
+dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
+ bool discarded)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*****************************************************************//**
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index 05f16d28327..8ad955e8ed5 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -191,6 +191,8 @@ struct ins_node_t{
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
trx_id_t trx_id; /*!< trx id or the last trx which executed the
node */
+ byte vers_start_buf[8]; /* Buffers for System Versioning */
+ byte vers_end_buf[8]; /* system fields. */
mem_heap_t* entry_sys_heap;
/* memory heap used as auxiliary storage;
entry_list and sys fields are stored here;
@@ -216,5 +218,4 @@ struct ins_node_t{
#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
inserted */
-
#endif
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index e17ec1b8569..fa6592f4628 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -47,17 +47,21 @@ for online creation.
bool
row_log_allocate(
/*=============*/
+ const trx_t* trx, /*!< in: the ALTER TABLE transaction */
dict_index_t* index, /*!< in/out: index */
dict_table_t* table, /*!< in/out: new table being rebuilt,
or NULL when creating a secondary index */
bool same_pk,/*!< in: whether the definition of the
PRIMARY KEY has remained the same */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+ added, changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL if !table */
- const char* path) /*!< in: where to create temporary file */
+ const char* path, /*!< in: where to create temporary file */
+ const TABLE* old_table, /*!< in: table definition before alter */
+ bool allow_not_null) /*!< in: allow null to non-null
+ conversion */
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/******************************************************//**
@@ -205,13 +209,15 @@ row_log_table_blob_alloc(
@param[in,out] stage performance schema accounting object, used by
ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
stage->inc() will be called for each block of log that is applied.
+@param[in] new_table Altered table
@return DB_SUCCESS, or error code on failure */
dberr_t
row_log_table_apply(
que_thr_t* thr,
dict_table_t* old_table,
struct TABLE* table,
- ut_stage_alter_t* stage)
+ ut_stage_alter_t* stage,
+ dict_table_t* new_table)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index f1bb45d76d9..7d49f0ee346 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -58,11 +58,11 @@ struct ib_sequence_t;
/** @brief Block size for I/O operations in merge sort.
-The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+The minimum is srv_page_size, or page_get_free_space_of_empty()
rounded to a power of 2.
When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. */
+can be set as small as srv_page_size / 2. */
typedef byte row_merge_block_t;
/** @brief Secondary buffer for I/O operations of merge records.
@@ -98,7 +98,7 @@ struct row_merge_buf_t {
/** Information about temporary files used in merge sort */
struct merge_file_t {
- int fd; /*!< file descriptor */
+ pfs_os_file_t fd; /*!< file descriptor */
ulint offset; /*!< file offset (end of file) */
ib_uint64_t n_rec; /*!< number of records in the file */
};
@@ -190,7 +190,7 @@ row_merge_drop_temp_indexes(void);
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
@param[in] path location for creating temporary merge files, or NULL
@return File descriptor */
-int
+pfs_os_file_t
row_merge_file_create_low(
const char* path)
MY_ATTRIBUTE((warn_unused_result));
@@ -200,7 +200,7 @@ if UNIV_PFS_IO is defined. */
void
row_merge_file_destroy_low(
/*=======================*/
- int fd); /*!< in: merge file descriptor */
+ const pfs_os_file_t& fd); /*!< in: merge file descriptor */
/*********************************************************************//**
Provide a new pathname for a table that is being renamed if it belongs to
@@ -257,7 +257,6 @@ row_merge_rename_index_to_drop(
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/** Create the index and load in to the dictionary.
-@param[in,out] trx trx (sets error_state)
@param[in,out] table the index is on this table
@param[in] index_def the index definition
@param[in] add_v new virtual columns added along with add
@@ -265,7 +264,6 @@ row_merge_rename_index_to_drop(
@return index, or NULL on error */
dict_index_t*
row_merge_create_index(
- trx_t* trx,
dict_table_t* table,
const index_def_t* index_def,
const dict_add_v_col_t* add_v)
@@ -307,7 +305,7 @@ old_table unless creating a PRIMARY KEY
@param[in] n_indexes size of indexes[]
@param[in,out] table MySQL table, for reporting erroneous key value
if applicable
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added, changed columns, or NULL
@param[in] col_map mapping of old column numbers to new ones, or
NULL if old_table == new_table
@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
@@ -321,6 +319,7 @@ this function and it will be passed to other functions for further accounting.
@param[in] add_v new virtual columns added along with indexes
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
+@param[in] allow_non_null allow the conversion from null to not-null
@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
@@ -332,14 +331,15 @@ row_merge_build_indexes(
const ulint* key_numbers,
ulint n_indexes,
struct TABLE* table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const ulint* col_map,
ulint add_autoinc,
ib_sequence_t& sequence,
bool skip_pk_sort,
ut_stage_alter_t* stage,
const dict_add_v_col_t* add_v,
- struct TABLE* eval_table)
+ struct TABLE* eval_table,
+ bool allow_non_null)
MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
@@ -369,7 +369,7 @@ UNIV_INTERN
bool
row_merge_write(
/*============*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to write,
in number of row_merge_block_t elements */
const void* buf, /*!< in: data */
@@ -390,7 +390,7 @@ row_merge_buf_empty(
@param[out] merge_file merge file structure
@param[in] path location for creating temporary file, or NULL
@return file descriptor, or -1 on failure */
-int
+pfs_os_file_t
row_merge_file_create(
merge_file_t* merge_file,
const char* path)
@@ -418,7 +418,7 @@ row_merge_sort(
const row_merge_dup_t* dup,
merge_file_t* file,
row_merge_block_t* block,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
const bool update_progress,
const double pct_progress,
const double pct_cost,
@@ -457,7 +457,7 @@ row_merge_file_destroy(
bool
row_merge_read(
/*===========*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to read
in number of row_merge_block_t
elements */
@@ -476,7 +476,7 @@ row_merge_read_rec(
mrec_buf_t* buf, /*!< in/out: secondary buffer */
const byte* b, /*!< in: pointer to record */
const dict_index_t* index, /*!< in: index of the record */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
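For illustration only (not part of the patch): merge-sort temporary files are now identified by pfs_os_file_t instead of a plain int, so Performance Schema can attribute their I/O. A hedged sketch of the create/destroy pairing under that assumption; merge_file_roundtrip() is hypothetical and error handling is omitted:

#include "row0merge.h"

static void
merge_file_roundtrip(const char* path)
{
	merge_file_t	file;

	/* The descriptor is returned and also kept in file.fd. */
	pfs_os_file_t	fd = row_merge_file_create(&file, path);

	/* ... row_merge_write()/row_merge_read() now take the descriptor
	as const pfs_os_file_t& instead of int ... */

	row_merge_file_destroy(&file);
	(void) fd;
}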
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 546540ac640..fb385dbf2ac 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -223,14 +223,26 @@ row_lock_table_autoinc_for_mysql(
dberr_t
row_lock_table(row_prebuilt_t* prebuilt);
+/** System Versioning: row_insert_for_mysql() modes */
+enum ins_mode_t {
+ /* plain row (without versioning) */
+ ROW_INS_NORMAL = 0,
+ /* row_start = TRX_ID, row_end = MAX */
+ ROW_INS_VERSIONED,
+ /* row_end = TRX_ID */
+ ROW_INS_HISTORICAL
+};
+
/** Does an insert for MySQL.
@param[in] mysql_rec row in the MySQL format
@param[in,out] prebuilt prebuilt struct in MySQL handle
+@param[in] ins_mode row insert mode: plain, versioned, or historical
@return error code or DB_SUCCESS*/
dberr_t
row_insert_for_mysql(
const byte* mysql_rec,
- row_prebuilt_t* prebuilt)
+ row_prebuilt_t* prebuilt,
+ ins_mode_t ins_mode)
MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
@@ -254,7 +266,8 @@ row_get_prebuilt_update_vector(
@param[in,out] prebuilt prebuilt struct in MySQL handle
@return error code or DB_SUCCESS */
dberr_t
-row_update_for_mysql(row_prebuilt_t* prebuilt)
+row_update_for_mysql(
+ row_prebuilt_t* prebuilt)
MY_ATTRIBUTE((warn_unused_result));
/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
@@ -625,6 +638,8 @@ struct row_prebuilt_t {
not to be confused with InnoDB
externally stored columns
(VARCHAR can be off-page too) */
+ unsigned versioned_write:1;/*!< whether this is
+ a versioned write */
mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
@@ -740,7 +755,7 @@ struct row_prebuilt_t {
allocated mem buf start, because
there is a 4 byte magic number at the
start and at the end */
- ibool keep_other_fields_on_keyread; /*!< when using fetch
+ bool keep_other_fields_on_keyread; /*!< when using fetch
cache with HA_EXTRA_KEYREAD, don't
overwrite other fields in mysql row
row buffer.*/
@@ -801,6 +816,20 @@ struct row_prebuilt_t {
/** The MySQL table object */
TABLE* m_mysql_table;
+
+ /** Get template by dict_table_t::cols[] number */
+ const mysql_row_templ_t* get_template_by_col(ulint col) const
+ {
+ ut_ad(col < n_template);
+ ut_ad(mysql_template);
+ for (ulint i = col; i < n_template; ++i) {
+ const mysql_row_templ_t* templ = &mysql_template[i];
+ if (!templ->is_virtual && templ->col_no == col) {
+ return templ;
+ }
+ }
+ return NULL;
+ }
};
/** Callback for row_mysql_sys_index_iterate() */
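For illustration only (not part of the patch): the new ins_mode_t argument of row_insert_for_mysql() tells InnoDB how to fill the System Versioning columns. A hedged sketch of a handler-level caller; insert_row_sketch() is hypothetical and the real decision in ha_innodb.cc involves more state:

#include "row0mysql.h"

static dberr_t
insert_row_sketch(const byte* mysql_rec, row_prebuilt_t* prebuilt)
{
	/* prebuilt->versioned_write is the flag added by this patch. */
	const ins_mode_t mode = prebuilt->versioned_write
		? ROW_INS_VERSIONED	/* row_start = TRX_ID, row_end = MAX */
		: ROW_INS_NORMAL;	/* plain, non-versioned row */

	return(row_insert_for_mysql(mysql_rec, prebuilt, mode));
}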
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index ceb4a34cd72..25cd43979bf 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -108,7 +108,7 @@ struct purge_node_t{
upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /*!< NULL, or row reference to the next row to
+ const dtuple_t* ref; /*!< NULL, or row reference to the next row to
handle */
dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the indexed fields of the row to
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index d4381e55096..630a40b0765 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -87,8 +87,8 @@ row_build_index_entry_low(
inserted or purged */
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap, /*!< in: memory heap from which
+ const dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap, /*!< in,out: memory heap from which
the memory for the index entry
is allocated */
ulint flag) /*!< in: ROW_BUILD_NORMAL,
@@ -109,8 +109,8 @@ row_build_index_entry(
inserted or purged */
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
+ const dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in,out: memory heap from which
the memory for the index entry
is allocated */
MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4)));
@@ -150,9 +150,9 @@ row_build(
consulted instead; the user
columns in this table should be
the same columns as in index->table */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+ added, changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
@@ -174,7 +174,7 @@ addition of new virtual columns.
of an index, or NULL if
index->table should be
consulted instead
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added, changed columns, or NULL
@param[in] add_v new virtual columns added
along with new indexes
@param[in] col_map mapping of old column
@@ -191,7 +191,7 @@ row_build_w_add_vcol(
const rec_t* rec,
const ulint* offsets,
const dict_table_t* col_table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const dict_add_v_col_t* add_v,
const ulint* col_map,
row_ext_t** ext,
@@ -266,9 +266,8 @@ row_build_row_ref_in_tuple(
held as long as the row
reference is used! */
const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ ulint* offsets)/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx) /*!< in: transaction or NULL */
MY_ATTRIBUTE((nonnull(1,2,3)));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
@@ -282,8 +281,8 @@ row_build_row_ref_fast(
const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
+ const rec_t* rec, /*!< in: secondary index record;
+ must be preserved while ref is used, as we do
not copy field values to heap */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
/***************************************************************//**
@@ -395,7 +394,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
{
mtr->start();
- switch (index->space) {
+ switch (index->table->space_id) {
case IBUF_SPACE_ID:
if (pessimistic
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
@@ -406,7 +405,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
mtr->set_log_mode(MTR_LOG_NO_REDO);
break;
default:
- mtr->set_named_space(index->space);
+ index->set_modified(*mtr);
break;
}
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 8a32bb3ffd2..e1a3b5f6a1a 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -118,8 +119,8 @@ row_build_index_entry(
inserted or purged */
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
+ const dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in,out: memory heap from which
the memory for the index entry
is allocated */
{
@@ -144,8 +145,8 @@ row_build_row_ref_fast(
const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
+ const rec_t* rec, /*!< in: secondary index record;
+ must be preserved while ref is used, as we do
not copy field values to heap */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index ab20f9bd6bf..1e58686dfcb 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -133,8 +133,7 @@ row_sel_convert_mysql_key_to_innobase(
ulint buf_len, /*!< in: buffer length */
dict_index_t* index, /*!< in: index of the key value */
const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx); /*!< in: transaction */
+ ulint key_len); /*!< in: MySQL key value length */
/** Searches for rows in the database. This is used in the interface to
diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h
index 5915596a389..a7592f33cf7 100644
--- a/storage/innobase/include/row0trunc.h
+++ b/storage/innobase/include/row0trunc.h
@@ -181,19 +181,16 @@ public:
/** Create an index for a table.
@param[in] table_name table name, for which to create
the index
- @param[in] space_id space id where we have to
- create the index
- @param[in] page_size page size of the .ibd file
+ @param[in,out] space tablespace
@param[in] index_type type of index to truncate
@param[in] index_id id of index to truncate
@param[in] btr_redo_create_info control info for ::btr_create()
@param[in,out] mtr mini-transaction covering the
create index
@return root page no or FIL_NULL on failure */
- ulint create_index(
+ inline ulint create_index(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
+ fil_space_t* space,
ulint index_type,
index_id_t index_id,
const btr_create_t& btr_redo_create_info,
@@ -202,31 +199,27 @@ public:
/** Create the indexes for a table
@param[in] table_name table name, for which to create the
indexes
- @param[in] space_id space id where we have to create the
- indexes
- @param[in] page_size page size of the .ibd file
- @param[in] flags tablespace flags
+ @param[in,out] space tablespace
@param[in] format_flags page format flags
@return DB_SUCCESS or error code. */
- dberr_t create_indexes(
+ inline dberr_t create_indexes(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
- ulint flags,
+ fil_space_t* space,
ulint format_flags);
/** Check if index has been modified since TRUNCATE log snapshot
was recorded.
- @param space_id space_id where table/indexes resides.
+ @param[in] space tablespace
+ @param[in] root_page_no index root page number
@return true if modified else false */
- bool is_index_modified_since_logged(
- ulint space_id,
- ulint root_page_no) const;
+ inline bool is_index_modified_since_logged(
+ const fil_space_t* space,
+ ulint root_page_no) const;
/** Drop indexes for a table.
- @param space_id space_id where table/indexes resides.
+ @param[in,out] space tablespace
@return DB_SUCCESS or error code. */
- void drop_indexes(ulint space_id) const;
+ void drop_indexes(fil_space_t* space) const;
/**
Parses log record during recovery
@@ -420,9 +413,4 @@ private:
const char* log_file_name);
};
-/** MySQL 5.7 TRUNCATE TABLE.
-@param table table being truncated
-@param trx transaction covering the truncate
-@return error code or DB_SUCCESS */
-dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx);
#endif /* row0trunc_h */
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index abf4f61329a..5ac2c7c5ee0 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -107,7 +107,7 @@ struct undo_node_t{
ulint cmpl_info;/*!< compiler analysis of an update */
upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /*!< row reference to the next row to handle */
+ const dtuple_t* ref; /*!< row reference to the next row to handle */
dtuple_t* row; /*!< a copy (also fields copied to heap) of the
row to handle */
row_ext_t* ext; /*!< NULL, or prefixes of the externally
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index 9686f8d6896..742f897f3ec 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -232,27 +232,19 @@ row_upd_build_difference_binary(
mem_heap_t* heap,
TABLE* mysql_table)
MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
+/** Apply an update vector to an index entry.
+@param[in,out] entry index entry to be updated; the clustered index record
+ must be covered by a lock or a page latch to prevent
+ deletion (rollback or purge)
+@param[in] index index of the entry
+@param[in] update update vector built for the entry
+@param[in,out] heap memory heap for copying off-page columns */
void
row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
+ dtuple_t* entry,
+ const dict_index_t* index,
+ const upd_t* update,
+ mem_heap_t* heap)
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
@@ -460,6 +452,7 @@ struct upd_t{
virtual column update now */
ulint n_fields; /*!< number of update fields */
upd_field_t* fields; /*!< array of update fields */
+ byte vers_sys_value[8]; /*!< buffer for updating system fields */
/** Append an update field to the end of array
@param[in] field an update field */
@@ -480,6 +473,22 @@ struct upd_t{
return(false);
}
+ /** Determine if the update affects a system-versioned column or row_end. */
+ bool affects_versioned() const
+ {
+ for (ulint i = 0; i < n_fields; i++) {
+ dtype_t type = fields[i].new_val.type;
+ if (type.is_versioned()) {
+ return true;
+ }
+ // versioned DELETE is UPDATE SET row_end=NOW
+ if (type.vers_sys_end()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
#ifdef UNIV_DEBUG
bool validate() const
{
@@ -496,17 +505,24 @@ struct upd_t{
};
+/** Kinds of update operation */
+enum delete_mode_t {
+ NO_DELETE = 0, /*!< this operation does not delete */
+ PLAIN_DELETE, /*!< ordinary delete */
+ VERSIONED_DELETE /*!< update old and insert a new row */
+};
+
/* Update node structure which also implements the delete operation
of a row */
struct upd_node_t{
que_common_t common; /*!< node type: QUE_NODE_UPDATE */
- ibool is_delete;/* TRUE if delete, FALSE if update */
+ delete_mode_t is_delete; /*!< kind of DELETE */
ibool searched_update;
/* TRUE if searched update, FALSE if
positioned */
- ibool in_mysql_interface;
- /* TRUE if the update node was created
+ bool in_mysql_interface;
+ /* whether the update node was created
for the MySQL interface */
dict_foreign_t* foreign;/* NULL or pointer to a foreign key
constraint if this update node is used in
@@ -551,6 +567,12 @@ struct upd_node_t{
dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the row to update; this must be reset
to NULL after a successful update */
+ dtuple_t* historical_row; /*!< historical row used in
+ CASCADE UPDATE/SET NULL;
+ allocated from historical_heap */
+ mem_heap_t* historical_heap; /*!< heap for historical row insertion;
+ created when row to update is located;
+ freed right before row update */
row_ext_t* ext; /*!< NULL, or prefixes of the externally
stored columns in the old row */
dtuple_t* upd_row;/* NULL, or a copy of the updated row */
@@ -565,6 +587,22 @@ struct upd_node_t{
/* column assignment list */
ulint magic_n;
+ /** Also set row_start = CURRENT_TIMESTAMP/trx->id
+ @param[in] trx transaction */
+ void make_versioned_update(const trx_t* trx);
+ /** Only set row_end = CURRENT_TIMESTAMP/trx->id.
+ Do not touch other fields at all.
+ @param[in] trx transaction */
+ void make_versioned_delete(const trx_t* trx);
+
+private:
+ /** Appends row_start or row_end field to update vector and sets a
+ CURRENT_TIMESTAMP/trx->id value to it.
+ Supposed to be called only by make_versioned_update() and
+ make_versioned_delete().
+ @param[in] trx transaction
+ @param[in] vers_sys_idx table->row_start or table->row_end */
+ void make_versioned_helper(const trx_t* trx, ulint idx);
};
#define UPD_NODE_MAGIC_N 1579975
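Not part of the patch: a minimal standalone C++ sketch of the versioning check that upd_t::affects_versioned() performs above. The struct and field names below are simplified stand-ins for dtype_t/upd_field_t, not real InnoDB types.

// Standalone sketch: scan an update vector for system-versioning columns,
// mirroring upd_t::affects_versioned() in the hunk above.
#include <vector>

struct sketch_type  { bool versioned; bool sys_end; };  // stand-in for dtype_t flags
struct sketch_field { sketch_type type; };               // stand-in for upd_field_t

// true if any updated column is versioned, or row_end is being set
// (a versioned DELETE is expressed as UPDATE ... SET row_end = NOW)
bool affects_versioned(const std::vector<sketch_field>& fields)
{
        for (const sketch_field& f : fields) {
                if (f.type.versioned || f.type.sys_end) {
                        return true;
                }
        }
        return false;
}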
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 364c876ecc7..5e43a272388 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -181,9 +181,8 @@ row_upd_rec_sys_fields(
offset = row_get_trx_id_offset(index, offsets);
}
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+
/* During IMPORT the trx id in the record can be in the
future, if the .ibd file is being imported from another
instance. During IMPORT roll_ptr will be 0. */
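Not part of the patch: the hunk above swaps a preprocessor #error guard for compile_time_assert. A tiny sketch of the same pattern, using standard static_assert as a stand-in for the server's macro and demo constants in place of DATA_TRX_ID/DATA_ROLL_PTR:

// The relation between two constants is checked at compile time instead of
// via an #if/#error preprocessor guard.
enum { DATA_TRX_ID_DEMO = 1, DATA_ROLL_PTR_DEMO = 2 };  // demo values only

static_assert(DATA_TRX_ID_DEMO + 1 == DATA_ROLL_PTR_DEMO,
              "roll pointer must follow the transaction id column");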
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index ac03478e083..dd438c366a8 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,33 +40,19 @@ class ReadView;
/** Determine if an active transaction has inserted or modified a secondary
index record.
+@param[in,out] caller_trx trx of current thread
@param[in] rec secondary index record
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
-@return the active transaction; trx_release_reference() must be invoked
+@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
+ trx_t* caller_trx,
const rec_t* rec,
dict_index_t* index,
const ulint* offsets);
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@param[in] trx_id transaction id in the version
-@param[in] name table name
-@param[in,out] mtr mini transaction holding the latch on the
- clustered index record; it will also hold
- the latch on purge_view
-@return TRUE if earlier version should be preserved */
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id,
- const table_name_t& name,
- mtr_t* mtr);
-
/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
@@ -132,6 +118,7 @@ which should be seen by a semi-consistent read. */
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
+ trx_t* caller_trx,/*!<in/out: trx of current thread */
const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
index 9573c5add84..35937fe1204 100644
--- a/storage/innobase/include/srv0conc.h
+++ b/storage/innobase/include/srv0conc.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -41,9 +42,7 @@ Created 2011/04/18 Sunny Bains
#define srv_conc_h
/** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
+a semaphore inside InnoDB. srv_start() sets the value. */
extern ulint srv_max_n_threads;
/** The following controls how many threads we let inside InnoDB concurrently:
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index e4034f3a6ff..069ab5cf93a 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -2,7 +2,7 @@
Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -503,18 +503,18 @@ extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
counter option. */
-#define MONITOR_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] |= \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_ON(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] |= \
+ (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
-#define MONITOR_OFF(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] &= \
- ~((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_OFF(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] &= \
+ ~(ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
/** Check whether the requested monitor is turned on/off */
-#define MONITOR_IS_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] & \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_IS_ON(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] & \
+ (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
/** The actual monitor counter array that records each monitor counter
value */
@@ -608,8 +608,9 @@ Use MONITOR_INC if appropriate mutex protection exists.
#define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
if (enabled) { \
ib_uint64_t value; \
- value = my_atomic_add64( \
- (int64*) &MONITOR_VALUE(monitor), 1) + 1; \
+ value = my_atomic_add64_explicit( \
+ (int64*) &MONITOR_VALUE(monitor), 1, \
+ MY_MEMORY_ORDER_RELAXED) + 1; \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \
@@ -624,8 +625,9 @@ Use MONITOR_DEC if appropriate mutex protection exists.
#define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
if (enabled) { \
ib_uint64_t value; \
- value = my_atomic_add64( \
- (int64*) &MONITOR_VALUE(monitor), -1) - 1; \
+ value = my_atomic_add64_explicit( \
+ (int64*) &MONITOR_VALUE(monitor), -1, \
+ MY_MEMORY_ORDER_RELAXED) - 1; \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \
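Not part of the patch: a standalone sketch of the two ideas in the srv0mon.h hunks above, written with std::atomic instead of the my_atomic_* wrappers (an assumption; the demo names are not InnoDB identifiers): a monitor id addresses one bit in an array of machine words, and counter updates use a relaxed read-modify-write, accepting the small race the comments mention.

#include <atomic>
#include <climits>
#include <cstdint>

static const unsigned NUM_BITS_WORD = sizeof(unsigned long) * CHAR_BIT;

// demo-sized bitmap of monitor on/off bits
static unsigned long monitor_bits[4];

// turn a monitor on: set its bit, as MONITOR_ON() does
inline void monitor_on(unsigned id)
{
        monitor_bits[id / NUM_BITS_WORD] |= 1UL << (id % NUM_BITS_WORD);
}

// test whether a monitor is on, as MONITOR_IS_ON() does
inline bool monitor_is_on(unsigned id)
{
        return (monitor_bits[id / NUM_BITS_WORD]
                & (1UL << (id % NUM_BITS_WORD))) != 0;
}

// one monitor value, bumped with a relaxed atomic add as in MONITOR_ATOMIC_INC_LOW
static std::atomic<int64_t> monitor_value(0);

inline int64_t monitor_atomic_inc()
{
        return monitor_value.fetch_add(1, std::memory_order_relaxed) + 1;
}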
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 20206f9d9eb..c6873ca8938 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -77,7 +77,7 @@ struct srv_stats_t
lsn_ctr_1_t os_log_written;
/** Number of writes being done to the log files.
- Protected by log_sys->write_mutex. */
+ Protected by log_sys.write_mutex. */
ulint_ctr_1_t os_log_pending_writes;
/** We increase this counter, when we don't have enough
@@ -144,7 +144,7 @@ struct srv_stats_t
ulint_ctr_1_t n_lock_wait_count;
/** Number of threads currently waiting on database locks */
- simple_counter<ulint, true> n_lock_wait_current_count;
+ simple_atomic_counter<> n_lock_wait_current_count;
/** Number of rows read. */
ulint_ctr_64_t n_rows_read;
@@ -252,20 +252,11 @@ extern my_bool high_level_read_only;
/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
-/** whether to use backup-safe TRUNCATE and crash-safe RENAME
-instead of the MySQL 5.7 WL#6501 TRUNCATE TABLE implementation */
-extern my_bool srv_safe_truncate;
/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
extern ulong srv_thread_sleep_delay;
/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
extern ulong srv_adaptive_max_sleep_delay;
-/** The file format to use on new *.ibd files. */
-extern ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-extern ulint srv_max_file_format_at_startup;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
@@ -282,25 +273,12 @@ Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
extern my_bool srv_numa_interleave;
-/* Use trim operation */
-extern my_bool srv_use_trim;
-
/* Use atomic writes i.e disable doublewrite buffer */
extern my_bool srv_use_atomic_writes;
/* Compression algorithm*/
extern ulong innodb_compression_algorithm;
-/* Number of flush threads */
-#define MTFLUSH_MAX_WORKER 64
-#define MTFLUSH_DEFAULT_WORKER 8
-
-/* Number of threads used for multi-threaded flush */
-extern long srv_mtflush_threads;
-
-/* If this flag is TRUE, then we will use multi threaded flush. */
-extern my_bool srv_use_mtflush;
-
/** TRUE if the server was successfully started */
extern bool srv_was_started;
@@ -358,17 +336,15 @@ extern const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
extern char* srv_log_group_home_dir;
-/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
-#define SRV_N_LOG_FILES_MAX 100
extern ulong srv_n_log_files;
/** The InnoDB redo log file size, or 0 when changing the redo log format
at startup (while disallowing writes to the redo log). */
extern ulonglong srv_log_file_size;
-extern ulint srv_log_buffer_size;
+extern ulong srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern uint srv_flush_log_at_timeout;
extern ulong srv_log_write_ahead_size;
-extern char srv_adaptive_flushing;
+extern my_bool srv_adaptive_flushing;
extern my_bool srv_flush_sync;
#ifdef WITH_INNODB_DISALLOW_WRITES
@@ -399,8 +375,6 @@ extern ulong srv_n_page_hash_locks;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
extern ulong srv_LRU_scan_depth;
/** Whether or not to flush neighbors of a block */
-extern ulong srv_buf_pool_dump_pct; /*!< dump that may % of each buffer
- pool during BP dump */
extern ulong srv_flush_neighbors;
/** Previously requested size */
extern ulint srv_buf_pool_old_size;
@@ -410,14 +384,18 @@ extern ulint srv_buf_pool_base_size;
extern ulint srv_buf_pool_curr_size;
/** Dump this % of each buffer pool during BP dump */
extern ulong srv_buf_pool_dump_pct;
+#ifdef UNIV_DEBUG
+/** Abort load after this amount of pages */
+extern ulong srv_buf_pool_load_pages_abort;
+#endif
/** Lock table size in bytes */
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
extern my_bool srv_random_read_ahead;
extern ulong srv_read_ahead_threshold;
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
+extern ulong srv_n_read_io_threads;
+extern ulong srv_n_write_io_threads;
/* Defragmentation. Originally the Facebook default value was 100, but that is too high */
#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
@@ -451,8 +429,6 @@ to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
-extern char* srv_file_flush_method_str;
-
extern ulint srv_max_n_open_files;
extern ulong srv_n_page_cleaners;
@@ -487,7 +463,7 @@ extern my_bool srv_stats_include_delete_marked;
extern unsigned long long srv_stats_modified_counter;
extern my_bool srv_stats_sample_traditional;
-extern ibool srv_use_doublewrite_buf;
+extern my_bool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
extern ulong srv_checksum_algorithm;
@@ -629,16 +605,16 @@ extern mysql_pfs_key_t trx_rollback_clean_thread_key;
schema */
# define pfs_register_thread(key) \
do { \
- struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ struct PSI_thread* psi = PSI_CALL_new_thread(key, NULL, 0);\
/* JAN: TODO: MYSQL 5.7 PSI \
- PSI_THREAD_CALL(set_thread_os_id)(psi); */ \
- PSI_THREAD_CALL(set_thread)(psi); \
+ PSI_CALL_set_thread_os_id(psi); */ \
+ PSI_CALL_set_thread(psi); \
} while (0)
/* This macro delist the current thread from performance schema */
# define pfs_delete_thread() \
do { \
- PSI_THREAD_CALL(delete_current_thread)(); \
+ PSI_CALL_delete_current_thread(); \
} while (0)
# else
# define pfs_register_thread(key)
@@ -679,10 +655,9 @@ extern PSI_stage_info srv_stage_buffer_pool_load;
#endif /* HAVE_PSI_STAGE_INTERFACE */
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
+/** Alternatives for innodb_flush_method */
enum srv_flush_t {
- SRV_FSYNC = 1, /*!< fsync, the default */
+ SRV_FSYNC = 0, /*!< fsync, the default */
SRV_O_DSYNC, /*!< open log files in O_SYNC mode */
SRV_LITTLESYNC, /*!< do not call os_file_flush()
when writing data files, but do flush
@@ -694,18 +669,21 @@ enum srv_flush_t {
the reason for which is that some FS
do not flush meta-data when
unbuffered IO happens */
- SRV_O_DIRECT_NO_FSYNC,
+ SRV_O_DIRECT_NO_FSYNC
/*!< do not use fsync() when using
direct IO i.e.: it can be set to avoid
the fsync() call that we make when
using SRV_UNIX_O_DIRECT. However, in
this case user/DBA should be sure about
the integrity of the meta-data */
- SRV_ALL_O_DIRECT_FSYNC
+#ifdef _WIN32
+ ,SRV_ALL_O_DIRECT_FSYNC
/*!< Traditional Windows approach to open
all files without caching, and do FileFlushBuffers()*/
+#endif
};
-extern enum srv_flush_t srv_file_flush_method;
+/** innodb_flush_method */
+extern ulong srv_file_flush_method;
/** Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
@@ -942,16 +920,10 @@ srv_was_tablespace_truncated(const fil_space_t* space);
#ifdef UNIV_DEBUG
/** Disables master thread. It's used by:
SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
void
-srv_master_thread_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/** Status variables to be passed to MySQL */
@@ -967,6 +939,7 @@ struct export_var_t{
char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */
char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */
char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
+ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
@@ -995,7 +968,7 @@ struct export_var_t{
ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
- ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
+ ulint innodb_page_size; /*!< srv_page_size */
ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read*/
ulint innodb_page0_read; /*!< srv_stats.page0_read */
@@ -1032,6 +1005,9 @@ struct export_var_t{
ulint innodb_defragment_count; /*!< Number of defragment
operations*/
+ /** Number of instant ALTER TABLE operations that affect columns */
+ ulong innodb_instant_alter_column;
+
ulint innodb_onlineddl_rowlog_rows; /*!< Online alter rows */
ulint innodb_onlineddl_rowlog_pct_used; /*!< Online alter percentage
of used row log buffer */
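Not part of the patch: with SRV_FSYNC renumbered to 0 and the Windows-only member made conditional, the ulong sysvar can be cast straight onto the enum. A hedged sketch with demo names only (not the server's actual option-handling code):

enum demo_flush_t {
        DEMO_FSYNC = 0,          // the default
        DEMO_O_DSYNC,
        DEMO_LITTLESYNC,
        DEMO_NOSYNC,
        DEMO_O_DIRECT,
        DEMO_O_DIRECT_NO_FSYNC
#ifdef _WIN32
        , DEMO_ALL_O_DIRECT_FSYNC
#endif
};

// the stored ulong setting maps directly onto the enum values
inline demo_flush_t demo_flush_method(unsigned long setting)
{
        return static_cast<demo_flush_t>(setting);
}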
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 54876d686a5..c2665a360cc 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -43,20 +43,16 @@ only one buffer pool instance is used. */
dberr_t
srv_undo_tablespaces_init(bool create_new_db);
-/****************************************************************//**
-Starts Innobase and creates a new database if database files
-are not found and the user wants.
+/** Start InnoDB.
+@param[in] create_new_db whether to create a new database
@return DB_SUCCESS or error code */
-dberr_t
-innobase_start_or_create_for_mysql();
+dberr_t srv_start(bool create_new_db);
/** Shut down InnoDB. */
-void
-innodb_shutdown();
+void innodb_shutdown();
/** Shut down background threads that can generate undo log. */
-void
-srv_shutdown_bg_undo_sources();
+void srv_shutdown_bg_undo_sources();
/*************************************************************//**
Copy the file path component of the physical file to parameter. It will
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 83c40e50173..2fefd4ed7c3 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -98,16 +98,11 @@ void
sync_array_print(
FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-void
-sync_array_init(
- ulint n_threads); /*!< in: Number of slots to create */
+/** Create the primary system wait arrays */
+void sync_array_init();
-/**********************************************************************//**
-Close sync array wait sub-system. */
-void
-sync_array_close();
+/** Destroy the sync array wait sub-system. */
+void sync_array_close();
/**********************************************************************//**
Get an instance of the sync wait array. */
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
index 0ea84aefcb7..93a56e24040 100644
--- a/storage/innobase/include/sync0policy.h
+++ b/storage/innobase/include/sync0policy.h
@@ -48,7 +48,7 @@ public:
m_mutex(),
m_filename(),
m_line(),
- m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+ m_thread_id(ULINT_UNDEFINED)
{
/* No op */
}
@@ -74,7 +74,8 @@ public:
{
m_mutex = mutex;
- m_thread_id = os_thread_get_curr_id();
+ my_atomic_storelint(&m_thread_id,
+ ulint(os_thread_get_curr_id()));
m_filename = filename;
@@ -87,7 +88,7 @@ public:
{
m_mutex = NULL;
- m_thread_id = os_thread_id_t(ULINT_UNDEFINED);
+ my_atomic_storelint(&m_thread_id, ULINT_UNDEFINED);
m_filename = NULL;
@@ -103,7 +104,7 @@ public:
msg << m_mutex->policy().to_string();
- if (os_thread_pf(m_thread_id) != ULINT_UNDEFINED) {
+ if (m_thread_id != ULINT_UNDEFINED) {
msg << " addr: " << m_mutex
<< " acquired: " << locked_from().c_str();
@@ -136,7 +137,7 @@ public:
unsigned m_line;
/** Thread ID of the thread that own(ed) the mutex */
- os_thread_id_t m_thread_id;
+ ulint m_thread_id;
};
/** Constructor. */
@@ -155,7 +156,7 @@ public:
/** Mutex is being destroyed. */
void destroy() UNIV_NOTHROW
{
- ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+ ut_ad((ulint)my_atomic_loadlint(&m_context.m_thread_id) == ULINT_UNDEFINED);
m_magic_n = 0;
@@ -165,8 +166,7 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
@param[in] id Mutex ID */
- void init(latch_id_t id)
- UNIV_NOTHROW;
+ void init(latch_id_t id) UNIV_NOTHROW;
/** Called when an attempt is made to lock the mutex
@param[in] mutex Mutex instance to be locked
@@ -197,7 +197,7 @@ public:
bool is_owned() const UNIV_NOTHROW
{
return(os_thread_eq(
- m_context.m_thread_id,
+ (os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id),
os_thread_get_curr_id()));
}
@@ -219,7 +219,7 @@ public:
os_thread_id_t get_thread_id() const
UNIV_NOTHROW
{
- return(m_context.m_thread_id);
+ return((os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id));
}
/** Magic number to check for memory corruption. */
@@ -239,7 +239,7 @@ struct NoPolicy {
void init(const Mutex&, latch_id_t, const char*, uint32_t)
UNIV_NOTHROW { }
void destroy() UNIV_NOTHROW { }
- void enter(const Mutex&, const char*, unsigned line) UNIV_NOTHROW { }
+ void enter(const Mutex&, const char*, unsigned) UNIV_NOTHROW { }
void add(uint32_t, uint32_t) UNIV_NOTHROW { }
void locked(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
void release(const Mutex&) UNIV_NOTHROW { }
@@ -273,12 +273,11 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] mutex Mutex instance to track
@param[in] id Mutex ID
@param[in] filename File where mutex was created
@param[in] line Line in filename */
void init(
- const MutexType& mutex,
+ const Mutex&,
latch_id_t id,
const char* filename,
uint32_t line)
@@ -421,15 +420,8 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] mutex Mutex instance to track
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- const MutexType& mutex,
- latch_id_t id,
- const char* filename,
- uint32_t line)
+ @param[in] id Mutex ID */
+ void init(const Mutex&, latch_id_t id, const char*, uint32)
UNIV_NOTHROW
{
/* It can be LATCH_ID_BUF_BLOCK_MUTEX or
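Not part of the patch: a sketch of the ownership tracking above, using std::atomic and std::thread as stand-ins for my_atomic_storelint/my_atomic_loadlint and os_thread_get_curr_id() (all names below are illustrative): the owner's thread id is written and read atomically so that is_owned() never observes a torn value written by another thread.

#include <atomic>
#include <thread>

class owner_tracker {
public:
        // record the current thread as owner when the mutex is acquired
        void locked()
        { m_owner.store(std::this_thread::get_id(), std::memory_order_relaxed); }

        // clear the owner when the mutex is released
        void released()
        { m_owner.store(std::thread::id(), std::memory_order_relaxed); }

        // does the calling thread own the mutex?
        bool is_owned() const
        { return m_owner.load(std::memory_order_relaxed) == std::this_thread::get_id(); }

private:
        std::atomic<std::thread::id> m_owner{std::thread::id()};
};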
diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic
index f3526bbfef5..a28e3c382b4 100644
--- a/storage/innobase/include/sync0policy.ic
+++ b/storage/innobase/include/sync0policy.ic
@@ -80,7 +80,7 @@ void MutexDebug<Mutex>::locked(
UNIV_NOTHROW
{
ut_ad(!is_owned());
- ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+ ut_ad(m_context.m_thread_id == ULINT_UNDEFINED);
m_context.locked(mutex, name, line);
@@ -88,7 +88,7 @@ void MutexDebug<Mutex>::locked(
}
template <typename Mutex>
-void MutexDebug<Mutex>::release(const Mutex* mutex)
+void MutexDebug<Mutex>::release(const Mutex*)
UNIV_NOTHROW
{
ut_ad(is_owned());
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 9dfe76c3348..429560f637e 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -499,13 +499,13 @@ bool
rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount, /*!< in: amount to decrement */
- lint threshold); /*!< in: threshold of judgement */
+ int32_t amount, /*!< in: amount to decrement */
+ int32_t threshold); /*!< in: threshold of judgement */
#ifdef UNIV_DEBUG
/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0. */
-ibool
+bool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
@@ -569,10 +569,10 @@ struct rw_lock_t
#endif /* UNIV_DEBUG */
{
/** Holds the state of the lock. */
- volatile lint lock_word;
+ int32_t lock_word;
/** 1: there are waiters */
- volatile uint32_t waiters;
+ int32_t waiters;
/** number of granted SX locks. */
volatile ulint sx_recursive;
@@ -601,9 +601,6 @@ struct rw_lock_t
/** File name where lock created */
const char* cfile_name;
- /** last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;
-
/** File name where last x-locked */
const char* last_x_file_name;
@@ -613,9 +610,6 @@ struct rw_lock_t
/** If 1 then the rw-lock is a block lock */
unsigned is_block_lock:1;
- /** Line number where last time s-locked */
- unsigned last_s_line:14;
-
/** Line number where last time x-locked */
unsigned last_x_line:14;
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 404c7cb9b86..f0c33ecbeda 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -77,7 +77,8 @@ rw_lock_get_writer(
/*===============*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_word = lock->lock_word;
+ int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_word <= X_LOCK_DECR);
if (lock_word > X_LOCK_HALF_DECR) {
@@ -109,15 +110,16 @@ rw_lock_get_reader_count(
/*=====================*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_word = lock->lock_word;
+ int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_word <= X_LOCK_DECR);
if (lock_word > X_LOCK_HALF_DECR) {
/* s-locked, no x-waiter */
- return(X_LOCK_DECR - lock_word);
+ return ulint(X_LOCK_DECR - lock_word);
} else if (lock_word > 0) {
/* s-locked, with sx-locks only */
- return(X_LOCK_HALF_DECR - lock_word);
+ return ulint(X_LOCK_HALF_DECR - lock_word);
} else if (lock_word == 0) {
/* x-locked */
return(0);
@@ -145,7 +147,8 @@ rw_lock_get_x_lock_count(
/*=====================*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_copy = lock->lock_word;
+ int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_copy <= X_LOCK_DECR);
if (lock_copy == 0 || lock_copy == -X_LOCK_HALF_DECR) {
@@ -158,12 +161,12 @@ rw_lock_get_x_lock_count(
/* no s-lock, no sx-lock, 2 or more x-locks.
First 2 x-locks are set with -X_LOCK_DECR,
all other recursive x-locks are set with -1 */
- return(2 - (lock_copy + X_LOCK_DECR));
+ return ulint(2 - X_LOCK_DECR - lock_copy);
} else {
/* no s-lock, 1 or more sx-lock, 2 or more x-locks.
First 2 x-locks are set with -(X_LOCK_DECR + X_LOCK_HALF_DECR),
all other recursive x-locks are set with -1 */
- return(2 - (lock_copy + X_LOCK_DECR + X_LOCK_HALF_DECR));
+ return ulint(2 - X_LOCK_DECR - X_LOCK_HALF_DECR - lock_copy);
}
}
@@ -178,7 +181,8 @@ rw_lock_get_sx_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
#ifdef UNIV_DEBUG
- lint lock_copy = lock->lock_word;
+ int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_copy <= X_LOCK_DECR);
@@ -197,9 +201,7 @@ rw_lock_get_sx_lock_count(
}
/******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
+Recursive x-locks are not supported: they should be handled by the caller and
need not be atomic since they are performed by the current lock holder.
Returns true if the decrement was made, false if not.
@return true if decr occurs */
@@ -208,16 +210,17 @@ bool
rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount, /*!< in: amount to decrement */
- lint threshold) /*!< in: threshold of judgement */
+ int32_t amount, /*!< in: amount to decrement */
+ int32_t threshold) /*!< in: threshold of judgement */
{
- lint local_lock_word;
-
- local_lock_word = lock->lock_word;
- while (local_lock_word > threshold) {
- if (my_atomic_caslint(&lock->lock_word,
- &local_lock_word,
- local_lock_word - amount)) {
+ int32_t lock_copy = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
+ while (lock_copy > threshold) {
+ if (my_atomic_cas32_strong_explicit(&lock->lock_word,
+ &lock_copy,
+ lock_copy - amount,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED)) {
return(true);
}
}
@@ -246,11 +249,6 @@ rw_lock_s_lock_low(
ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_S, file_name, line));
- /* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
return(TRUE); /* locking succeeded */
}
@@ -304,29 +302,32 @@ rw_lock_x_lock_func_nowait(
const char* file_name,/*!< in: file name where lock requested */
unsigned line) /*!< in: line where requested */
{
- lint oldval = X_LOCK_DECR;
+ int32_t oldval = X_LOCK_DECR;
- if (my_atomic_caslint(&lock->lock_word, &oldval, 0)) {
+ if (my_atomic_cas32_strong_explicit(&lock->lock_word, &oldval, 0,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED)) {
lock->writer_thread = os_thread_get_curr_id();
} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
- /* Relock: this lock_word modification is safe since no other
- threads can modify (lock, unlock, or reserve) lock_word while
- there is an exclusive writer and this is the writer thread. */
- if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+ /* Relock: even though no other thread can modify (lock, unlock
+ or reserve) lock_word while there is an exclusive writer and
+ this is the writer thread, we still want concurrent threads to
+ observe consistent values. */
+ if (oldval == 0 || oldval == -X_LOCK_HALF_DECR) {
/* There are 1 x-locks */
- lock->lock_word -= X_LOCK_DECR;
- } else if (lock->lock_word <= -X_LOCK_DECR) {
+ my_atomic_add32_explicit(&lock->lock_word, -X_LOCK_DECR,
+ MY_MEMORY_ORDER_RELAXED);
+ } else if (oldval <= -X_LOCK_DECR) {
/* There are 2 or more x-locks */
- lock->lock_word--;
+ my_atomic_add32_explicit(&lock->lock_word, -1,
+ MY_MEMORY_ORDER_RELAXED);
+ /* Watch for too many recursive locks */
+ ut_ad(oldval < 1);
} else {
/* Failure */
return(FALSE);
}
-
- /* Watch for too many recursive locks */
- ut_ad(lock->lock_word < 0);
-
} else {
/* Failure */
return(FALSE);
@@ -354,14 +355,19 @@ rw_lock_s_unlock_func(
#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word > -X_LOCK_DECR);
- ut_ad(lock->lock_word != 0);
- ut_ad(lock->lock_word < X_LOCK_DECR);
+#ifdef UNIV_DEBUG
+ int32_t dbg_lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
+ ut_ad(dbg_lock_word > -X_LOCK_DECR);
+ ut_ad(dbg_lock_word != 0);
+ ut_ad(dbg_lock_word < X_LOCK_DECR);
+#endif
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
/* Increment lock_word to indicate 1 less reader */
- lint lock_word = my_atomic_addlint(&lock->lock_word, 1) + 1;
+ int32_t lock_word = my_atomic_add32_explicit(&lock->lock_word, 1,
+ MY_MEMORY_ORDER_RELEASE) + 1;
if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
/* wait_ex waiter exists. It may not be asleep, but we signal
@@ -387,41 +393,49 @@ rw_lock_x_unlock_func(
#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR
- || lock->lock_word <= -X_LOCK_DECR);
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
- if (lock->lock_word == 0) {
+ ut_ad(lock_word == 0 || lock_word == -X_LOCK_HALF_DECR
+ || lock_word <= -X_LOCK_DECR);
+
+ if (lock_word == 0) {
/* Last caller in a possible recursive chain. */
lock->writer_thread = 0;
}
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
- if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
- /* There is 1 x-lock */
- /* atomic increment is needed, because it is last */
- if (my_atomic_addlint(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
- ut_error;
- }
+ if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
+ /* Last X-lock owned by this thread; it may still hold SX-locks.
+ ACQ_REL due to...
+ RELEASE: we release rw-lock
+ ACQUIRE: we want waiters to be loaded after lock_word is stored */
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+ MY_MEMORY_ORDER_ACQ_REL);
/* This no longer has an X-lock but it may still have
an SX-lock. So it is now free for S-locks by other threads.
We need to signal read/write waiters.
We do not need to signal wait_ex waiters, since they cannot
exist when there is a writer. */
- if (lock->waiters) {
- my_atomic_store32((int32*) &lock->waiters, 0);
+ if (my_atomic_load32_explicit(&lock->waiters,
+ MY_MEMORY_ORDER_RELAXED)) {
+ my_atomic_store32_explicit(&lock->waiters, 0,
+ MY_MEMORY_ORDER_RELAXED);
os_event_set(lock->event);
sync_array_object_signalled();
}
- } else if (lock->lock_word == -X_LOCK_DECR
- || lock->lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+ } else if (lock_word == -X_LOCK_DECR
+ || lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
/* There are 2 x-locks */
- lock->lock_word += X_LOCK_DECR;
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+ MY_MEMORY_ORDER_RELAXED);
} else {
/* There are more than 2 x-locks. */
- ut_ad(lock->lock_word < -X_LOCK_DECR);
- lock->lock_word += 1;
+ ut_ad(lock_word < -X_LOCK_DECR);
+ my_atomic_add32_explicit(&lock->lock_word, 1,
+ MY_MEMORY_ORDER_RELAXED);
}
ut_ad(rw_lock_validate(lock));
@@ -447,28 +461,37 @@ rw_lock_sx_unlock_func(
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
if (lock->sx_recursive == 0) {
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
/* Last caller in a possible recursive chain. */
- if (lock->lock_word > 0) {
+ if (lock_word > 0) {
lock->writer_thread = 0;
+ ut_ad(lock_word <= INT_MAX32 - X_LOCK_HALF_DECR);
+
+ /* Last SX-lock owned by this thread; it does not hold an X-lock.
+ ACQ_REL due to...
+ RELEASE: we release rw-lock
+ ACQUIRE: we want waiters to be loaded after lock_word is stored */
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+ MY_MEMORY_ORDER_ACQ_REL);
- if (my_atomic_addlint(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
- ut_error;
- }
/* Lock is now free. May have to signal read/write
waiters. We do not need to signal wait_ex waiters,
since they cannot exist when there is an sx-lock
holder. */
- if (lock->waiters) {
- my_atomic_store32((int32*) &lock->waiters, 0);
+ if (my_atomic_load32_explicit(&lock->waiters,
+ MY_MEMORY_ORDER_RELAXED)) {
+ my_atomic_store32_explicit(&lock->waiters, 0,
+ MY_MEMORY_ORDER_RELAXED);
os_event_set(lock->event);
sync_array_object_signalled();
}
} else {
/* still has x-lock */
- ut_ad(lock->lock_word == -X_LOCK_HALF_DECR
- || lock->lock_word <= -(X_LOCK_DECR
- + X_LOCK_HALF_DECR));
- lock->lock_word += X_LOCK_HALF_DECR;
+ ut_ad(lock_word == -X_LOCK_HALF_DECR ||
+ lock_word <= -(X_LOCK_DECR + X_LOCK_HALF_DECR));
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+ MY_MEMORY_ORDER_RELAXED);
}
}
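Not part of the patch: a minimal sketch of the lock_word protocol after this change, written with std::atomic rather than my_atomic_* and with an illustrative X_LOCK_DECR value: readers CAS the word downwards with acquire-on-success (relaxed on failure), and the unlock path adds the amount back with release ordering so the next acquirer sees all prior writes.

#include <atomic>
#include <cstdint>

static const int32_t SKETCH_X_LOCK_DECR = 0x20000000;

// try to take one slot; mirrors rw_lock_lock_word_decr()
inline bool sketch_word_decr(std::atomic<int32_t>& lock_word,
                             int32_t amount, int32_t threshold)
{
        int32_t seen = lock_word.load(std::memory_order_relaxed);
        while (seen > threshold) {
                if (lock_word.compare_exchange_strong(
                            seen, seen - amount,
                            std::memory_order_acquire,
                            std::memory_order_relaxed)) {
                        return true;    // slot taken
                }
                // seen was reloaded by the failed CAS; retry
        }
        return false;
}

// release one s-lock; mirrors the increment in rw_lock_s_unlock_func()
inline int32_t sketch_s_unlock(std::atomic<int32_t>& lock_word)
{
        return lock_word.fetch_add(1, std::memory_order_release) + 1;
}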
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 5ec967507b9..5af2cbfe6c5 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -56,7 +56,6 @@ extern mysql_pfs_key_t buf_pool_zip_mutex_key;
extern mysql_pfs_key_t cache_last_read_mutex_key;
extern mysql_pfs_key_t dict_foreign_err_mutex_key;
extern mysql_pfs_key_t dict_sys_mutex_key;
-extern mysql_pfs_key_t file_format_max_mutex_key;
extern mysql_pfs_key_t fil_system_mutex_key;
extern mysql_pfs_key_t flush_list_mutex_key;
extern mysql_pfs_key_t fts_bg_threads_mutex_key;
@@ -94,7 +93,6 @@ extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
extern mysql_pfs_key_t buf_dblwr_mutex_key;
-extern mysql_pfs_key_t trx_undo_mutex_key;
extern mysql_pfs_key_t trx_mutex_key;
extern mysql_pfs_key_t trx_pool_mutex_key;
extern mysql_pfs_key_t trx_pool_manager_mutex_key;
@@ -109,6 +107,7 @@ extern mysql_pfs_key_t sync_array_mutex_key;
extern mysql_pfs_key_t thread_mutex_key;
extern mysql_pfs_key_t zip_pad_mutex_key;
extern mysql_pfs_key_t row_drop_list_mutex_key;
+extern mysql_pfs_key_t rw_trx_hash_element_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 13c9e8fbabc..53332af46ab 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -106,16 +106,6 @@ V
Transaction system header
|
V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
Rollback segment mutex The rollback segment mutex must be
| reserved, if, e.g., a new page must
| be added to an undo log. The rollback
@@ -158,7 +148,7 @@ V
lock_sys_mutex Mutex protecting lock_sys_t
|
V
-trx_sys->mutex Mutex protecting trx_sys_t
+trx_sys.mutex Mutex protecting trx_sys_t
|
V
Threads mutex Background thread scheduling mutex
@@ -231,6 +221,7 @@ enum latch_level_t {
SYNC_REC_LOCK,
SYNC_THREADS,
SYNC_TRX,
+ SYNC_RW_TRX_HASH_ELEMENT,
SYNC_TRX_SYS,
SYNC_LOCK_SYS,
SYNC_LOCK_WAIT_SYS,
@@ -253,7 +244,6 @@ enum latch_level_t {
SYNC_RSEG_HEADER_NEW,
SYNC_NOREDO_RSEG,
SYNC_REDO_RSEG,
- SYNC_TRX_UNDO,
SYNC_PURGE_LATCH,
SYNC_TREE_NODE,
SYNC_TREE_NODE_FROM_HASH,
@@ -268,8 +258,6 @@ enum latch_level_t {
SYNC_DICT,
SYNC_FTS_CACHE,
- SYNC_FILE_FORMAT_TAG,
-
SYNC_DICT_OPERATION,
SYNC_TRX_I_S_LAST_READ,
@@ -337,7 +325,6 @@ enum latch_id_t {
LATCH_ID_SRV_MISC_TMPFILE,
LATCH_ID_SRV_MONITOR_FILE,
LATCH_ID_BUF_DBLWR,
- LATCH_ID_TRX_UNDO,
LATCH_ID_TRX_POOL,
LATCH_ID_TRX_POOL_MANAGER,
LATCH_ID_TRX,
@@ -378,12 +365,11 @@ enum latch_id_t {
LATCH_ID_SCRUB_STAT_MUTEX,
LATCH_ID_DEFRAGMENT_MUTEX,
LATCH_ID_BTR_DEFRAGMENT_MUTEX,
- LATCH_ID_MTFLUSH_THREAD_MUTEX,
- LATCH_ID_MTFLUSH_MUTEX,
LATCH_ID_FIL_CRYPT_MUTEX,
LATCH_ID_FIL_CRYPT_STAT_MUTEX,
LATCH_ID_FIL_CRYPT_DATA_MUTEX,
LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+ LATCH_ID_RW_TRX_HASH_ELEMENT,
LATCH_ID_TEST_MUTEX,
LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
};
@@ -491,10 +477,10 @@ struct OSMutex {
}
private:
-#ifdef UNIV_DEBUG
+#ifdef DBUG_ASSERT_EXISTS
/** true if the mutex has been freed/destroyed. */
bool m_freed;
-#endif /* UNIV_DEBUG */
+#endif /* DBUG_ASSERT_EXISTS */
sys_mutex_t m_mutex;
};
@@ -991,8 +977,7 @@ struct latch_t {
UNIV_NOTHROW
:
m_id(id),
- m_rw_lock(),
- m_temp_fsp() { }
+ m_rw_lock() {}
/** Destructor */
virtual ~latch_t() UNIV_NOTHROW { }
@@ -1026,24 +1011,6 @@ struct latch_t {
return(sync_latch_get_level(m_id));
}
- /** @return true if the latch is for a temporary file space*/
- bool is_temp_fsp() const
- UNIV_NOTHROW
- {
- return(m_temp_fsp);
- }
-
- /** Set the temporary tablespace flag. (For internal temporary
- tables, MySQL 5.7 does not always acquire the index->lock. We
- need to figure out the context and add some special rules
- during the checks.) */
- void set_temp_fsp()
- UNIV_NOTHROW
- {
- ut_ad(get_id() == LATCH_ID_FIL_SPACE);
- m_temp_fsp = true;
- }
-
/** @return the latch name, m_id must be set */
const char* get_name() const
UNIV_NOTHROW
@@ -1059,9 +1026,6 @@ struct latch_t {
/** true if it is a rw-lock. In debug mode, rw_lock_t derives from
this class and sets this variable. */
bool m_rw_lock;
-
- /** true if it is an temporary space latch */
- bool m_temp_fsp;
};
/** Subclass this to iterate over a thread's acquired latch levels. */
@@ -1153,92 +1117,88 @@ enum rw_lock_flag_t {
#endif /* UNIV_INNOCHECKSUM */
-#ifdef _WIN64
static inline ulint my_atomic_addlint(ulint *A, ulint B)
{
+#ifdef _WIN64
return ulint(my_atomic_add64((volatile int64*)A, B));
+#else
+ return ulint(my_atomic_addlong(A, B));
+#endif
}
static inline ulint my_atomic_loadlint(const ulint *A)
{
+#ifdef _WIN64
return ulint(my_atomic_load64((volatile int64*)A));
+#else
+ return ulint(my_atomic_loadlong(A));
+#endif
}
static inline lint my_atomic_addlint(volatile lint *A, lint B)
{
+#ifdef _WIN64
return my_atomic_add64((volatile int64*)A, B);
+#else
+ return my_atomic_addlong(A, B);
+#endif
}
static inline lint my_atomic_loadlint(const lint *A)
{
+#ifdef _WIN64
return lint(my_atomic_load64((volatile int64*)A));
+#else
+ return my_atomic_loadlong(A);
+#endif
}
static inline void my_atomic_storelint(ulint *A, ulint B)
{
+#ifdef _WIN64
my_atomic_store64((volatile int64*)A, B);
+#else
+ my_atomic_storelong(A, B);
+#endif
}
-static inline lint my_atomic_caslint(volatile lint *A, lint *B, lint C)
+/** Simple non-atomic counter aligned to CACHE_LINE_SIZE
+@tparam Type the integer type of the counter */
+template <typename Type>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
{
- return my_atomic_cas64((volatile int64*)A, (int64 *)B, C);
-}
+ /** Increment the counter */
+ Type inc() { return add(1); }
+ /** Decrement the counter */
+ Type dec() { return add(Type(~0)); }
-static inline ulint my_atomic_caslint(ulint *A, ulint *B, ulint C)
-{
- return my_atomic_cas64((volatile int64*)A, (int64 *)B, (int64)C);
-}
+ /** Add to the counter
+ @param[in] i amount to be added
+ @return the value of the counter after adding */
+ Type add(Type i) { return m_counter += i; }
-#else
-#define my_atomic_addlint my_atomic_addlong
-#define my_atomic_loadlint my_atomic_loadlong
-#define my_atomic_caslint my_atomic_caslong
-#endif
+ /** @return the value of the counter */
+ operator Type() const { return m_counter; }
-/** Simple counter aligned to CACHE_LINE_SIZE
-@tparam Type the integer type of the counter
-@tparam atomic whether to use atomic memory access */
-template <typename Type = ulint, bool atomic = false>
-struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
+private:
+ /** The counter */
+ Type m_counter;
+};
+
+/** Simple atomic counter aligned to CACHE_LINE_SIZE
+@tparam Type lint or ulint */
+template <typename Type = ulint>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_atomic_counter
{
/** Increment the counter */
Type inc() { return add(1); }
/** Decrement the counter */
- Type dec() { return sub(1); }
+ Type dec() { return add(Type(~0)); }
/** Add to the counter
@param[in] i amount to be added
- @return the value of the counter after adding */
- Type add(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
- if (atomic) {
-#ifdef _MSC_VER
-// Suppress type conversion/ possible loss of data warning
-#pragma warning (push)
-#pragma warning (disable : 4244)
-#endif
- return Type(my_atomic_addlint(reinterpret_cast<ulint*>
- (&m_counter), i));
-#ifdef _MSC_VER
-#pragma warning (pop)
-#endif
- } else {
- return m_counter += i;
- }
- }
- /** Subtract from the counter
- @param[in] i amount to be subtracted
- @return the value of the counter after adding */
- Type sub(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
- if (atomic) {
- return Type(my_atomic_addlint(&m_counter, -lint(i)));
- } else {
- return m_counter -= i;
- }
- }
+ @return the value of the counter before adding */
+ Type add(Type i) { return my_atomic_addlint(&m_counter, i); }
/** @return the value of the counter (non-atomic access)! */
operator Type() const { return m_counter; }
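Not part of the patch: a sketch of the two counter shapes that replace the templated simple_counter, with std::atomic and alignas(64) standing in for my_atomic_addlint and MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE): both occupy their own cache line; the plain one relies on external protection, and the atomic one returns the value before the addition.

#include <atomic>

struct alignas(64) sketch_plain_counter {
        // value after the addition; caller must serialize access
        unsigned long add(unsigned long i) { return m_counter += i; }
        unsigned long m_counter = 0;
};

struct alignas(64) sketch_atomic_counter {
        // value before the addition, matching the documented semantics
        unsigned long add(unsigned long i)
        { return m_counter.fetch_add(i, std::memory_order_relaxed); }
        std::atomic<unsigned long> m_counter{0};
};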
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index 9eb19ec06c2..b1145f33586 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -263,10 +263,10 @@ trx_i_s_possibly_fetch_data_into_cache(
trx_i_s_cache_t* cache); /*!< in/out: cache */
/*******************************************************************//**
-Returns TRUE if the data in the cache is truncated due to the memory
+Returns true if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
@return TRUE if truncated */
-ibool
+bool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache); /*!< in: cache */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index c6085b312a5..43d771c646b 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -27,9 +27,8 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0purge_h
#define trx0purge_h
-#include "trx0sys.h"
+#include "trx0rseg.h"
#include "que0types.h"
-#include "page0page.h"
#include <queue>
@@ -47,16 +46,13 @@ trx_purge_get_log_from_hist(
/*========================*/
fil_addr_t node_addr); /*!< in: file address of the history
list node of the log */
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
+/** Prepend the history list with an undo log.
+Remove the undo log segment from the rseg slot if it is too big for reuse.
+@param[in] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
+trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr);
/*******************************************************************//**
This function runs a purge batch.
@return number of undo log pages handled in the batch */
@@ -65,35 +61,7 @@ trx_purge(
/*======*/
ulint n_purge_threads, /*!< in: number of purge tasks to
submit to task queue. */
- ulint limit, /*!< in: the maximum number of
- records to purge in one batch */
bool truncate); /*!< in: truncate history if true */
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-void
-trx_purge_stop(void);
-/*================*/
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-void
-trx_purge_run(void);
-/*================*/
-
-/** Purge states */
-enum purge_state_t {
- PURGE_STATE_INIT, /*!< Purge instance created */
- PURGE_STATE_RUN, /*!< Purge should be running */
- PURGE_STATE_STOP, /*!< Purge should be stopped */
- PURGE_STATE_EXIT, /*!< Purge has been shutdown */
- PURGE_STATE_DISABLED /*!< Purge was never started */
-};
-
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-purge_state_t
-trx_purge_state(void);
-/*=================*/
/** Rollback segements from a given transaction with trx-no
scheduled for purge. */
@@ -103,69 +71,28 @@ private:
trx_rsegs_t;
public:
typedef trx_rsegs_t::iterator iterator;
+ typedef trx_rsegs_t::const_iterator const_iterator;
/** Default constructor */
- TrxUndoRsegs() : m_trx_no() { }
-
- explicit TrxUndoRsegs(trx_id_t trx_no)
- :
- m_trx_no(trx_no)
- {
- // Do nothing
- }
-
- /** Get transaction number
- @return trx_id_t - get transaction number. */
- trx_id_t get_trx_no() const
- {
- return(m_trx_no);
- }
-
- /** Add rollback segment.
- @param rseg rollback segment to add. */
- void push_back(trx_rseg_t* rseg)
- {
- m_rsegs.push_back(rseg);
- }
-
- /** Erase the element pointed by given iterator.
- @param[in] iterator iterator */
- void erase(iterator& it)
- {
- m_rsegs.erase(it);
- }
-
- /** Number of registered rsegs.
- @return size of rseg list. */
- ulint size() const
- {
- return(m_rsegs.size());
- }
-
- /**
- @return an iterator to the first element */
- iterator begin()
- {
- return(m_rsegs.begin());
- }
-
- /**
- @return an iterator to the end */
- iterator end()
- {
- return(m_rsegs.end());
- }
+ TrxUndoRsegs() {}
+ /** Constructor */
+ TrxUndoRsegs(trx_rseg_t& rseg)
+ : m_commit(rseg.last_commit), m_rsegs(1, &rseg) {}
+ /** Constructor */
+ TrxUndoRsegs(trx_id_t trx_no, trx_rseg_t& rseg)
+ : m_commit(trx_no << 1), m_rsegs(1, &rseg) {}
- /** Append rollback segments from referred instance to current
- instance. */
- void append(const TrxUndoRsegs& append_from)
- {
- ut_ad(get_trx_no() == append_from.get_trx_no());
+ /** @return the transaction commit identifier */
+ trx_id_t trx_no() const { return m_commit >> 1; }
- m_rsegs.insert(m_rsegs.end(),
- append_from.m_rsegs.begin(),
- append_from.m_rsegs.end());
- }
+ bool operator!=(const TrxUndoRsegs& other) const
+ { return m_commit != other.m_commit; }
+ bool empty() const { return m_rsegs.empty(); }
+ void erase(iterator& it) { m_rsegs.erase(it); }
+ iterator begin() { return(m_rsegs.begin()); }
+ iterator end() { return(m_rsegs.end()); }
+ const_iterator begin() const { return m_rsegs.begin(); }
+ const_iterator end() const { return m_rsegs.end(); }
/** Compare two TrxUndoRsegs based on trx_no.
@param elem1 first element to compare
@@ -173,17 +100,12 @@ public:
@return true if elem1 > elem2 else false.*/
bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
{
- return(lhs.m_trx_no > rhs.m_trx_no);
+ return(lhs.m_commit > rhs.m_commit);
}
- /** Compiler defined copy-constructor/assignment operator
- should be fine given that there is no reference to a memory
- object outside scope of class object.*/
-
private:
- /** The rollback segments transaction number. */
- trx_id_t m_trx_no;
-
+ /** Copy trx_rseg_t::last_commit */
+ trx_id_t m_commit;
/** Rollback segments of a transaction, scheduled for purge. */
trx_rsegs_t m_rsegs;
};
@@ -193,16 +115,14 @@ typedef std::priority_queue<
std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
TrxUndoRsegs> purge_pq_t;
-/**
-Chooses the rollback segment with the smallest trx_no. */
+/** Chooses the rollback segment with the oldest committed transaction */
struct TrxUndoRsegsIterator {
-
/** Constructor */
TrxUndoRsegsIterator();
-
/** Sets the next rseg to purge in purge_sys.
+ Executed in the purge coordinator thread.
@return whether anything is to be purged */
- bool set_next();
+ inline bool set_next();
private:
// Disable copying
@@ -210,38 +130,11 @@ private:
TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
/** The current element to process */
- TrxUndoRsegs m_trx_undo_rsegs;
-
- /** Track the current element in m_trx_undo_rseg */
- TrxUndoRsegs::iterator m_iter;
-
- /** Sentinel value */
- static const TrxUndoRsegs NullElement;
+ TrxUndoRsegs m_rsegs;
+ /** Track the current element in m_rsegs */
+ TrxUndoRsegs::const_iterator m_iter;
};
-/** This is the purge pointer/iterator. We need both the undo no and the
-transaction no up to which purge has parsed and applied the records. */
-struct purge_iter_t {
- purge_iter_t()
- :
- trx_no(),
- undo_no(),
- undo_rseg_space(ULINT_UNDEFINED)
- {
- // Do nothing
- }
-
- trx_id_t trx_no; /*!< Purge has advanced past all
- transactions whose number is less
- than this */
- undo_no_t undo_no; /*!< Purge has advanced past all records
- whose undo number is less than this */
- ulint undo_rseg_space;
- /*!< Last undo record resided in this
- space id. */
-};
-
-
/* Namespace to hold all the related functions and variables needed for truncate
of undo tablespace. */
namespace undo {
@@ -266,17 +159,12 @@ namespace undo {
/** Track UNDO tablespace mark for truncate. */
class Truncate {
public:
-
- Truncate()
- :
- m_undo_for_trunc(ULINT_UNDEFINED),
- m_rseg_for_trunc(),
- m_scan_start(1),
- m_purge_rseg_truncate_frequency(
- static_cast<ulint>(
- srv_purge_rseg_truncate_frequency))
+ void create()
{
- /* Do Nothing. */
+ m_undo_for_trunc = ULINT_UNDEFINED;
+ m_scan_start = 1;
+ m_purge_rseg_truncate_frequency =
+ ulint(srv_purge_rseg_truncate_frequency);
}
/** Clear the cached rollback segment. Normally done
@@ -447,53 +335,58 @@ namespace undo {
class purge_sys_t
{
public:
- /** Construct the purge system. */
- purge_sys_t();
- /** Destruct the purge system. */
- ~purge_sys_t();
-
- rw_lock_t latch; /*!< The latch protecting the purge
- view. A purge operation must acquire an
- x-latch here for the instant at which
- it changes the purge view: an undo
- log operation can prevent this by
- obtaining an s-latch here. It also
- protects state and running */
- os_event_t event; /*!< State signal event;
- os_event_set() and os_event_reset()
- are protected by purge_sys_t::latch
- X-lock */
- ulint n_stop; /*!< Counter to track number stops */
- volatile bool running; /*!< true, if purge is active,
- we check this without the latch too */
- volatile purge_state_t state; /*!< Purge coordinator thread states,
- we check this in several places
- without holding the latch. */
+ /** signal state changes; os_event_reset() and os_event_set()
+ are protected by rw_lock_x_lock(latch) */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ os_event_t event;
+ /** latch protecting view, m_enabled */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ rw_lock_t latch;
+private:
+ /** whether purge is enabled; protected by latch and my_atomic */
+ int32_t m_enabled;
+ /** number of pending stop() calls without resume() */
+ int32_t m_paused;
+public:
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
+ MY_ALIGNED(CACHE_LINE_SIZE)
ReadView view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- ulint n_submitted; /*!< Count of total tasks submitted
- to the task queue */
- ulint n_completed; /*!< Count of total tasks completed */
-
- /*------------------------------*/
- /* The following two fields form the 'purge pointer' which advances
- during a purge, and which is used in history list truncation */
-
- purge_iter_t iter; /* Limit up to which we have read and
- parsed the UNDO log records. Not
- necessarily purged from the indexes.
- Note that this can never be less than
- the limit below, we check for this
- invariant in trx0purge.cc */
- purge_iter_t limit; /* The 'purge pointer' which advances
- during a purge, and which is used in
- history list truncation */
-#ifdef UNIV_DEBUG
- purge_iter_t done; /* Indicate 'purge pointer' which have
- purged already accurately. */
-#endif /* UNIV_DEBUG */
+ /** Total number of tasks submitted by srv_purge_coordinator_thread.
+ Not accessed by other threads. */
+ ulint n_submitted;
+ /** Number of completed tasks. Accessed by srv_purge_coordinator
+ and srv_worker_thread by my_atomic. */
+ ulint n_completed;
+
+ /** Iterator to the undo log records of committed transactions */
+ struct iterator
+ {
+ bool operator<=(const iterator& other) const
+ {
+ if (commit < other.commit) return true;
+ if (commit > other.commit) return false;
+ return undo_no <= other.undo_no;
+ }
+
+ /** @return the commit number of the transaction */
+ trx_id_t trx_no() const { return commit >> 1; }
+ void reset_trx_no(trx_id_t trx_no) { commit = trx_no << 1; }
+
+ /** 2 * trx_t::no + old_insert of the committed transaction */
+ trx_id_t commit;
+ /** The record number within the committed transaction's undo
+ log, increasing, purged from 0 onwards */
+ undo_no_t undo_no;
+ };
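
The commit field above packs the commit number and the old_insert flag into one integer: trx_no occupies the upper bits and the flag the least significant bit, so comparing two iterators orders them by trx_no first and uses the flag only as a tie-breaker. A minimal self-contained sketch of this convention (the pack_commit() helper is illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>

    typedef uint64_t trx_id_t;

    /* Pack a commit number and the old_insert flag the same way
    purge_sys_t::iterator::commit does: trx_no << 1 | old_insert. */
    static trx_id_t pack_commit(trx_id_t trx_no, bool old_insert)
    {
            return trx_no << 1 | trx_id_t(old_insert);
    }

    int main()
    {
            trx_id_t c = pack_commit(42, true);
            assert((c >> 1) == 42);  /* what iterator::trx_no() would return */
            assert((c & 1) == 1);    /* the old_insert flag */
            /* Same commit number, different flag: the flag breaks the tie. */
            assert(pack_commit(42, false) < pack_commit(42, true));
            return 0;
    }
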
+
+ /** The tail of the purge queue; the last parsed undo log of a
+ committed transaction. */
+ iterator tail;
+ /** The head of the purge queue; any older undo logs of committed
+ transactions may be discarded (history list truncation). */
+ iterator head;
/*-----------------------------*/
bool next_stored; /*!< whether rseg holds the next record
to purge */
@@ -521,10 +414,70 @@ public:
undo::Truncate undo_trunc; /*!< Track UNDO tablespace marked
for truncate. */
+
+
+ /**
+ Constructor.
+
+  Some members may require late initialisation, thus we just mark the object as
+ uninitialised. Real initialisation happens in create().
+ */
+
+ purge_sys_t() : event(NULL), m_enabled(false) {}
+
+
+ /** Create the instance */
+ void create();
+
+ /** Close the purge system on shutdown */
+ void close();
+
+ /** @return whether purge is enabled */
+ bool enabled()
+ {
+ return my_atomic_load32_explicit(&m_enabled, MY_MEMORY_ORDER_RELAXED);
+ }
+ /** @return whether purge is enabled */
+ bool enabled_latched()
+ {
+ ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ return bool(m_enabled);
+ }
+ /** @return whether the purge coordinator is paused */
+ bool paused()
+ { return my_atomic_load32_explicit(&m_paused, MY_MEMORY_ORDER_RELAXED); }
+ /** @return whether the purge coordinator is paused */
+ bool paused_latched()
+ {
+ ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ return m_paused != 0;
+ }
+
+ /** Enable purge at startup. Not protected by latch; the main thread
+ will wait for purge_sys.enabled() in srv_start() */
+ void coordinator_startup()
+ {
+ ut_ad(!enabled());
+ my_atomic_store32_explicit(&m_enabled, true, MY_MEMORY_ORDER_RELAXED);
+ }
+
+ /** Disable purge at shutdown */
+ void coordinator_shutdown()
+ {
+ ut_ad(enabled());
+ my_atomic_store32_explicit(&m_enabled, false, MY_MEMORY_ORDER_RELAXED);
+ }
+
+ /** @return whether the purge coordinator thread is active */
+ bool running();
+ /** Stop purge during FLUSH TABLES FOR EXPORT */
+ void stop();
+ /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
+ void resume();
};
/** The global data structure coordinating a purge */
-extern purge_sys_t* purge_sys;
+extern purge_sys_t purge_sys;
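
The m_enabled and m_paused members above are read and written with relaxed atomics, and stop() calls may nest: purge stays paused until every stop() has been matched by a resume(). The real code uses my_atomic and the rw-latch shown in the class; the snippet below is only a stripped-down, self-contained analogue of that pause counter, with std::atomic standing in for my_atomic (the class name is illustrative):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    /* Illustrative analogue of purge_sys_t's pause bookkeeping. */
    class pausable_purge
    {
            std::atomic<int32_t> m_paused{0};
    public:
            void stop()   { m_paused.fetch_add(1, std::memory_order_relaxed); }
            void resume()
            {
                    int32_t old = m_paused.fetch_sub(1, std::memory_order_relaxed);
                    assert(old > 0); /* every resume() must match an earlier stop() */
            }
            bool paused() const
            { return m_paused.load(std::memory_order_relaxed) != 0; }
    };

    int main()
    {
            pausable_purge p;
            p.stop();               /* first FLUSH TABLES FOR EXPORT */
            p.stop();               /* a second, concurrent one */
            p.resume();
            assert(p.paused());     /* still paused until the last resume() */
            p.resume();
            assert(!p.paused());
            return 0;
    }
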
/** Info required to purge a record */
struct trx_purge_rec_t {
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index c32651b7a00..cd519a8e64d 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -40,24 +40,3 @@ trx_purge_get_log_from_hist(
return(node_addr);
}
-
-/********************************************************************//**
-address of its history list node.
-@return true if purge_sys_t::limit <= purge_sys_t::iter */
-UNIV_INLINE
-bool
-trx_purge_check_limit(void)
-/*=======================*/
-{
- /* limit is used to track till what point purge element has been
- processed and so limit <= iter.
- undo_no ordering is enforced only within the same rollback segment.
- If a transaction uses multiple rollback segments then we need to
- consider the rollback segment space id too. */
- return(purge_sys->iter.trx_no > purge_sys->limit.trx_no
- || (purge_sys->iter.trx_no == purge_sys->limit.trx_no
- && ((purge_sys->iter.undo_no >= purge_sys->limit.undo_no)
- || (purge_sys->iter.undo_rseg_space
- != purge_sys->limit.undo_rseg_space))));
-}
-
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index ea6269b5ba4..bf8835f189f 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -53,22 +53,6 @@ trx_undo_rec_get_type(
/*==================*/
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
Reads the undo log record number.
@return undo no */
UNIV_INLINE
@@ -111,7 +95,7 @@ trx_undo_rec_get_row_ref(
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
+ const dtuple_t**ref, /*!< out, own: row reference */
mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
/**********************************************************************//**
@@ -256,25 +240,22 @@ trx_undo_prev_version_build(
into this function by purge thread or not.
And if we read "after image" of undo log */
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
+/** Parse MLOG_UNDO_INSERT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
byte*
trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page); /*!< in: page or NULL */
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page);
+/** Erase the unused undo log page end.
+@param[in,out] undo_page undo log page
+@return whether the page contained something */
+bool
+trx_undo_erase_page_end(page_t* undo_page);
/** Read from an undo log record a non-virtual column value.
@param[in,out] ptr pointer to remaining part of the undo record
@@ -326,6 +307,8 @@ compilation info multiplied by 16 is ORed to this value in an undo log
record */
#define TRX_UNDO_RENAME_TABLE 9 /*!< RENAME TABLE */
+#define TRX_UNDO_INSERT_METADATA 10 /*!< insert a metadata
+ pseudo-record for instant ALTER */
#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
record */
@@ -341,6 +324,9 @@ record */
storage fields: used by purge to
free the external storage */
+/** The search tuple corresponding to TRX_UNDO_INSERT_METADATA */
+extern const dtuple_t trx_undo_metadata;
+
#include "trx0rec.ic"
#endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index d0771a94b05..a9794eb213d 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -36,35 +36,6 @@ trx_undo_rec_get_type(
}
/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
Reads the undo log record number.
@return undo no */
UNIV_INLINE
@@ -93,8 +64,8 @@ trx_undo_rec_copy(
ulint len;
len = mach_read_from_2(undo_rec)
- - ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
- ut_ad(len < UNIV_PAGE_SIZE);
+ - ut_align_offset(undo_rec, srv_page_size);
+ ut_ad(len < srv_page_size);
trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(rec, len);
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index d6fe576da90..b74b56eae98 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -31,7 +31,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
-extern bool trx_rollback_or_clean_is_active;
+extern bool trx_rollback_is_active;
extern const trx_t* trx_roll_crash_recv_trx;
/*******************************************************************//**
@@ -61,20 +61,17 @@ trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/** Report progress when rolling back a row of a recovered transaction.
-@return whether the rollback should be aborted due to pending shutdown */
-bool
-trx_roll_must_shutdown();
+/** Report progress when rolling back a row of a recovered transaction. */
+void trx_roll_report_progress();
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
+transaction was not yet committed, then we roll it back.
+@param all true=roll back all recovered active transactions;
+false=roll back any incomplete dictionary transaction */
void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all); /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
+trx_rollback_recovered(bool all);
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
@@ -84,11 +81,7 @@ Note: this is done in a background thread.
@return a dummy parameter */
extern "C"
os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)));
- /*!< in: a dummy parameter required by
- os_thread_create */
+DECLARE_THREAD(trx_rollback_all_recovered)(void*);
/*********************************************************************//**
Creates a rollback command node struct.
@return own: rollback node struct */
@@ -223,6 +216,4 @@ struct trx_named_savept_t{
transaction */
};
-#include "trx0roll.ic"
-
#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
deleted file mode 100644
index b09a1471150..00000000000
--- a/storage/innobase/include/trx0roll.ic
+++ /dev/null
@@ -1,62 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.ic
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Check if undo numbering is maintained while processing undo records
-for rollback.
-@return true if undo numbering is maintained. */
-UNIV_INLINE
-bool
-trx_roll_check_undo_rec_ordering(
-/*=============================*/
- undo_no_t curr_undo_rec_no, /*!< in: record number of
- undo record to process. */
- ulint curr_undo_space_id, /*!< in: space-id of rollback
- segment that contains the
- undo record to process. */
- const trx_t* trx) /*!< in: transaction */
-{
- /* Each transaction now can have multiple rollback segments.
- If a transaction involves temp and non-temp tables, both the rollback
- segments will be active. In this case undo records will be distrubuted
- across the two rollback segments.
- CASE-1: UNDO action will apply all undo records from one rollback
- segment before moving to next. This means undo record numbers can't be
- sequential but ordering is still enforced as next undo record number
- should be < processed undo record number.
- CASE-2: For normal rollback (not initiated by crash) all rollback
- segments will be active (including non-redo).
- Based on transaction operation pattern undo record number of first
- undo record from this new rollback segment can be > last undo number
- from previous rollback segment and so we ignore this check if
- rollback segments are switching. Once switched new rollback segment
- should re-follow undo record number pattern (as mentioned in CASE-1). */
-
- return(curr_undo_space_id != trx->undo_rseg_space
- || curr_undo_rec_no + 1 <= trx->undo_no);
-}
-#endif /* UNIV_DEBUG */
-
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 48c5133644c..dbd80486b71 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0rseg_h
#define trx0rseg_h
-#include "trx0types.h"
#include "trx0sys.h"
#include "fut0lst.h"
-#include <vector>
/** Gets a rollback segment header.
@param[in] space space where placed
@@ -39,10 +37,7 @@ Created 3/26/1996 Heikki Tuuri
@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
-trx_rsegf_get(
- ulint space,
- ulint page_no,
- mtr_t* mtr);
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr);
/** Gets a newly created rollback segment header.
@param[in] space space where placed
@@ -57,16 +52,6 @@ trx_rsegf_get_new(
mtr_t* mtr);
/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
@@ -81,25 +66,20 @@ Looks for a free slot for an undo log segment.
@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr); /*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf);
-/** Creates a rollback segment header.
-This function is called only when a new rollback segment is created in
-the database.
-@param[in] space space id
-@param[in] max_size max size in pages
-@param[in] rseg_slot_no rseg id == slot number in trx sys
+/** Create a rollback segment header.
+@param[in,out] space system, undo, or temporary tablespace
+@param[in] rseg_id rollback segment identifier
+@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
@param[in,out] mtr mini-transaction
@return page number of the created segment, FIL_NULL if fail */
ulint
trx_rseg_header_create(
- ulint space,
- ulint max_size,
- ulint rseg_slot_no,
- mtr_t* mtr);
+ fil_space_t* space,
+ ulint rseg_id,
+ buf_block_t* sys_header,
+ mtr_t* mtr);
/** Initialize the rollback segments in memory at database startup. */
void
@@ -133,7 +113,7 @@ trx_rseg_get_n_undo_tablespaces(
ulint* space_ids); /*!< out: array of space ids of
UNDO tablespaces */
/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
+#define TRX_RSEG_N_SLOTS (srv_page_size / 16)
/* Maximum number of transactions supported by a single rollback segment */
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
@@ -150,32 +130,25 @@ struct trx_rseg_t {
RsegMutex mutex;
/** space where the rollback segment header is placed */
- ulint space;
+ fil_space_t* space;
/** page number of the rollback segment header */
ulint page_no;
- /** maximum allowed size in pages */
- ulint max_size;
-
/** current size in pages */
ulint curr_size;
/*--------------------------------------------------------*/
- /* Fields for update undo logs */
- /** List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
-
- /** List of update undo log segments cached for fast reuse */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+ /* Fields for undo logs */
+ /** List of undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_list;
- /*--------------------------------------------------------*/
- /* Fields for insert undo logs */
- /** List of insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+ /** List of undo log segments cached for fast reuse */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_cached;
- /** List of insert undo log segments cached for fast reuse */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+ /** List of recovered old insert_undo logs of incomplete
+ transactions (to roll back or XA COMMIT & purge) */
+ UT_LIST_BASE_NODE_T(trx_undo_t) old_insert_list;
/*--------------------------------------------------------*/
@@ -186,11 +159,11 @@ struct trx_rseg_t {
/** Byte offset of the last not yet purged log header */
ulint last_offset;
- /** Transaction number of the last not yet purged log */
- trx_id_t last_trx_no;
+ /** trx_t::no * 2 + old_insert of the last not yet purged log */
+ trx_id_t last_commit;
- /** TRUE if the last not yet purged log needs purging */
- ibool last_del_marks;
+ /** Whether the log segment needs purge */
+ bool needs_purge;
/** Reference counter to track rseg allocated transactions. */
ulint trx_ref_count;
@@ -199,23 +172,31 @@ struct trx_rseg_t {
UNDO-tablespace marked for truncate. */
bool skip_allocation;
+ /** @return the commit ID of the last committed transaction */
+ trx_id_t last_trx_no() const { return last_commit >> 1; }
+
+ void set_last_trx_no(trx_id_t trx_no, bool is_update)
+ {
+ last_commit = trx_no << 1 | trx_id_t(is_update);
+ }
+
/** @return whether the rollback segment is persistent */
bool is_persistent() const
{
- ut_ad(space == SRV_TMP_SPACE_ID
- || space == TRX_SYS_SPACE
+ ut_ad(space == fil_system.temp_space
+ || space == fil_system.sys_space
|| (srv_undo_space_id_start > 0
- && space >= srv_undo_space_id_start
- && space <= srv_undo_space_id_start
+ && space->id >= srv_undo_space_id_start
+ && space->id <= srv_undo_space_id_start
+ TRX_SYS_MAX_UNDO_SPACES));
- ut_ad(space == SRV_TMP_SPACE_ID
- || space == TRX_SYS_SPACE
+ ut_ad(space == fil_system.temp_space
+ || space == fil_system.sys_space
|| (srv_undo_space_id_start > 0
- && space >= srv_undo_space_id_start
- && space <= srv_undo_space_id_start
+ && space->id >= srv_undo_space_id_start
+ && space->id <= srv_undo_space_id_start
+ srv_undo_tablespaces_active)
|| !srv_was_started);
- return(space != SRV_TMP_SPACE_ID);
+ return(space->id != SRV_TMP_SPACE_ID);
}
};
@@ -232,19 +213,99 @@ struct trx_rseg_t {
/* Transaction rollback segment header */
/*-------------------------------------------------------------*/
-#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
- segment in pages */
-#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
- by the logs in the history list */
-#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
- transactions */
+/** 0xfffffffe = pre-MariaDB 10.3.5 format; 0=MariaDB 10.3.5 or later */
+#define TRX_RSEG_FORMAT 0
+/** Number of pages in the TRX_RSEG_HISTORY list */
+#define TRX_RSEG_HISTORY_SIZE 4
+/** Committed transaction logs that have not been purged yet */
+#define TRX_RSEG_HISTORY 8
#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
/* Header for the file segment where
this page is placed */
#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
/* Undo log segment slots */
+/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
+#define TRX_RSEG_MAX_TRX_ID (TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \
+ * TRX_RSEG_SLOT_SIZE)
+
+/** 8 bytes offset within the binlog file */
+#define TRX_RSEG_BINLOG_OFFSET TRX_RSEG_MAX_TRX_ID + 8
+/** MySQL log file name, 512 bytes, including terminating NUL
+(valid only if TRX_RSEG_FORMAT is 0).
+If no binlog information is present, the first byte is NUL. */
+#define TRX_RSEG_BINLOG_NAME TRX_RSEG_MAX_TRX_ID + 16
+/** Maximum length of binlog file name, including terminating NUL, in bytes */
+#define TRX_RSEG_BINLOG_NAME_LEN 512
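
As the comment above says, the first byte of TRX_RSEG_BINLOG_NAME is NUL when no binlog position has been stored. A hedged sketch of how a reader could decode these two fields from a 10.3-format rollback segment header (the function name is illustrative and not part of the patch; rseg_header is assumed to point at the TRX_RSEG_FORMAT byte, as returned by trx_rsegf_get(), and mach_read_from_8() is InnoDB's existing big-endian accessor):

    /* Illustrative sketch: extract the binlog position recorded in a
    rollback segment header page, if any. */
    static bool read_rseg_binlog_position(const byte* rseg_header,
                                          const char** name, uint64_t* offset)
    {
            if (!rseg_header[TRX_RSEG_BINLOG_NAME]) {
                    return false;   /* first byte NUL: nothing stored */
            }
            *name = reinterpret_cast<const char*>(rseg_header
                                                  + TRX_RSEG_BINLOG_NAME);
            *offset = mach_read_from_8(rseg_header + TRX_RSEG_BINLOG_OFFSET);
            /* *name is NUL-terminated and occupies at most
            TRX_RSEG_BINLOG_NAME_LEN bytes, including the NUL. */
            return true;
    }
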
+
+#ifdef WITH_WSREP
+/** The offset to WSREP XID headers */
+#define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512
+
+/** WSREP XID format (1 if present and valid, 0 if not present) */
+#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO
+/** WSREP XID GTRID length */
+#define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4
+/** WSREP XID bqual length */
+#define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8
+/** WSREP XID data (XIDDATASIZE bytes) */
+#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12
+#endif /* WITH_WSREP*/
+
/*-------------------------------------------------------------*/
+/** Read the page number of an undo log slot.
+@param[in] rsegf rollback segment header
+@param[in] n slot number */
+inline
+uint32_t
+trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n)
+{
+ ut_ad(n < TRX_RSEG_N_SLOTS);
+ return mach_read_from_4(rsegf + TRX_RSEG_UNDO_SLOTS
+ + n * TRX_RSEG_SLOT_SIZE);
+}
+
+#ifdef WITH_WSREP
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr);
+
+/** Update WSREP checkpoint XID in first rollback segment header
+as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
+are no wsrep transactions committing.
+If the UUID part of the WSREP XID does not match to the UUIDs of XIDs already
+stored into rollback segments, the WSREP XID in all the remaining rollback
+segments will be reset.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid);
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid);
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr);
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr);
+
#include "trx0rseg.ic"
#endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index dac7dadfb68..9edfe897155 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,28 +34,18 @@ Created 3/26/1996 Heikki Tuuri
@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
-trx_rsegf_get(
- ulint space,
- ulint page_no,
- mtr_t* mtr)
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr)
{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- ut_ad(space <= srv_undo_space_id_start + srv_undo_tablespaces_active
- || space == SRV_TMP_SPACE_ID
+ ut_ad(space == fil_system.sys_space || space == fil_system.temp_space
+ || srv_is_undo_tablespace(space->id)
|| !srv_was_started);
- ut_ad(space <= srv_undo_space_id_start + TRX_SYS_MAX_UNDO_SPACES
- || space == SRV_TMP_SPACE_ID);
- block = buf_page_get(
- page_id_t(space, page_no), univ_page_size, RW_X_LATCH, mtr);
+ buf_block_t* block = buf_page_get(page_id_t(space->id, page_no),
+ univ_page_size, RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
+ return TRX_RSEG + block->frame;
}
/** Gets a newly created rollback segment header.
@@ -88,23 +78,6 @@ trx_rsegf_get_new(
}
/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_a(n < TRX_RSEG_N_SLOTS);
-
- return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
- + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
@@ -126,10 +99,7 @@ Looks for a free slot for an undo log segment.
@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr) /*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf)
{
ulint i;
ulint page_no;
@@ -143,7 +113,7 @@ trx_rsegf_undo_find_free(
#endif
for (i = 0; i < max_slots; i++) {
- page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+ page_no = trx_rsegf_get_nth_undo(rsegf, i);
if (page_no == FIL_NULL) {
return(i);
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 519ef835fba..8c5be47fb85 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "mtr0mtr.h"
#include "ut0byte.h"
-#include "mem0mem.h"
#include "ut0lst.h"
#include "read0types.h"
#include "page0types.h"
@@ -45,182 +44,82 @@ Created 3/26/1996 Heikki Tuuri
typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
-// Forward declaration
-class MVCC;
-class ReadView;
-
-/** The transaction system */
-extern trx_sys_t* trx_sys;
-
/** Checks if a page address is the trx sys header page.
@param[in] page_id page id
@return true if trx sys header page */
-inline bool trx_sys_hdr_page(const page_id_t page_id);
-
-/** Initialize the transaction system main-memory data structures. */
-void trx_sys_init_at_db_start();
+inline bool trx_sys_hdr_page(const page_id_t& page_id)
+{
+ return(page_id.space() == TRX_SYS_SPACE
+ && page_id.page_no() == TRX_SYS_PAGE_NO);
+}
/*****************************************************************//**
-Creates the trx_sys instance and initializes purge_queue and mutex. */
-void
-trx_sys_create(void);
-/*================*/
-/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
void
trx_sys_create_sys_pages(void);
/*==========================*/
-/** @return an unallocated rollback segment slot in the TRX_SYS header
+/** Find an available rollback segment.
+@param[in]	sys_header	TRX_SYS page
+@return an unallocated rollback segment slot in the TRX_SYS header
@retval ULINT_UNDEFINED if not found */
ulint
-trx_sysf_rseg_find_free(mtr_t* mtr);
-/**********************************************************************//**
-Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-file copy.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if
- the slot is reset to unused */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id();
-/*===================*/
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void);
-/*========================*/
+trx_sys_rseg_find_free(const buf_block_t* sys_header);
+/** Request the TRX_SYS page.
+@param[in,out]	mtr	mini-transaction
+@param[in]	rw	whether to lock the page for writing
+@return the TRX_SYS page
+@retval NULL if the page cannot be read */
+inline
+buf_block_t*
+trx_sysf_get(mtr_t* mtr, bool rw = true)
+{
+ buf_block_t* block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ univ_page_size, rw ? RW_X_LATCH : RW_S_LATCH, mtr);
+ if (block) {
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+ }
+ return block;
+}
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
extern uint trx_rseg_n_slots_debug;
#endif
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
+/** Write DB_TRX_ID.
+@param[out] db_trx_id the DB_TRX_ID field to be written to
+@param[in] id transaction ID */
UNIV_INLINE
void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id); /*!< in: id */
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
+trx_write_trx_id(byte* db_trx_id, trx_id_t id)
+{
+ compile_time_assert(DATA_TRX_ID_LEN == 6);
+ ut_ad(id);
+ mach_write_to_6(db_trx_id, id);
+}
+
+/** Read a transaction identifier.
@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-/****************************************************************//**
-Looks for the trx instance with the given id in the rw trx_list.
-@return the trx handle or NULL if not found */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id);/*!< in: trx id to search for */
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state to
-find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
+inline
trx_id_t
-trx_rw_min_trx_id(void);
-/*===================*/
-/****************************************************************//**
-Checks if a rw transaction with the given id is active.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding trx_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt, /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
- bool do_ref_count); /*!< in: if true then increment the
- trx_t::n_ref_count */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+trx_read_trx_id(const byte* ptr)
+{
+ compile_time_assert(DATA_TRX_ID_LEN == 6);
+ return(mach_read_from_6(ptr));
+}
+
+#ifdef UNIV_DEBUG
+/** Check that the DB_TRX_ID in a record is valid.
+@param[in] db_trx_id the DB_TRX_ID column to validate
+@param[in] trx_id the id of the ALTER TABLE transaction */
+inline bool trx_id_check(const void* db_trx_id, trx_id_t trx_id)
+{
+ trx_id_t id = trx_read_trx_id(static_cast<const byte*>(db_trx_id));
+ ut_ad(id == 0 || id > trx_id);
+ return true;
+}
+#endif
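
A small illustration (not part of the patch) of the 6-byte big-endian encoding that these helpers delegate to mach_write_to_6()/mach_read_from_6():

    byte	db_trx_id[DATA_TRX_ID_LEN];

    trx_write_trx_id(db_trx_id, 0x123456789ABCULL);
    ut_ad(trx_read_trx_id(db_trx_id) == 0x123456789ABCULL);
    /* The value is stored most significant byte first. */
    ut_ad(db_trx_id[0] == 0x12 && db_trx_id[5] == 0xBC);
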
+
/*****************************************************************//**
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
@@ -231,138 +130,17 @@ trx_sys_update_mysql_binlog_offset(
/*===============================*/
const char* file_name,/*!< in: MySQL log file name */
int64_t offset, /*!< in: position in that log file */
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- mtr_t* mtr); /*!< in: mtr */
+ buf_block_t* sys_header, /*!< in,out: trx sys header */
+ mtr_t* mtr); /*!< in,out: mini-transaction */
/** Display the MySQL binlog offset info if it is present in the trx
system header. */
void
trx_sys_print_mysql_binlog_offset();
-#ifdef WITH_WSREP
-/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
-@param[in] xid Transaction XID
-@param[in,out] sys_header sys_header
-@param[in] mtr minitransaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- trx_sysf_t* sys_header,
- mtr_t* mtr);
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid);
-#endif /* WITH_WSREP */
-
-/** Initializes the tablespace tag system. */
-void
-trx_sys_file_format_init(void);
-/*==========================*/
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-void
-trx_sys_file_format_close(void);
-/*===========================*/
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-void
-trx_sys_file_format_tag_init(void);
-/*==============================*/
-
-/*****************************************************************//**
-Shutdown/Close the transaction system. */
-void
-trx_sys_close(void);
-/*===============*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name); /*!< out: max file format name or
- NULL if not needed. */
/** Create the rollback segments.
@return whether the creation succeeded */
bool
trx_sys_create_rsegs();
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void);
-/*======================*/
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-@return total number of active transactions or 0 if none */
-ulint
-trx_sys_any_active_transactions(void);
-/*=================================*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
-/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id); /*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id); /*!< in: file format identifier */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-
-/**
-Add the transaction to the RW transaction set
-@param trx transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx);
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_sys_t::rw_trx_list.
-@return true if the list is valid */
-bool
-trx_sys_validate_trx_list();
-/*========================*/
-#endif /* UNIV_DEBUG */
/** The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
@@ -372,18 +150,13 @@ trx_sys_validate_trx_list();
/** Transaction system header */
/*------------------------------------------------------------- @{ */
-#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx
- number modulo
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- written to a file page by any
- transaction; the assignment of
- transaction ids continues from
- this number rounded up by
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- plus
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- when the database is
- started */
+/** In old versions of InnoDB, this persisted the value of
+trx_sys.get_max_trx_id(). Starting with MariaDB 10.3.5,
+the field TRX_RSEG_MAX_TRX_ID in rollback segment header pages
+and the fields TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO in undo log pages
+are used instead. The field only exists for the purpose of upgrading
+from older MySQL or MariaDB versions. */
+#define TRX_SYS_TRX_ID_STORE 0
#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the
tablespace segment the trx
system is created into */
@@ -393,16 +166,52 @@ trx_sys_validate_trx_list();
slots */
/*------------------------------------------------------------- @} */
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one (signed)
-byte, therefore 128; each slot is currently 8 bytes in size. If you want
-to raise the level to 256 then you will need to fix some assertions that
-impose the 7 bit restriction. e.g., mach_write_to_3() */
+/** The number of rollback segments; rollback segment id must fit in
+the 7 bits reserved for it in DB_ROLL_PTR. */
#define TRX_SYS_N_RSEGS 128
/** Maximum number of undo tablespaces (not counting the system tablespace) */
#define TRX_SYS_MAX_UNDO_SPACES (TRX_SYS_N_RSEGS - 1)
-/** Maximum length of MySQL binlog file name, in bytes. */
+/* Rollback segment specification slot offsets */
+
+/** the tablespace ID of an undo log header; starting with
+MySQL/InnoDB 5.1.7, this is FIL_NULL if the slot is unused */
+#define TRX_SYS_RSEG_SPACE 0
+/** the page number of an undo log header, or FIL_NULL if unused */
+#define TRX_SYS_RSEG_PAGE_NO 4
+/** Size of a rollback segment specification slot */
+#define TRX_SYS_RSEG_SLOT_SIZE 8
+
+/** Read the tablespace ID of a rollback segment slot.
+@param[in] sys_header TRX_SYS page
+@param[in] rseg_id rollback segment identifier
+@return undo tablespace id */
+inline
+uint32_t
+trx_sysf_rseg_get_space(const buf_block_t* sys_header, ulint rseg_id)
+{
+ ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+ return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame);
+}
+
+/** Read the page number of a rollback segment slot.
+@param[in] sys_header TRX_SYS page
+@param[in] rseg_id rollback segment identifier
+@return undo page number */
+inline
+uint32_t
+trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id)
+{
+ ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+ return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame);
+}
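
Putting trx_sysf_get() and the two slot accessors together, a reader could walk all TRX_SYS_N_RSEGS slots roughly as follows (a hedged usage sketch, not code from the patch; FIL_NULL marks an unused slot):

    /* Illustrative only: walk the rollback segment slots in the TRX_SYS
    page under an S-latch. */
    mtr_t	mtr;
    mtr.start();
    if (const buf_block_t* sys_header = trx_sysf_get(&mtr, false)) {
            for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
                    uint32_t page_no = trx_sysf_rseg_get_page_no(
                            sys_header, rseg_id);
                    if (page_no == FIL_NULL) {
                            continue;	/* unused slot */
                    }
                    /* The rollback segment header is page page_no in the
                    tablespace returned by trx_sysf_rseg_get_space(
                    sys_header, rseg_id). */
            }
    }
    mtr.commit();
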
+
+/** Maximum length of MySQL binlog file name, in bytes.
+(Used before MariaDB 10.3.5.) */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
@@ -411,7 +220,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
# error "UNIV_PAGE_SIZE_MIN < 4096"
#endif
/** The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
+#define TRX_SYS_MYSQL_LOG_INFO (srv_page_size - 1000)
#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
TRX_SYS_MYSQL_LOG_MAGIC_N
if we have valid data in the
@@ -420,7 +229,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-/** Memory map TRX_SYS_PAGE_NO = 5 when UNIV_PAGE_SIZE = 4096
+/** Memory map TRX_SYS_PAGE_NO = 5 when srv_page_size = 4096
0...37 FIL_HEADER
38...45 TRX_SYS_TRX_ID_STORE
@@ -436,7 +245,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
...
...1063 TRX_SYS_RSEG_PAGE_NO for slot 126
-(UNIV_PAGE_SIZE-3500 WSREP ::: FAIL would overwrite undo tablespace
+(srv_page_size-3500 WSREP ::: FAIL would overwrite undo tablespace
space_id, page_no pairs :::)
596 TRX_SYS_WSREP_XID_INFO TRX_SYS_WSREP_XID_MAGIC_N_FLD
600 TRX_SYS_WSREP_XID_FORMAT
@@ -446,7 +255,7 @@ space_id, page_no pairs :::)
739 TRX_SYS_WSREP_XID_DATA_END
FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
-(UNIV_PAGE_SIZE-2500)
+(srv_page_size-2500)
1596 TRX_SYS_WSREP_XID_INFO TRX_SYS_WSREP_XID_MAGIC_N_FLD
1600 TRX_SYS_WSREP_XID_FORMAT
1604 TRX_SYS_WSREP_XID_GTRID_LEN
@@ -454,19 +263,19 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
1612 TRX_SYS_WSREP_XID_DATA (len = 128)
1739 TRX_SYS_WSREP_XID_DATA_END
-(UNIV_PAGE_SIZE - 2000 MYSQL MASTER LOG)
+(srv_page_size - 2000 MYSQL MASTER LOG)
2096 TRX_SYS_MYSQL_MASTER_LOG_INFO TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
2100 TRX_SYS_MYSQL_LOG_OFFSET_HIGH
2104 TRX_SYS_MYSQL_LOG_OFFSET_LOW
2108 TRX_SYS_MYSQL_LOG_NAME
-(UNIV_PAGE_SIZE - 1000 MYSQL LOG)
+(srv_page_size - 1000 MYSQL LOG)
3096 TRX_SYS_MYSQL_LOG_INFO TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
3100 TRX_SYS_MYSQL_LOG_OFFSET_HIGH
3104 TRX_SYS_MYSQL_LOG_OFFSET_LOW
3108 TRX_SYS_MYSQL_LOG_NAME
-(UNIV_PAGE_SIZE - 200 DOUBLEWRITE)
+(srv_page_size - 200 DOUBLEWRITE)
3896 TRX_SYS_DOUBLEWRITE TRX_SYS_DOUBLEWRITE_FSEG
3906 TRX_SYS_DOUBLEWRITE_MAGIC
3910 TRX_SYS_DOUBLEWRITE_BLOCK1
@@ -474,12 +283,12 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
3918 TRX_SYS_DOUBLEWRITE_REPEAT
3930 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N
-(UNIV_PAGE_SIZE - 8, TAILER)
+(srv_page_size - 8, TAILER)
4088..4096 FIL_TAILER
*/
#ifdef WITH_WSREP
-/** The offset to WSREP XID headers */
+/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */
#define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -495,7 +304,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
+#define TRX_SYS_DOUBLEWRITE (srv_page_size - 200)
/*-------------------------------------------------------------*/
#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg
containing the doublewrite
@@ -543,83 +352,477 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
/* @} */
-/** File format tag */
-/* @{ */
-/** The offset of the file format tag on the trx system header page
-(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this 64-bit constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N \
- ((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32 \
- | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
-/* @} */
+trx_t* current_trx();
+
+struct rw_trx_hash_element_t
+{
+ rw_trx_hash_element_t(): trx(0)
+ {
+ mutex_create(LATCH_ID_RW_TRX_HASH_ELEMENT, &mutex);
+ }
+
+
+ ~rw_trx_hash_element_t()
+ {
+ mutex_free(&mutex);
+ }
+
+
+ trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
+ trx_id_t no;
+ trx_t *trx;
+ ib_mutex_t mutex;
+};
+
+
+/**
+  Wrapper around LF_HASH to store a set of in-memory read-write transactions.
+*/
+
+class rw_trx_hash_t
+{
+ LF_HASH hash;
+
+
+ /**
+ Constructor callback for lock-free allocator.
+
+ Object is just allocated and is not yet accessible via rw_trx_hash by
+ concurrent threads. Object can be reused multiple times before it is freed.
+ Every time object is being reused initializer() callback is called.
+ */
+
+ static void rw_trx_hash_constructor(uchar *arg)
+ {
+ new(arg + LF_HASH_OVERHEAD) rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Destructor callback for lock-free allocator.
+
+ Object is about to be freed and is not accessible via rw_trx_hash by
+ concurrent threads.
+ */
+
+ static void rw_trx_hash_destructor(uchar *arg)
+ {
+ reinterpret_cast<rw_trx_hash_element_t*>
+ (arg + LF_HASH_OVERHEAD)->~rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Destructor callback for lock-free allocator.
+
+ This destructor is used at shutdown. It frees remaining transaction
+ objects.
+
+ XA PREPARED transactions may remain if they haven't been committed or
+ rolled back. ACTIVE transactions may remain if startup was interrupted or
+ server is running in read-only mode or for certain srv_force_recovery
+ levels.
+ */
+
+ static void rw_trx_hash_shutdown_destructor(uchar *arg)
+ {
+ rw_trx_hash_element_t *element=
+ reinterpret_cast<rw_trx_hash_element_t*>(arg + LF_HASH_OVERHEAD);
+ if (trx_t *trx= element->trx)
+ {
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED) ||
+ (trx_state_eq(trx, TRX_STATE_ACTIVE) &&
+ (!srv_was_started ||
+ srv_read_only_mode ||
+ srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
+ trx_free_at_shutdown(trx);
+ }
+ element->~rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Initializer callback for lock-free hash.
+
+ Object is not yet accessible via rw_trx_hash by concurrent threads, but is
+ about to become such. Object id can be changed only by this callback and
+ remains the same until all pins to this object are released.
+
+ Object trx can be changed to 0 by erase() under object mutex protection,
+ which indicates it is about to be removed from lock-free hash and become
+ not accessible by concurrent threads.
+ */
+
+ static void rw_trx_hash_initializer(LF_HASH *,
+ rw_trx_hash_element_t *element,
+ trx_t *trx)
+ {
+ ut_ad(element->trx == 0);
+ element->trx= trx;
+ element->id= trx->id;
+ element->no= TRX_ID_MAX;
+ trx->rw_trx_hash_element= element;
+ }
+
+
+ /**
+ Gets LF_HASH pins.
+
+ Pins are used to protect object from being destroyed or reused. They are
+    normally stored in the trx object for quick access. If the caller doesn't
+    have a trx available, we try to get it using current_trx(). If the caller
+    doesn't have a trx at all, temporary pins are allocated.
+ */
+
+ LF_PINS *get_pins(trx_t *trx)
+ {
+ if (!trx->rw_trx_hash_pins)
+ {
+ trx->rw_trx_hash_pins= lf_hash_get_pins(&hash);
+ ut_a(trx->rw_trx_hash_pins);
+ }
+ return trx->rw_trx_hash_pins;
+ }
+
+
+ struct eliminate_duplicates_arg
+ {
+ trx_ids_t ids;
+ my_hash_walk_action action;
+ void *argument;
+ eliminate_duplicates_arg(size_t size, my_hash_walk_action act, void* arg):
+ action(act), argument(arg) { ids.reserve(size); }
+ };
+
+
+ static my_bool eliminate_duplicates(rw_trx_hash_element_t *element,
+ eliminate_duplicates_arg *arg)
+ {
+ for (trx_ids_t::iterator it= arg->ids.begin(); it != arg->ids.end(); it++)
+ {
+ if (*it == element->id)
+ return 0;
+ }
+ arg->ids.push_back(element->id);
+ return arg->action(element, arg->argument);
+ }
+
-/** The transaction system central memory data structure. */
-struct trx_sys_t {
-
- TrxSysMutex mutex; /*!< mutex protecting most fields in
- this structure except when noted
- otherwise */
-
- MVCC* mvcc; /*!< Multi version concurrency control
- manager */
- volatile trx_id_t
- max_trx_id; /*!< The smallest number not yet
- assigned as a transaction id or
- transaction number. This is declared
- volatile because it can be accessed
- without holding any mutex during
- AC-NL-RO view creation. */
- trx_ut_list_t serialisation_list;
- /*!< Ordered on trx_t::no of all the
- currenrtly active RW transactions */
#ifdef UNIV_DEBUG
- trx_id_t rw_max_trx_id; /*!< Max trx id of read-write
- transactions which exist or existed */
-#endif /* UNIV_DEBUG */
-
- /** Avoid false sharing */
- const char pad1[CACHE_LINE_SIZE];
- trx_ut_list_t rw_trx_list; /*!< List of active and committed in
- memory read-write transactions, sorted
- on trx id, biggest first. Recovered
- transactions are always on this list. */
-
- /** Avoid false sharing */
- const char pad2[CACHE_LINE_SIZE];
- trx_ut_list_t mysql_trx_list; /*!< List of transactions created
- for MySQL. All user transactions are
- on mysql_trx_list. The rw_trx_list
- can contain system transactions and
- recovered transactions that will not
- be in the mysql_trx_list.
- mysql_trx_list may additionally contain
- transactions that have not yet been
- started in InnoDB. */
-
- trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs
- for MVCC snapshot. A ReadView would take
- a snapshot of these transactions whose
- changes are not visible to it. We should
- remove transactions from the list before
- committing in memory and releasing locks
- to ensure right order of removal and
- consistent snapshot. */
-
- /** Avoid false sharing */
- const char pad3[CACHE_LINE_SIZE];
+ static void validate_element(trx_t *trx)
+ {
+ ut_ad(!trx->read_only || !trx->rsegs.m_redo.rseg);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ mutex_enter(&trx->mutex);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ mutex_exit(&trx->mutex);
+ }
+
+
+ struct debug_iterator_arg
+ {
+ my_hash_walk_action action;
+ void *argument;
+ };
+
+
+ static my_bool debug_iterator(rw_trx_hash_element_t *element,
+ debug_iterator_arg *arg)
+ {
+ mutex_enter(&element->mutex);
+ if (element->trx)
+ validate_element(element->trx);
+ mutex_exit(&element->mutex);
+ return arg->action(element, arg->argument);
+ }
+#endif
+
+
+public:
+ void init()
+ {
+ lf_hash_init(&hash, sizeof(rw_trx_hash_element_t), LF_HASH_UNIQUE, 0,
+ sizeof(trx_id_t), 0, &my_charset_bin);
+ hash.alloc.constructor= rw_trx_hash_constructor;
+ hash.alloc.destructor= rw_trx_hash_destructor;
+ hash.initializer=
+ reinterpret_cast<lf_hash_initializer>(rw_trx_hash_initializer);
+ }
+
+
+ void destroy()
+ {
+ hash.alloc.destructor= rw_trx_hash_shutdown_destructor;
+ lf_hash_destroy(&hash);
+ }
+
+
+ /**
+ Releases LF_HASH pins.
+
+ Must be called by thread that owns trx_t object when the latter is being
+ "detached" from thread (e.g. released to the pool by trx_free()). Can be
+ called earlier if thread is expected not to use rw_trx_hash.
+
+ Since pins are not allowed to be transferred to another thread,
+ initialisation thread calls this for recovered transactions.
+ */
+
+ void put_pins(trx_t *trx)
+ {
+ if (trx->rw_trx_hash_pins)
+ {
+ lf_hash_put_pins(trx->rw_trx_hash_pins);
+ trx->rw_trx_hash_pins= 0;
+ }
+ }
+
+
+ /**
+ Finds trx object in lock-free hash with given id.
+
+ Only ACTIVE or PREPARED trx objects may participate in hash. Nevertheless
+ the transaction may get committed before this method returns.
+
+ With do_ref_count == false the caller may dereference returned trx pointer
+ only if lock_sys.mutex was acquired before calling find().
+
+ With do_ref_count == true caller may dereference trx even if it is not
+ holding lock_sys.mutex. Caller is responsible for calling
+ trx->release_reference() when it is done playing with trx.
+
+    Ideally this method should get the caller's rw_trx_hash_pins along with the
+    trx object as a parameter, similar to insert() and erase(). However most
+    callers lose the trx early in their call chains and it is not that easy to
+    pass it through.
+
+    So we take a more expensive approach: get trx through current_thd()->ha_data.
+    Some threads don't have a trx attached to the THD, and at least the server
+    initialisation thread, fts_optimize_thread, srv_master_thread,
+    dict_stats_thread, srv_monitor_thread and btr_defragment_thread don't even
+    have a THD at all. For such cases we allocate pins only for the duration of
+    the search and free them immediately.
+
+    This has a negative performance impact and should be fixed eventually (by
+    passing caller_trx as a parameter). Still, a stream of DML is more or less OK.
+
+ @return
+ @retval 0 not found
+ @retval pointer to trx
+ */
+
+ trx_t *find(trx_t *caller_trx, trx_id_t trx_id, bool do_ref_count)
+ {
+ /*
+ In MariaDB 10.3, purge will reset DB_TRX_ID to 0
+ when the history is lost. Read/write transactions will
+ always have a nonzero trx_t::id; there the value 0 is
+ reserved for transactions that did not write or lock
+ anything yet.
+
+ The caller should already have handled trx_id==0 specially.
+ */
+ ut_ad(trx_id);
+ if (caller_trx && caller_trx->id == trx_id)
+ {
+ if (do_ref_count)
+ caller_trx->reference();
+ return caller_trx;
+ }
+
+ trx_t *trx= 0;
+ LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+ ut_a(pins);
+
+ rw_trx_hash_element_t *element= reinterpret_cast<rw_trx_hash_element_t*>
+ (lf_hash_search(&hash, pins, reinterpret_cast<const void*>(&trx_id),
+ sizeof(trx_id_t)));
+ if (element)
+ {
+ mutex_enter(&element->mutex);
+ lf_hash_search_unpin(pins);
+ trx= element->trx;
+ if (!trx);
+ else if (UNIV_UNLIKELY(trx_id != trx->id))
+ trx= NULL;
+ else {
+ if (do_ref_count)
+ trx->reference();
+ ut_d(validate_element(trx));
+ }
+ mutex_exit(&element->mutex);
+ }
+ if (!caller_trx)
+ lf_hash_put_pins(pins);
+ return trx;
+ }
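
  /*
    A minimal usage sketch, not part of the patch itself: look up a
    transaction by id and keep it pinned while it is inspected. The function
    name and its arguments are illustrative; the calls it makes (find() with
    do_ref_count=true and trx_t::release_reference()) are the API declared in
    this class.
  */
  static bool example_is_active_rw(rw_trx_hash_t &hash, trx_t *caller_trx,
                                   trx_id_t id)
  {
    trx_t *trx= hash.find(caller_trx, id, true);
    if (!trx)
      return false; /* not in the hash: never registered or already erased */
    /* trx cannot be freed while the reference taken by find() is held */
    trx->release_reference();
    return true;
  }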
+
+
+ /**
+ Inserts trx to lock-free hash.
+
+ Object becomes accessible via rw_trx_hash.
+ */
+
+ void insert(trx_t *trx)
+ {
+ ut_d(validate_element(trx));
+ int res= lf_hash_insert(&hash, get_pins(trx),
+ reinterpret_cast<void*>(trx));
+ ut_a(res == 0);
+ }
+
+
+ /**
+ Removes trx from lock-free hash.
+
+    The object is no longer accessible via rw_trx_hash. But it can still be
+    pinned by a concurrent find(), which is supposed to release it immediately
+    after it sees that element->trx is 0.
+ */
+
+ void erase(trx_t *trx)
+ {
+ ut_d(validate_element(trx));
+ mutex_enter(&trx->rw_trx_hash_element->mutex);
+ trx->rw_trx_hash_element->trx= 0;
+ mutex_exit(&trx->rw_trx_hash_element->mutex);
+ int res= lf_hash_delete(&hash, get_pins(trx),
+ reinterpret_cast<const void*>(&trx->id),
+ sizeof(trx_id_t));
+ ut_a(res == 0);
+ }
+
+
+ /**
+ Returns the number of elements in the hash.
+
+ The number is exact only if hash is protected against concurrent
+ modifications (e.g. single threaded startup or hash is protected
+ by some mutex). Otherwise the number may be used as a hint only,
+ because it may change even before this method returns.
+ */
+
+ uint32_t size()
+ {
+ return uint32_t(my_atomic_load32_explicit(&hash.count,
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+
+ /**
+ Iterates the hash.
+
+ @param caller_trx used to get/set pins
+ @param action called for every element in hash
+    @param argument    opaque argument passed to action
+
+    May return the same element multiple times if the hash is under contention.
+    If the caller does not want to see the same transaction multiple times, it
+    has to call iterate_no_dups() instead.
+
+    May return an element whose transaction has already been committed. If the
+    caller does not want to see committed transactions, it has to skip them
+    under the element mutex:
+
+ mutex_enter(&element->mutex);
+      if (trx_t *trx= element->trx)
+ {
+ // trx is protected against commit in this branch
+ }
+ mutex_exit(&element->mutex);
+
+ May miss concurrently inserted transactions.
+
+ @return
+ @retval 0 iteration completed successfully
+ @retval 1 iteration was interrupted (action returned 1)
+ */
+
+ int iterate(trx_t *caller_trx, my_hash_walk_action action, void *argument)
+ {
+ LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+ ut_a(pins);
+#ifdef UNIV_DEBUG
+ debug_iterator_arg debug_arg= { action, argument };
+ action= reinterpret_cast<my_hash_walk_action>(debug_iterator);
+ argument= &debug_arg;
+#endif
+ int res= lf_hash_iterate(&hash, pins, action, argument);
+ if (!caller_trx)
+ lf_hash_put_pins(pins);
+ return res;
+ }
+
+
+ int iterate(my_hash_walk_action action, void *argument)
+ {
+ return iterate(current_trx(), action, argument);
+ }
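
  /*
    A sketch of an iterate() callback that follows the contract documented
    above; it is not part of the patch, and the callback name and counter
    argument are illustrative. Committed transactions are skipped by
    re-checking element->trx under element->mutex, exactly as prescribed.
  */
  static my_bool example_count_active(rw_trx_hash_element_t *element,
                                      ulint *count)
  {
    mutex_enter(&element->mutex);
    if (element->trx)
      (*count)++; /* element->trx is protected against commit in this branch */
    mutex_exit(&element->mutex);
    return 0;     /* returning nonzero would stop the iteration early */
  }
  /*
    Usage sketch: ulint n= 0;
    rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
                        (example_count_active), &n);
  */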
+
+
+ /**
+ Iterates the hash and eliminates duplicate elements.
+
+ @sa iterate()
+ */
+
+ int iterate_no_dups(trx_t *caller_trx, my_hash_walk_action action,
+ void *argument)
+ {
+ eliminate_duplicates_arg arg(size() + 32, action, argument);
+ return iterate(caller_trx, reinterpret_cast<my_hash_walk_action>
+ (eliminate_duplicates), &arg);
+ }
+
+
+ int iterate_no_dups(my_hash_walk_action action, void *argument)
+ {
+ return iterate_no_dups(current_trx(), action, argument);
+ }
+};
+
+
+/** The transaction system central memory data structure. */
+class trx_sys_t
+{
+ /**
+ The smallest number not yet assigned as a transaction id or transaction
+ number. Accessed and updated with atomic operations.
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_max_trx_id;
+
+
+ /**
+ Solves race conditions between register_rw() and snapshot_ids() as well as
+ race condition between assign_new_trx_no() and snapshot_ids().
+
+ @sa register_rw()
+ @sa assign_new_trx_no()
+ @sa snapshot_ids()
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_rw_trx_hash_version;
+
+
+ /**
+ TRX_RSEG_HISTORY list length (number of committed transactions to purge)
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) int32 rseg_history_len;
+
+ bool m_initialised;
+
+public:
+ /** Mutex protecting trx_list. */
+ MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex;
+
+ /** List of all transactions. */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_ut_list_t trx_list;
+
+ MY_ALIGNED(CACHE_LINE_SIZE)
/** Temporary rollback segments */
trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS];
- /** Avoid false sharing */
- const char pad4[CACHE_LINE_SIZE];
+ MY_ALIGNED(CACHE_LINE_SIZE)
trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
segments; NULL if slot not in use;
@@ -627,46 +830,378 @@ struct trx_sys_t {
single-threaded mode; not protected
by any mutex, because it is read-only
during multi-threaded operation */
- ulint rseg_history_len;
- /*!< Length of the TRX_RSEG_HISTORY
- list (update undo logs for committed
- transactions), protected by
- rseg->mutex */
-
- TrxIdSet rw_trx_set; /*!< Mapping from transaction id
- to transaction instance */
-
- ulint n_prepared_trx; /*!< Number of transactions currently
- in the XA PREPARED state */
-
- ulint n_prepared_recovered_trx; /*!< Number of transactions
- currently in XA PREPARED state that are
- also recovered. Such transactions cannot
- be added during runtime. They can only
- occur after recovery if mysqld crashed
- while there were XA PREPARED
- transactions. We disable query cache
- if such transactions exist. */
-};
-/** When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256)
+ /**
+ Lock-free hash of in memory read-write transactions.
+  Works faster when it is on its own cache line (tested).
+ */
+
+ MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;
+
+
+#ifdef WITH_WSREP
+ /** Latest recovered XID during startup */
+ XID recovered_wsrep_xid;
+#endif
+ /** Latest recovered binlog offset */
+ uint64_t recovered_binlog_offset;
+  /** Latest recovered binlog file name */
+ char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
+
+
+ /**
+ Constructor.
+
+  Some members may require late initialisation, thus we just mark the object
+  as uninitialised. Real initialisation happens in create().
+ */
+
+ trx_sys_t(): m_initialised(false) {}
+
+
+ /**
+ Returns the minimum trx id in rw trx list.
-/** Test if trx_sys->mutex is owned. */
-#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
+ This is the smallest id for which the trx can possibly be active. (But, you
+ must look at the trx->state to find out if the minimum trx id transaction
+ itself is active, or already committed.)
-/** Acquire the trx_sys->mutex. */
-#define trx_sys_mutex_enter() do { \
- mutex_enter(&trx_sys->mutex); \
-} while (0)
+ @return the minimum trx id, or m_max_trx_id if the trx list is empty
+ */
-/** Release the trx_sys->mutex. */
-#define trx_sys_mutex_exit() do { \
- trx_sys->mutex.exit(); \
-} while (0)
+ trx_id_t get_min_trx_id()
+ {
+ trx_id_t id= get_max_trx_id();
+ rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (get_min_trx_id_callback), &id);
+ return id;
+ }
-#include "trx0sys.ic"
+
+ /**
+ Determines the maximum transaction id.
+
+ @return maximum currently allocated trx id; will be stale after the
+ next call to trx_sys.get_new_trx_id()
+ */
+
+ trx_id_t get_max_trx_id()
+ {
+ return static_cast<trx_id_t>
+ (my_atomic_load64_explicit(reinterpret_cast<int64*>(&m_max_trx_id),
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+
+ /**
+ Allocates a new transaction id.
+ @return new, allocated trx id
+ */
+
+ trx_id_t get_new_trx_id()
+ {
+ trx_id_t id= get_new_trx_id_no_refresh();
+ refresh_rw_trx_hash_version();
+ return id;
+ }
+
+
+ /**
+ Allocates and assigns new transaction serialisation number.
+
+  There's a gap between the m_max_trx_id increment and the transaction
+  serialisation number becoming visible through rw_trx_hash. While we're in
+  this gap a concurrent thread may come and take an MVCC snapshot without
+  seeing the allocated but not yet assigned serialisation number. Then at some
+  point the purge thread may clone this view. As a result it won't see the
+  newly allocated serialisation number and may remove "unnecessary" history
+  data of this transaction from the rollback segments.
+
+ m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+ to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+ means that all transaction serialisation numbers up to m_max_trx_id are
+ available through rw_trx_hash.
+
+ We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+ that m_rw_trx_hash_version increment happens after
+ trx->rw_trx_hash_element->no becomes visible through rw_trx_hash.
+
+ @param trx transaction
+ */
+ void assign_new_trx_no(trx_t *trx)
+ {
+ trx->no= get_new_trx_id_no_refresh();
+ my_atomic_store64_explicit(reinterpret_cast<int64*>
+ (&trx->rw_trx_hash_element->no),
+ trx->no, MY_MEMORY_ORDER_RELAXED);
+ refresh_rw_trx_hash_version();
+ }
+
+
+ /**
+ Takes MVCC snapshot.
+
+  To reduce the probability of malloc() we reserve rw_trx_hash.size() + 32
+  elements in ids.
+
+  For details about the get_rw_trx_hash_version() != get_max_trx_id() spin,
+  @sa register_rw() and @sa assign_new_trx_no().
+
+ We rely on get_rw_trx_hash_version() to issue ACQUIRE memory barrier so
+ that loading of m_rw_trx_hash_version happens before accessing rw_trx_hash.
+
+  To optimise snapshot creation, rw_trx_hash.iterate() is used instead of
+  rw_trx_hash.iterate_no_dups(). This means that some transaction identifiers
+  may appear multiple times in ids.
+
+ @param[in,out] caller_trx used to get access to rw_trx_hash_pins
+ @param[out] ids array to store registered transaction identifiers
+ @param[out] max_trx_id variable to store m_max_trx_id value
+  @param[out]    min_trx_no  variable to store min(trx->no) value
+ */
+
+ void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
+ trx_id_t *min_trx_no)
+ {
+ ut_ad(!mutex_own(&mutex));
+ snapshot_ids_arg arg(ids);
+
+ while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id())
+ ut_delay(1);
+ arg.m_no= arg.m_id;
+
+ ids->clear();
+ ids->reserve(rw_trx_hash.size() + 32);
+ rw_trx_hash.iterate(caller_trx,
+ reinterpret_cast<my_hash_walk_action>(copy_one_id),
+ &arg);
+
+ *max_trx_id= arg.m_id;
+ *min_trx_no= arg.m_no;
+ }
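
  /*
    A sketch, not from the patch, of how an MVCC snapshot consumer could call
    snapshot_ids(); sys, caller_trx and the locals are illustrative names.
    Changes made by any id stored in ids, or by any id >= max_id, are meant to
    be invisible to a read view built from this snapshot.
  */
  static void example_take_snapshot(trx_sys_t &sys, trx_t *caller_trx)
  {
    trx_ids_t ids;
    trx_id_t  max_id, min_no;
    sys.snapshot_ids(caller_trx, &ids, &max_id, &min_no);
    /* ids may contain duplicates; callers needing an exact set must deduplicate */
  }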
+
+
+ /** Initialiser for m_max_trx_id and m_rw_trx_hash_version. */
+ void init_max_trx_id(trx_id_t value)
+ {
+ m_max_trx_id= m_rw_trx_hash_version= value;
+ }
+
+
+ bool is_initialised() { return m_initialised; }
+
+
+ /** Initialise the transaction subsystem. */
+ void create();
+
+ /** Close the transaction subsystem on shutdown. */
+ void close();
+
+ /** @return total number of active (non-prepared) transactions */
+ ulint any_active_transactions();
+
+
+ /**
+ Registers read-write transaction.
+
+ Transaction becomes visible to MVCC.
+
+  There's a gap between the m_max_trx_id increment and the transaction
+  becoming visible through rw_trx_hash. While we're in this gap a concurrent
+  thread may come and take an MVCC snapshot. As a result a concurrent read
+  view would be able to observe records owned by this transaction even before
+  it was committed.
+
+ m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+ to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+ means that all transactions up to m_max_trx_id are available through
+ rw_trx_hash.
+
+ We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+ that m_rw_trx_hash_version increment happens after transaction becomes
+ visible through rw_trx_hash.
+ */
+
+ void register_rw(trx_t *trx)
+ {
+ trx->id= get_new_trx_id_no_refresh();
+ rw_trx_hash.insert(trx);
+ refresh_rw_trx_hash_version();
+ }
+
+
+ /**
+ Deregisters read-write transaction.
+
+ Transaction is removed from rw_trx_hash, which releases all implicit locks.
+ MVCC snapshot won't see this transaction anymore.
+ */
+
+ void deregister_rw(trx_t *trx)
+ {
+ rw_trx_hash.erase(trx);
+ }
+
+
+ bool is_registered(trx_t *caller_trx, trx_id_t id)
+ {
+ return id && find(caller_trx, id, false);
+ }
+
+
+ trx_t *find(trx_t *caller_trx, trx_id_t id, bool do_ref_count= true)
+ {
+ return rw_trx_hash.find(caller_trx, id, do_ref_count);
+ }
+
+
+ /**
+ Registers transaction in trx_sys.
+
+ @param trx transaction
+ */
+ void register_trx(trx_t *trx)
+ {
+ mutex_enter(&mutex);
+ UT_LIST_ADD_FIRST(trx_list, trx);
+ mutex_exit(&mutex);
+ }
+
+
+ /**
+ Deregisters transaction in trx_sys.
+
+ @param trx transaction
+ */
+ void deregister_trx(trx_t *trx)
+ {
+ mutex_enter(&mutex);
+ UT_LIST_REMOVE(trx_list, trx);
+ mutex_exit(&mutex);
+ }
+
+
+ /**
+ Clones the oldest view and stores it in view.
+
+ No need to call ReadView::close(). The caller owns the view that is passed
+ in. This function is called by purge thread to determine whether it should
+ purge the delete marked record or not.
+ */
+ void clone_oldest_view();
+
+
+ /** @return the number of active views */
+ size_t view_count() const
+ {
+ size_t count= 0;
+
+ mutex_enter(&mutex);
+ for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN)
+ ++count;
+ }
+ mutex_exit(&mutex);
+ return count;
+ }
+
+ /** @return number of committed transactions waiting for purge */
+ ulint history_size() const
+ {
+ return uint32(my_atomic_load32(&const_cast<trx_sys_t*>(this)
+ ->rseg_history_len));
+ }
+ /** Add to the TRX_RSEG_HISTORY length (on database startup). */
+ void history_add(int32 len)
+ {
+ my_atomic_add32(&rseg_history_len, len);
+ }
+ /** Register a committed transaction. */
+ void history_insert() { history_add(1); }
+ /** Note that a committed transaction was purged. */
+ void history_remove() { history_add(-1); }
+
+private:
+ static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element,
+ trx_id_t *id)
+ {
+ if (element->id < *id)
+ {
+ mutex_enter(&element->mutex);
+ /* We don't care about read-only transactions here. */
+ if (element->trx && element->trx->rsegs.m_redo.rseg)
+ *id= element->id;
+ mutex_exit(&element->mutex);
+ }
+ return 0;
+ }
+
+
+ struct snapshot_ids_arg
+ {
+ snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {}
+ trx_ids_t *m_ids;
+ trx_id_t m_id;
+ trx_id_t m_no;
+ };
+
+
+ static my_bool copy_one_id(rw_trx_hash_element_t *element,
+ snapshot_ids_arg *arg)
+ {
+ if (element->id < arg->m_id)
+ {
+ trx_id_t no= static_cast<trx_id_t>(my_atomic_load64_explicit(
+ reinterpret_cast<int64*>(&element->no), MY_MEMORY_ORDER_RELAXED));
+ arg->m_ids->push_back(element->id);
+ if (no < arg->m_no)
+ arg->m_no= no;
+ }
+ return 0;
+ }
+
+
+ /** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
+ trx_id_t get_rw_trx_hash_version()
+ {
+ return static_cast<trx_id_t>
+ (my_atomic_load64_explicit(reinterpret_cast<int64*>
+ (&m_rw_trx_hash_version),
+ MY_MEMORY_ORDER_ACQUIRE));
+ }
+
+
+ /** Increments m_rw_trx_hash_version, must issue RELEASE memory barrier. */
+ void refresh_rw_trx_hash_version()
+ {
+ my_atomic_add64_explicit(reinterpret_cast<int64*>(&m_rw_trx_hash_version),
+ 1, MY_MEMORY_ORDER_RELEASE);
+ }
+
+
+ /**
+ Allocates new transaction id without refreshing rw_trx_hash version.
+
+ This method is extracted for exclusive use by register_rw() and
+ assign_new_trx_no() where new id must be allocated atomically with
+ payload of these methods from MVCC snapshot point of view.
+
+ @sa get_new_trx_id()
+ @sa assign_new_trx_no()
+
+ @return new transaction id
+ */
+
+ trx_id_t get_new_trx_id_no_refresh()
+ {
+ return static_cast<trx_id_t>(my_atomic_add64_explicit(
+ reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED));
+ }
+};
+
+
+/** The transaction system */
+extern trx_sys_t trx_sys;
#endif
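
/*
  A sketch of the intended read-write transaction lifecycle against the new
  trx_sys API, as far as it can be inferred from the header above. It is not
  part of the patch, and the real call sites (trx0trx.cc and friends) may
  order these steps differently around commit.
*/
static void example_rw_lifecycle(trx_t *trx)
{
  trx_sys.register_rw(trx);       /* allocates trx->id, inserts into rw_trx_hash */
  /* ... write undo log records, acquire locks ... */
  trx_sys.assign_new_trx_no(trx); /* serialisation number at commit time */
  trx_sys.deregister_rw(trx);     /* erase from rw_trx_hash; MVCC no longer sees it */
}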
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
deleted file mode 100644
index 525ae5a6b98..00000000000
--- a/storage/innobase/include/trx0sys.ic
+++ /dev/null
@@ -1,461 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.ic
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-#include "data0type.h"
-#include "srv0srv.h"
-#include "mtr0log.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the segment
- header is placed; starting with
- MySQL/InnoDB 5.1.7, this is
- UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment
- header is placed; this is FIL_NULL
- if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE 8
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/** Checks if a page address is the trx sys header page.
-@param[in] page_id page id
-@return true if trx sys header page */
-inline bool trx_sys_hdr_page(const page_id_t page_id)
-{
- return(page_id.space() == TRX_SYS_SPACE
- && page_id.page_no() == TRX_SYS_PAGE_NO);
-}
-
-/**********************************************************************//**
-Gets a pointer to the transaction system header and x-latches its page.
-@return pointer to system header, page x-latched. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = NULL;
- trx_sysf_t* header = NULL;
-
- ut_ad(mtr);
-
- block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
- univ_page_size, RW_X_LATCH, mtr);
-
- if (block) {
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- header = TRX_SYS + buf_block_get_frame(block);
- }
-
- return(header);
-}
-
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-header.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE,
- space,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if the
- slot is reset to unused */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO,
- page_no,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id) /*!< in: id */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- ut_ad(id > 0);
- mach_write_to_6(ptr, id);
-}
-
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr) /*!< in: pointer to memory from where to read */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- return(mach_read_from_6(ptr));
-}
-
-/****************************************************************//**
-Looks for the trx handle with the given id in rw_trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id) /*!< in: trx id to search for */
-{
- ut_ad(trx_id > 0);
- ut_ad(trx_sys_mutex_own());
-
- if (trx_sys->rw_trx_set.empty()) {
- return(NULL);
- }
-
- TrxIdSet::iterator it;
-
- it = trx_sys->rw_trx_set.find(TrxTrack(trx_id));
-
- return(it == trx_sys->rw_trx_set.end() ? NULL : it->m_trx);
-}
-
-/****************************************************************//**
-Returns the minimum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id_low(void)
-/*=======================*/
-{
- trx_id_t id;
-
- ut_ad(trx_sys_mutex_own());
-
- const trx_t* trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
-
- if (trx == NULL) {
- id = trx_sys->max_trx_id;
- } else {
- assert_trx_in_rw_list(trx);
- id = trx->id;
- }
-
- return(id);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
-{
- const trx_t* trx;
-
- trx_sys_mutex_enter();
-
- trx = trx_get_rw_trx_by_id(trx_id);
- ut_a(trx->is_recovered);
-
- trx_sys_mutex_exit();
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the rw trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void)
-/*===================*/
-{
- trx_sys_mutex_enter();
-
- trx_id_t id = trx_rw_min_trx_id_low();
-
- trx_sys_mutex_exit();
-
- return(id);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-{
- trx_t* trx;
-
- ut_ad(trx_sys_mutex_own());
-
- if (trx_id < trx_rw_min_trx_id_low()) {
-
- trx = NULL;
- } else if (trx_id >= trx_sys->max_trx_id) {
-
- /* There must be corruption: we let the caller handle the
- diagnostic prints in this case. */
-
- trx = NULL;
- if (corrupt != NULL) {
- *corrupt = TRUE;
- }
- } else {
- trx = trx_get_rw_trx_by_id(trx_id);
-
- if (trx != NULL
- && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-
- trx = NULL;
- }
- }
-
- return(trx);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt, /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
- bool do_ref_count) /*!< in: if true then increment the
- trx_t::n_ref_count */
-{
- ut_ad(trx_id);
-
- trx_sys_mutex_enter();
-
- trx_t* trx = trx_rw_is_active_low(trx_id, corrupt);
-
- if (trx) {
- trx = trx_reference(do_ref_count ? trx_id : 0, trx);
- }
-
- trx_sys_mutex_exit();
-
- return(trx);
-}
-
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id()
-/*====================*/
-{
- /* wsrep_fake_trx_id violates this assert */
- ut_ad(trx_sys_mutex_own());
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
- will evaluate to TRUE when this function is first time called,
- and the value for trx id will be written to disk-based header!
- Thus trx id values will not overlap when the database is
- repeatedly started! */
-
- if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) {
-
- trx_sys_flush_max_trx_id();
- }
-
- return(trx_sys->max_trx_id++);
-}
-
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void)
-/*========================*/
-{
- ut_ad(!trx_sys_mutex_own());
-
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
- /* Avoid torn reads. */
-
- trx_sys_mutex_enter();
-
- trx_id_t max_trx_id = trx_sys->max_trx_id;
-
- trx_sys_mutex_exit();
-
- return(max_trx_id);
-#else
- /* Perform a dirty read. Callers should be prepared for stale
- values, and we know that the value fits in a machine word, so
- that it will be read and written atomically. */
- return(trx_sys->max_trx_id);
-#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */
-}
-
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::rw_trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void)
-/*======================*/
-{
- ulint n_trx;
-
- trx_sys_mutex_enter();
-
- n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- trx_sys_mutex_exit();
-
- return(n_trx);
-}
-
-/**
-Add the transaction to the RW transaction set
-@param trx transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx)
-{
- ut_ad(trx->id != 0);
-
- trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
- ut_d(trx->in_rw_trx_list = true);
-}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 8fde5689e40..9fb65c19911 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -34,6 +34,7 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0xa.h"
#include "ut0vec.h"
#include "fts0fts.h"
+#include "read0types.h"
#include <vector>
#include <set>
@@ -42,11 +43,10 @@ Created 3/26/1996 Heikki Tuuri
struct mtr_t;
// Forward declaration
-class ReadView;
-
-// Forward declaration
class FlushObserver;
+struct rw_trx_hash_element_t;
+
/** Set flush observer for the transaction
@param[in/out] trx transaction struct
@param[in] observer flush observer */
@@ -78,45 +78,19 @@ const dict_index_t*
trx_get_error_info(
/*===============*/
const trx_t* trx); /*!< in: trx object */
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
-
-/** Frees and initialize a transaction object instantinated during recovery.
-@param trx trx object to free and initialize during recovery */
-void
-trx_free_resurrected(trx_t* trx);
-
-/** Free a transaction that was allocated by background or user threads.
-@param trx trx object to free */
-void
-trx_free_for_background(trx_t* trx);
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
-void
-trx_free_prepared(
-/*==============*/
- trx_t* trx); /*!< in, own: trx object */
+/** @return a trx_t instance from trx_pools. */
+trx_t *trx_create();
-/** Free a transaction object for MySQL.
-@param[in,out] trx transaction */
-void
-trx_free_for_mysql(trx_t* trx);
+/**
+ Release a trx_t instance back to the pool.
+ @param trx the instance to release.
+*/
+void trx_free(trx_t*& trx);
-/** Disconnect a transaction from MySQL.
-@param[in,out] trx transaction */
+/** At shutdown, frees a transaction object. */
void
-trx_disconnect_plain(trx_t* trx);
+trx_free_at_shutdown(trx_t *trx);
/** Disconnect a prepared transaction from MySQL.
@param[in,out] trx transaction */
@@ -225,22 +199,10 @@ trx_commit(
/*=======*/
trx_t* trx); /*!< in/out: transaction */
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr); /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx); /*!< in: transaction */
+/** Commit a transaction and a mini-transaction.
+@param[in,out] trx transaction
+@param[in,out] mtr mini-transaction (NULL if no modifications) */
+void trx_commit_low(trx_t* trx, mtr_t* mtr);
/**********************************************************************//**
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
@@ -259,13 +221,13 @@ int
trx_recover_for_mysql(
/*==================*/
XID* xid_list, /*!< in/out: prepared transactions */
- ulint len); /*!< in: number of slots in xid_list */
+ uint len); /*!< in: number of slots in xid_list */
/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
which is in the prepared state
@return trx or NULL; on match, the trx->xid will be invalidated;
note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
+holding lock_sys.mutex */
trx_t *
trx_get_trx_by_xid(
/*===============*/
@@ -283,31 +245,6 @@ void
trx_mark_sql_stat_end(
/*==================*/
trx_t* trx); /*!< in: trx handle */
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-ReadView*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx); /*!< in: active transaction */
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
-/*==============*/
- trx_t* trx);
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
-/*==============*/
- const trx_t* trx);
-
/****************************************************************//**
Prepares a transaction for commit/rollback. */
void
@@ -331,7 +268,7 @@ trx_commit_step(
/**********************************************************************//**
Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
+Caller must hold trx_sys.mutex. */
void
trx_print_low(
/*==========*/
@@ -351,7 +288,7 @@ trx_print_low(
/**********************************************************************//**
Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
+The caller must hold lock_sys.mutex and trx_sys.mutex.
When possible, use trx_print() instead. */
void
trx_print_latched(
@@ -361,25 +298,9 @@ trx_print_latched(
ulint max_query_len); /*!< in: max query length to print,
or 0 to use the default max length */
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-#endif /* WITH_WSREP */
/**********************************************************************//**
Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+Acquires and releases lock_sys.mutex. */
void
trx_print(
/*======*/
@@ -409,9 +330,9 @@ trx_set_dict_operation(
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
UNIV_INLINE
bool
@@ -427,22 +348,11 @@ trx_state_eq(
trx->state == TRX_STATE_NOT_STARTED
after an error has been reported */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((warn_unused_result));
-# endif /* UNIV_DEBUG */
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-ibool
+@return true if interrupted */
+bool
trx_is_interrupted(
/*===============*/
const trx_t* trx); /*!< in: transaction */
@@ -515,18 +425,6 @@ trx_set_rw_mode(
trx_t* trx);
/**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
- trx_t* trx);
-
-/**
-Check if the transaction is being referenced. */
-#define trx_is_referenced(t) ((t)->n_ref > 0)
-
-/**
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
wait timeout to 0 instead of the user configured value that comes
@@ -555,15 +453,6 @@ with an explicit check for the read-only status.
((t)->read_only && trx_is_autocommit_non_locking((t)))
/**
-Assert that the transaction is in the trx_sys_t::rw_trx_list */
-#define assert_trx_in_rw_list(t) do { \
- ut_ad(!(t)->read_only); \
- ut_ad((t)->in_rw_trx_list \
- == !((t)->read_only || !(t)->rsegs.m_redo.rseg)); \
- check_trx_state(t); \
-} while (0)
-
-/**
Check transaction state */
#define check_trx_state(t) do { \
ut_ad(!trx_is_autocommit_non_locking((t))); \
@@ -585,8 +474,8 @@ Check transaction state */
ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED)); \
ut_ad(!(t)->id); \
ut_ad(!(t)->has_logged()); \
- ut_ad(!(t)->n_ref); \
- ut_ad(!MVCC::is_view_active((t)->read_view)); \
+ ut_ad(!(t)->is_referenced()); \
+ ut_ad(!(t)->read_view.is_open()); \
ut_ad((t)->lock.wait_thr == NULL); \
ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \
ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \
@@ -603,16 +492,15 @@ transaction pool.
#ifdef UNIV_DEBUG
/*******************************************************************//**
Assert that an autocommit non-locking select cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
# define assert_trx_nonlocking_or_in_list(t) \
do { \
if (trx_is_autocommit_non_locking(t)) { \
trx_state_t t_state = (t)->state; \
ut_ad((t)->read_only); \
ut_ad(!(t)->is_recovered); \
- ut_ad(!(t)->in_rw_trx_list); \
- ut_ad((t)->in_mysql_trx_list); \
+ ut_ad((t)->mysql_thd); \
ut_ad(t_state == TRX_STATE_NOT_STARTED \
|| t_state == TRX_STATE_ACTIVE); \
} else { \
@@ -622,8 +510,8 @@ The tranasction must be in the mysql_trx_list. */
#else /* UNIV_DEBUG */
/*******************************************************************//**
Assert that an autocommit non-locking slect cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
#endif /* UNIV_DEBUG */
@@ -650,7 +538,7 @@ To query the state either of the mutexes is sufficient within the locking
code and no mutex is required when the query thread is no longer waiting. */
/** The locks and state of an active transaction. Protected by
-lock_sys->mutex, trx->mutex or both. */
+lock_sys.mutex, trx->mutex or both. */
struct trx_lock_t {
ulint n_active_thrs; /*!< number of active query threads */
@@ -662,10 +550,10 @@ struct trx_lock_t {
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is
NULL; set to non-NULL when holding
- both trx->mutex and lock_sys->mutex;
+ both trx->mutex and lock_sys.mutex;
set to NULL when holding
- lock_sys->mutex; readers should
- hold lock_sys->mutex, except when
+ lock_sys.mutex; readers should
+ hold lock_sys.mutex, except when
they are holding trx->mutex and
wait_lock==NULL */
ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
@@ -679,13 +567,13 @@ struct trx_lock_t {
resolution, it sets this to true.
Protected by trx->mutex. */
time_t wait_started; /*!< lock wait started at this time,
- protected only by lock_sys->mutex */
+ protected only by lock_sys.mutex */
que_thr_t* wait_thr; /*!< query thread belonging to this
trx that is in QUE_THR_LOCK_WAIT
state. For threads suspended in a
lock wait, this is protected by
- lock_sys->mutex. Otherwise, this may
+ lock_sys.mutex. Otherwise, this may
only be modified by the thread that is
serving the running transaction. */
@@ -704,12 +592,12 @@ struct trx_lock_t {
unsigned table_cached;
mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
- protected by lock_sys->mutex */
+ protected by lock_sys.mutex */
trx_lock_list_t trx_locks; /*!< locks requested by the transaction;
insertions are protected by trx->mutex
- and lock_sys->mutex; removals are
- protected by lock_sys->mutex */
+ and lock_sys.mutex; removals are
+ protected by lock_sys.mutex */
lock_list table_locks; /*!< All table locks requested by this
transaction, including AUTOINC locks */
@@ -728,14 +616,73 @@ struct trx_lock_t {
ulint n_rec_locks; /*!< number of rec locks in this trx */
};
-/** Type used to store the list of tables that are modified by a given
-transaction. We store pointers to the table objects in memory because
+/** Logical first modification time of a table in a transaction */
+class trx_mod_table_time_t
+{
+ /** First modification of the table */
+ undo_no_t first;
+ /** First modification of a system versioned column */
+ undo_no_t first_versioned;
+
+ /** Magic value signifying that a system versioned column of a
+ table was never modified in a transaction. */
+ static const undo_no_t UNVERSIONED = IB_ID_MAX;
+
+public:
+ /** Constructor
+ @param[in] rows number of modified rows so far */
+ trx_mod_table_time_t(undo_no_t rows)
+ : first(rows), first_versioned(UNVERSIONED) {}
+
+#ifdef UNIV_DEBUG
+ /** Validation
+ @param[in] rows number of modified rows so far
+ @return whether the object is valid */
+ bool valid(undo_no_t rows = UNVERSIONED) const
+ {
+ return first <= first_versioned && first <= rows;
+ }
+#endif /* UNIV_DEBUG */
+ /** @return if versioned columns were modified */
+ bool is_versioned() const { return first_versioned != UNVERSIONED; }
+
+ /** After writing an undo log record, set is_versioned() if needed
+ @param[in] rows number of modified rows so far */
+ void set_versioned(undo_no_t rows)
+ {
+ ut_ad(!is_versioned());
+ first_versioned = rows;
+ ut_ad(valid());
+ }
+
+ /** Invoked after partial rollback
+ @param[in] limit number of surviving modified rows
+ @return whether this should be erased from trx_t::mod_tables */
+ bool rollback(undo_no_t limit)
+ {
+ ut_ad(valid());
+ if (first >= limit) {
+ return true;
+ }
+
+ if (first_versioned < limit && is_versioned()) {
+ first_versioned = UNVERSIONED;
+ }
+
+ return false;
+ }
+};
+
+/** Collection of persistent tables and their first modification
+in a transaction.
+We store pointers to the table objects in memory because
we know that a table object will not be destroyed while a transaction
that modified it is running. */
-typedef std::set<
- dict_table_t*,
+typedef std::map<
+ dict_table_t*, trx_mod_table_time_t,
std::less<dict_table_t*>,
- ut_allocator<dict_table_t*> > trx_mod_tables_t;
+ ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
+ trx_mod_tables_t;
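
/*
  A sketch, not part of the patch, of how mod_tables entries are meant to be
  pruned on partial rollback, judging from the trx_mod_table_time_t interface
  above; the real logic lives in the rollback code and may differ in detail.
*/
static void example_prune_mod_tables(trx_mod_tables_t &mod_tables,
                                     undo_no_t limit)
{
  for (trx_mod_tables_t::iterator i= mod_tables.begin();
       i != mod_tables.end(); )
  {
    if (i->second.rollback(limit))
      mod_tables.erase(i++); /* first modification was entirely rolled back */
    else
      ++i;
  }
}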
/** The transaction handle
@@ -765,30 +712,31 @@ so without holding any mutex. The following are exceptions to this:
* trx_rollback_resurrected() may access resurrected (connectionless)
transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
+transactions. The trx_sys.mutex prevents a race condition between it
and lock_trx_release_locks() [invoked by trx_commit()].
* trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
+thread. The caller must be holding lock_sys.mutex.
-* When a transaction handle is in the trx_sys->mysql_trx_list or
-trx_sys->trx_list, some of its fields must not be modified without
-holding trx_sys->mutex exclusively.
+* When a transaction handle is in the trx_sys.trx_list, some of its fields
+must not be modified without holding trx->mutex.
* The locking code (in particular, lock_deadlock_recursive() and
lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
+lock_sys.mutex and sometimes by trx->mutex. */
/** Represents an instance of rollback segment along with its state variables.*/
struct trx_undo_ptr_t {
trx_rseg_t* rseg; /*!< rollback segment assigned to the
transaction, or NULL if not assigned
yet */
- trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /*!< pointer to the update undo log, or
- NULL if no update performed yet */
+ trx_undo_t* undo; /*!< pointer to the undo log, or
+ NULL if nothing logged yet */
+ trx_undo_t* old_insert; /*!< pointer to recovered
+ insert undo log, or NULL if no
+ INSERT transactions were
+ recovered from old-format undo logs */
};
/** An instance of temporary rollback segment. */
@@ -812,10 +760,23 @@ struct trx_rsegs_t {
};
struct trx_t {
+private:
+ /**
+ Count of references.
+
+ We can't release the locks nor commit the transaction until this reference
+ is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
+ that it is no longer "active".
+ */
+
+ int32_t n_ref;
+
+
+public:
TrxMutex mutex; /*!< Mutex protecting the fields
state and lock (except some fields
of lock, which are protected by
- lock_sys->mutex) */
+ lock_sys.mutex) */
trx_id_t id; /*!< transaction id */
@@ -824,7 +785,7 @@ struct trx_t {
transaction is moved to
COMMITTED_IN_MEMORY state.
Protected by trx_sys_t::mutex
- when trx->in_rw_trx_list. Initially
+ when trx is in rw_trx_hash. Initially
set to TRX_ID_MAX. */
/** State of the trx from the point of view of concurrency control
@@ -851,6 +812,9 @@ struct trx_t {
Recovered XA:
* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+ Recovered XA followed by XA ROLLBACK:
+ * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
+
XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
* NOT_STARTED -> PREPARED -> (freed)
@@ -861,11 +825,11 @@ struct trx_t {
XA (2PC) transactions are always treated as non-autocommit.
- Transitions to ACTIVE or NOT_STARTED occur when
- !in_rw_trx_list (no trx_sys->mutex needed).
+ Transitions to ACTIVE or NOT_STARTED occur when transaction
+ is not in rw_trx_hash (no trx_sys.mutex needed).
Autocommit non-locking read-only transactions move between states
- without holding any mutex. They are !in_rw_trx_list.
+ without holding any mutex. They are not in rw_trx_hash.
All transactions, unless they are determined to be ac-nl-ro,
explicitly tagged as read-only or read-write, will first be put
@@ -874,16 +838,16 @@ struct trx_t {
do we remove it from the read-only list and put it on the read-write
list. During this switch we assign it a rollback segment.
- When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
- it is a user transaction. It cannot be in rw_trx_list.
+ When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
+ in rw_trx_hash.
- ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
- The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
+ ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
+ The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
ACTIVE->COMMITTED is possible when the transaction is in
- rw_trx_list.
+ rw_trx_hash.
- Transitions to COMMITTED are protected by both lock_sys->mutex
+ Transitions to COMMITTED are protected by both lock_sys.mutex
and trx->mutex.
NOTE: Some of these state change constraints are an overkill,
@@ -892,25 +856,16 @@ struct trx_t {
trx_state_t state;
- ReadView* read_view; /*!< consistent read view used in the
+ ReadView read_view; /*!< consistent read view used in the
transaction, or NULL if not yet set */
-
- UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions;
- protected by trx_sys->mutex. */
- UT_LIST_NODE_T(trx_t)
- no_list; /*!< Required during view creation
- to check for the view limit for
- transactions that are committing */
-
trx_lock_t lock; /*!< Information about the transaction
locks and state. Protected by
- trx->mutex or lock_sys->mutex
+ trx->mutex or lock_sys.mutex
or both */
bool is_recovered; /*!< 0=normal transaction,
1=recovered, must be rolled back,
- protected by trx_sys->mutex when
- trx->in_rw_trx_list holds */
+ protected by trx_sys.mutex when
+ trx is in rw_trx_hash */
/* These fields are not protected by any mutex. */
@@ -989,7 +944,7 @@ struct trx_t {
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- int64_t mysql_log_offset;
+ ulonglong mysql_log_offset;
/*!< if MySQL binlog is used, this
field contains the end offset of the
binlog entry */
@@ -1002,21 +957,8 @@ struct trx_t {
statement uses, except those
in consistent read */
/*------------------------------*/
-#ifdef UNIV_DEBUG
- /** The following two fields are mutually exclusive. */
- /* @{ */
-
- bool in_rw_trx_list; /*!< true if in trx_sys->rw_trx_list */
- /* @} */
-#endif /* UNIV_DEBUG */
- UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /*!< list of transactions created for
- MySQL; protected by trx_sys->mutex */
-#ifdef UNIV_DEBUG
- bool in_mysql_trx_list;
- /*!< true if in
- trx_sys->mysql_trx_list */
-#endif /* UNIV_DEBUG */
+ UT_LIST_NODE_T(trx_t) trx_list; /*!< list of all transactions;
+ protected by trx_sys.mutex */
/*------------------------------*/
dberr_t error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
@@ -1040,12 +982,6 @@ struct trx_t {
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- UndoMutex undo_mutex; /*!< mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
undo_no_t undo_no; /*!< next undo log record number to
assign; since the undo log is
private for a transaction, this
@@ -1053,21 +989,15 @@ struct trx_t {
with no gaps; thus it represents
the number of modified/inserted
rows in a transaction */
- ulint undo_rseg_space;
- /*!< space id where last undo record
- was written */
trx_savept_t last_sql_stat_start;
/*!< undo_no when the last sql statement
was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
+ is rolled back down to this number */
trx_rsegs_t rsegs; /* rollback segments for undo logging */
undo_no_t roll_limit; /*!< least undo number to undo during
a partial rollback; 0 otherwise */
-#ifdef UNIV_DEBUG
bool in_rollback; /*!< true when the transaction is
executing a partial or full rollback */
-#endif /* UNIV_DEBUG */
ulint pages_undone; /*!< number of undo log pages undone
since the last undo log truncation */
/*------------------------------*/
@@ -1079,7 +1009,7 @@ struct trx_t {
also in the lock list trx_locks. This
vector needs to be freed explicitly
when the trx instance is destroyed.
- Protected by lock_sys->mutex. */
+ Protected by lock_sys.mutex. */
/*------------------------------*/
bool read_only; /*!< true if transaction is flagged
as a READ-ONLY transaction.
@@ -1116,14 +1046,6 @@ struct trx_t {
const char* start_file; /*!< Filename where it was started */
#endif /* UNIV_DEBUG */
- lint n_ref; /*!< Count of references, protected
- by trx_t::mutex. We can't release the
- locks nor commit the transaction until
- this reference is 0. We can change
- the state to COMMITTED_IN_MEMORY to
- signify that it is no longer
- "active". */
-
XID* xid; /*!< X/Open XA transaction
identification to identify a
transaction branch */
@@ -1152,12 +1074,14 @@ struct trx_t {
os_event_t wsrep_event; /* event waited for in srv_conc_slot */
#endif /* WITH_WSREP */
+ rw_trx_hash_element_t *rw_trx_hash_element;
+ LF_PINS *rw_trx_hash_pins;
ulint magic_n;
/** @return whether any persistent undo log has been generated */
bool has_logged_persistent() const
{
- return(rsegs.m_redo.insert_undo || rsegs.m_redo.update_undo);
+ return(rsegs.m_redo.undo);
}
/** @return whether any undo log has been generated */
@@ -1166,6 +1090,13 @@ struct trx_t {
return(has_logged_persistent() || rsegs.m_noredo.undo);
}
+ /** @return whether any undo log has been generated or
+ recovered */
+ bool has_logged_or_recovered() const
+ {
+ return(has_logged() || rsegs.m_redo.old_insert);
+ }
+
/** @return rollback segment for modifying temporary tables */
trx_rseg_t* get_temp_rseg()
{
@@ -1177,6 +1108,33 @@ struct trx_t {
return(assign_temp_rseg());
}
+
+ bool is_referenced()
+ {
+ return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
+ }
+
+
+ void reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref >= 0);
+ }
+
+
+ void release_reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref > 0);
+ }
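
  /*
    A sketch, not from the patch, of the invariant the atomic n_ref protects,
    as described in the comment on n_ref above: the owner must not release
    locks or complete the commit while concurrent find() callers still hold a
    reference. Whether the real commit path spins exactly like this is an
    assumption.
  */
  void example_wait_until_unreferenced()
  {
    while (is_referenced())
      ut_delay(1); /* wait for concurrent rw_trx_hash.find() users to finish */
    /* now it is safe to release locks and move to TRX_STATE_COMMITTED_IN_MEMORY */
  }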
+
+
private:
/** Assign a rollback segment for modifying temporary tables.
@return the assigned rollback segment */
@@ -1262,32 +1220,6 @@ struct commit_node_t{
mutex_exit(&t->mutex); \
} while (0)
-/**
-Increase the reference count. If the transaction is in state
-TRX_STATE_COMMITTED_IN_MEMORY then the transaction is considered
-committed and the reference count is not incremented.
-@param id the transaction ID; 0 if not to increment the reference count
-@param trx Transaction that is being referenced
-@return trx
-@retval NULL if the transaction is no longer active */
-inline trx_t* trx_reference(trx_id_t id, trx_t* trx)
-{
- trx_mutex_enter(trx);
-
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
- trx = NULL;
- } else if (!id) {
- } else if (trx->id != id) {
- trx = NULL;
- } else {
- ut_ad(trx->n_ref >= 0);
- ++trx->n_ref;
- }
-
- trx_mutex_exit(trx);
- return(trx);
-}
-
#include "trx0trx.ic"
#endif
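
The hunks above replace the old mutex-protected trx_reference()/n_ref bookkeeping with a relaxed atomic counter behind the new is_referenced()/reference()/release_reference() members. A minimal standalone sketch of the same pattern, assuming std::atomic in place of the my_atomic_*() wrappers (refcounted_trx is an illustrative type, not part of the patch):

#include <atomic>
#include <cassert>
#include <cstdint>

struct refcounted_trx {
	std::atomic<int32_t> n_ref{0};

	bool is_referenced() const {
		return n_ref.load(std::memory_order_relaxed) > 0;
	}
	void reference() {
		int32_t old_n_ref = n_ref.fetch_add(1, std::memory_order_relaxed);
		assert(old_n_ref >= 0);	/* mirrors ut_ad(old_n_ref >= 0) */
		(void) old_n_ref;
	}
	void release_reference() {
		int32_t old_n_ref = n_ref.fetch_sub(1, std::memory_order_relaxed);
		assert(old_n_ref > 0);	/* mirrors ut_ad(old_n_ref > 0) */
		(void) old_n_ref;
	}
};

int main()
{
	refcounted_trx trx;
	trx.reference();		/* pin the transaction */
	assert(trx.is_referenced());
	trx.release_reference();	/* unpin; commit may proceed once the count is 0 */
	assert(!trx.is_referenced());
}
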
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index dd42c8b8368..6589aca4e77 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -24,13 +24,11 @@ The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "read0read.h"
-
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
UNIV_INLINE
bool
@@ -69,8 +67,6 @@ trx_state_eq(
|| (relaxed
&& thd_get_error_number(trx->mysql_thd)));
- ut_ad(!trx->in_rw_trx_list);
-
return(true);
}
ut_error;
@@ -209,42 +205,3 @@ ok:
trx->ddl = true;
trx->dict_operation = op;
}
-
-/**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
- trx_t* trx)
-{
- trx_mutex_enter(trx);
-
- ut_ad(trx->n_ref > 0);
- --trx->n_ref;
-
- trx_mutex_exit(trx);
-}
-
-
-/**
-@param trx Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
- trx_t* trx)
-{
- return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
-
-/**
-@param trx Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
- const trx_t* trx)
-{
- return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index de26de44193..252d93796ee 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -30,11 +30,8 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0byte.h"
#include "ut0mutex.h"
-#include <set>
#include <vector>
-//#include <unordered_set>
-
/** printf(3) format used for printing DB_TRX_ID and other system fields */
#define TRX_ID_FMT IB_ID_FMT
@@ -93,8 +90,6 @@ enum trx_dict_op_t {
struct trx_t;
/** The locks and state of an active transaction */
struct trx_lock_t;
-/** Transaction system */
-struct trx_sys_t;
/** Signal */
struct trx_sig_t;
/** Rollback segment */
@@ -118,9 +113,6 @@ typedef ib_id_t roll_ptr_t;
/** Undo number */
typedef ib_id_t undo_no_t;
-/** Maximum transaction identifier */
-#define TRX_ID_MAX IB_ID_MAX
-
/** Transaction savepoint */
struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
@@ -128,8 +120,6 @@ struct trx_savept_t{
/** File objects */
/* @{ */
-/** Transaction system header */
-typedef byte trx_sysf_t;
/** Rollback segment header */
typedef byte trx_rsegf_t;
/** Undo segment header */
@@ -146,56 +136,8 @@ typedef byte trx_undo_rec_t;
typedef ib_mutex_t RsegMutex;
typedef ib_mutex_t TrxMutex;
-typedef ib_mutex_t UndoMutex;
typedef ib_mutex_t PQMutex;
typedef ib_mutex_t TrxSysMutex;
typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t;
-
-/** Mapping read-write transactions from id to transaction instance, for
-creating read views and during trx id lookup for MVCC and locking. */
-struct TrxTrack {
- explicit TrxTrack(trx_id_t id, trx_t* trx = NULL)
- :
- m_id(id),
- m_trx(trx)
- {
- // Do nothing
- }
-
- trx_id_t m_id;
- trx_t* m_trx;
-};
-
-struct TrxTrackHash {
- size_t operator()(const TrxTrack& key) const
- {
- return(size_t(key.m_id));
- }
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackHashCmp {
-
- bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
- {
- return(lhs.m_id == rhs.m_id);
- }
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackCmp {
-
- bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
- {
- return(lhs.m_id < rhs.m_id);
- }
-};
-
-//typedef std::unordered_set<TrxTrack, TrxTrackHash, TrxTrackHashCmp> TrxIdSet;
-typedef std::set<TrxTrack, TrxTrackCmp, ut_allocator<TrxTrack> >
- TrxIdSet;
-
#endif /* trx0types_h */
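
For context on what the deleted tracking types provided: TrxIdSet was an ordered std::set keyed on the transaction id, and the comparator only looked at m_id, so an id-only probe located the tracked trx_t. The new rw_trx_hash_element/rw_trx_hash_pins members in trx0trx.h above point at its lock-free replacement. A self-contained model of the removed shape (void* stands in for trx_t* here):

#include <cassert>
#include <cstdint>
#include <set>

typedef uint64_t trx_id_t;

struct TrxTrack {
	explicit TrxTrack(trx_id_t id, void* trx = nullptr) : m_id(id), m_trx(trx) {}
	trx_id_t	m_id;
	void*		m_trx;
};

struct TrxTrackCmp {
	bool operator()(const TrxTrack& lhs, const TrxTrack& rhs) const {
		return lhs.m_id < rhs.m_id;
	}
};

typedef std::set<TrxTrack, TrxTrackCmp> TrxIdSet;

int main()
{
	TrxIdSet rw_trx_set;
	rw_trx_set.insert(TrxTrack(42));
	/* an id-only key suffices, because ordering ignores m_trx */
	assert(rw_trx_set.find(TrxTrack(42)) != rw_trx_set.end());
}
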
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index a9f828a49d2..60b0517db0d 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -118,17 +118,6 @@ page_t*
trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr);
/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
NULL if none exists.
@return pointer to record, NULL if none */
@@ -139,28 +128,6 @@ trx_undo_page_get_next_rec(
trx_undo_rec_t* rec, /*!< in: undo log record */
ulint page_no,/*!< in: undo log header page number */
ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset); /*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
/***********************************************************************//**
Gets the previous record in an undo log.
@return undo log record, the page s-latched, NULL if none */
@@ -192,20 +159,18 @@ trx_undo_get_next_rec(
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
- ulint space,
+ fil_space_t* space,
ulint page_no,
ulint offset,
ulint mode,
mtr_t* mtr);
/** Allocate an undo log page.
-@param[in,out] trx transaction
@param[in,out] undo undo log
@param[in,out] mtr mini-transaction that does not hold any page latch
@return X-latched block if success
@retval NULL on failure */
-buf_block_t*
-trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Free the last undo log page. The caller must hold the rseg mutex.
@@ -238,37 +203,33 @@ trx_undo_truncate_start(
ulint hdr_page_no,
ulint hdr_offset,
undo_no_t limit);
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created.
-@return the combined size of undo log segments in pages */
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg); /*!< in: rollback segment memory object */
/** Mark that an undo log header belongs to a data dictionary transaction.
@param[in] trx dictionary transaction
@param[in,out] undo undo log
@param[in,out] mtr mini-transaction */
void trx_undo_mark_as_dict(const trx_t* trx, trx_undo_t* undo, mtr_t* mtr);
+/** Assign an undo log for a persistent transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
-@retval DB_SUCCESS on success
-@retval DB_TOO_MANY_CONCURRENT_TRXS
-@retval DB_OUT_OF_FILE_SPACE
-@retval DB_READ_ONLY
-@retval DB_OUT_OF_MEMORY */
-dberr_t
-trx_undo_assign_undo(
- trx_t* trx,
- trx_rseg_t* rseg,
- trx_undo_t** undo,
- ulint type)
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@@ -281,7 +242,7 @@ trx_undo_set_state_at_finish(
/** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
@param[in,out] trx transaction
-@param[in,out] undo insert_undo or update_undo log
+@param[in,out] undo undo log
@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
@param[in,out] mtr mini-transaction
@return undo log segment header page, x-latched */
@@ -292,20 +253,7 @@ trx_undo_set_state_at_prepare(
bool rollback,
mtr_t* mtr);
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update
- undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-
-/** Free an insert or temporary undo log after commit or rollback.
+/** Free an old insert or temporary undo log after commit or rollback.
The information is not needed after a commit or rollback, therefore
the data can be discarded.
@param[in,out] undo undo log
@@ -313,26 +261,31 @@ the data can be discarded.
void
trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp);
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
+/** At shutdown, frees the undo logs of a transaction. */
void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
- ATTRIBUTE_COLD __attribute__((nonnull));
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
+trx_undo_free_at_shutdown(trx_t *trx);
+
+/** Parse MLOG_UNDO_INIT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@param[in,out] mtr mini-transaction
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+trx_undo_parse_page_init(const byte* ptr, const byte* end_ptr, page_t* page);
+/** Parse MLOG_UNDO_HDR_REUSE for crash-upgrade from MariaDB 10.2.
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page undo page or NULL
@return end of log record or NULL */
byte*
-trx_undo_parse_page_init(
-/*=====================*/
- const byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/** Parse the redo log entry of an undo log page header create or reuse.
-@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+trx_undo_parse_page_header_reuse(
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page);
+
+/** Parse the redo log entry of an undo log page header create.
@param[in] ptr redo log record
@param[in] end_ptr end of log buffer
@param[in,out] page page frame or NULL
@@ -340,17 +293,19 @@ trx_undo_parse_page_init(
@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
- mlog_id_t type,
const byte* ptr,
const byte* end_ptr,
page_t* page,
mtr_t* mtr);
-/************************************************************************
-Frees an undo log memory copy. */
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo); /* in: the undo object to be freed */
+/** Read an undo log when starting up the database.
+@param[in,out] rseg rollback segment
+@param[in] id rollback segment slot
+@param[in] page_no undo log segment page number
+@param[in,out] max_trx_id the largest observed transaction ID
+@return size of the undo log in pages */
+ulint
+trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
+ trx_id_t& max_trx_id);
#endif /* !UNIV_INNOCHECKSUM */
@@ -373,25 +328,15 @@ trx_undo_mem_free(
#ifndef UNIV_INNOCHECKSUM
-/** Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
+/** Transaction undo log memory object; modified by the thread associated
+with the transaction. */
struct trx_undo_t {
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
- ulint type; /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
ulint state; /*!< state of the corresponding undo log
segment */
- ibool del_marks; /*!< relevant only in an update undo
- log: this is TRUE if the transaction may
- have delete marked records, because of
- a delete of a row or an update of an
- indexed field; purge is then
- necessary; also TRUE if the transaction
- has updated an externally stored
- field */
trx_id_t trx_id; /*!< id of the trx assigned to the undo
log */
XID xid; /*!< X/Open XA transaction
@@ -401,8 +346,6 @@ struct trx_undo_t {
id */
trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
/*-----------------------------*/
- ulint space; /*!< space id where the undo log
- placed */
ulint hdr_page_no; /*!< page number of the header page in
the undo log */
ulint hdr_offset; /*!< header offset of the undo log on
@@ -412,8 +355,6 @@ struct trx_undo_t {
top_page_no during a rollback */
ulint size; /*!< current size in pages */
/*-----------------------------*/
- ulint empty; /*!< TRUE if the stack of undo log
- records is currently empty */
ulint top_page_no; /*!< page number where the latest undo
log record was catenated; during
rollback the page from which the latest
@@ -421,11 +362,16 @@ struct trx_undo_t {
ulint top_offset; /*!< offset of the latest undo record,
i.e., the topmost element in the undo
log if we think of it as a stack */
- undo_no_t top_undo_no; /*!< undo number of the latest record */
+ undo_no_t top_undo_no; /*!< undo number of the latest record
+ (IB_ID_MAX if the undo log is empty) */
buf_block_t* guess_block; /*!< guess for the buffer block where
the top page might reside */
ulint withdraw_clock; /*!< the withdraw clock value of the
buffer pool when guess_block was stored */
+
+ /** @return whether the undo log is empty */
+ bool empty() const { return top_undo_no == IB_ID_MAX; }
+
/*-----------------------------*/
UT_LIST_NODE_T(trx_undo_t) undo_list;
/*!< undo log objects in the rollback
@@ -438,8 +384,8 @@ struct trx_undo_t {
/*-------------------------------------------------------------*/
/** Transaction undo log page header offsets */
/* @{ */
-#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
+#define TRX_UNDO_PAGE_TYPE 0 /*!< unused; 0 (before MariaDB 10.3.1:
+ TRX_UNDO_INSERT or TRX_UNDO_UPDATE) */
#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log
records for the LATEST transaction
start on this page (remember that
@@ -460,7 +406,7 @@ struct trx_undo_t {
at most this many bytes used; we must leave space at least for one new undo
log header on the page */
-#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
+#define TRX_UNDO_PAGE_REUSE_LIMIT (3 << (srv_page_size_shift - 2))
/* An update undo log segment may contain several undo logs on its first page
if the undo logs took so little space that the segment could be cached and
@@ -500,14 +446,23 @@ log segment */
page of an update undo log segment. */
/* @{ */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */
-#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the
- transaction; defined only if the log
- is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo
- log: TRUE if the transaction may have
- done delete markings of records, and
- thus purge is necessary */
+/** Transaction start identifier, or 0 if the undo log segment has been
+completely purged and trx_purge_free_segment() has started freeing it */
+#define TRX_UNDO_TRX_ID 0
+/** Transaction end identifier (if the log is in a history list),
+or 0 if the transaction has not been committed */
+#define TRX_UNDO_TRX_NO 8
+/** Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
+surviving user records, this used to be called TRX_UNDO_DEL_MARKS.
+
+The value 1 indicates that purge needs to process the undo log segment.
+The value 0 indicates that all of it has been processed, and
+trx_purge_free_segment() has been invoked, so the log is not safe to access.
+
+Before MariaDB 10.3.1, a log segment may carry the value 0 even before
+trx_purge_free_segment() was called, for those undo log records for
+which purge would not result in removing delete-marked records. */
+#define TRX_UNDO_NEEDS_PURGE 16
#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
of this log on the header page; purge
may remove undo log record from the
@@ -537,7 +492,7 @@ page of an update undo log segment. */
#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+MLOG_UNDO_HDR_CREATE. The appending of an XID to the
header is logged separately. In this sense, the XID is not really a member
of the undo log header. TODO: do not append the XID to the log header if XA
is not needed by the user. The XID wastes about 150 bytes of space in every
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index dc19840777d..ac8af61be09 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,9 +40,7 @@ trx_undo_build_roll_ptr(
ulint offset) /*!< in: offset of the undo entry within page */
{
roll_ptr_t roll_ptr;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
ut_ad(is_insert == 0 || is_insert == 1);
ut_ad(rseg_id < TRX_SYS_N_RSEGS);
ut_ad(offset < 65536);
@@ -67,12 +65,7 @@ trx_undo_decode_roll_ptr(
ulint* offset) /*!< out: offset of the undo
entry within page */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
ut_ad(roll_ptr < (1ULL << 56));
*offset = (ulint) roll_ptr & 0xFFFF;
roll_ptr >>= 16;
@@ -92,14 +85,9 @@ trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr) /*!< in: roll pointer */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- ut_ad(roll_ptr < (1ULL << 56));
- return((ibool) (roll_ptr >> 55));
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(roll_ptr < (1ULL << (ROLL_PTR_INSERT_FLAG_POS + 1)));
+ return((ibool) (roll_ptr >> ROLL_PTR_INSERT_FLAG_POS));
}
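
The preprocessor #error checks above become compile_time_assert() calls, and the insert flag is read via ROLL_PTR_INSERT_FLAG_POS, i.e. bit 55 of the 7-byte (56-bit) roll pointer. A standalone sketch of the encode/decode round trip; the 7-bit rseg id / 32-bit page number split is inferred from the surrounding asserts (rseg_id < TRX_SYS_N_RSEGS, offset < 65536) rather than spelled out in this hunk:

#include <cassert>
#include <cstdint>

/* assumed layout: [insert:1][rseg_id:7][page_no:32][offset:16] = 56 bits */
static const unsigned ROLL_PTR_INSERT_FLAG_POS = 55;

static uint64_t build_roll_ptr(bool is_insert, uint64_t rseg_id,
			       uint64_t page_no, uint64_t offset)
{
	assert(rseg_id < 128 && page_no <= 0xFFFFFFFFULL && offset < 65536);
	return (uint64_t(is_insert) << ROLL_PTR_INSERT_FLAG_POS)
		| (rseg_id << 48) | (page_no << 16) | offset;
}

int main()
{
	uint64_t roll_ptr = build_roll_ptr(true, 3, 0xABCD, 0x1234);
	assert(roll_ptr < (1ULL << 56));
	assert((roll_ptr & 0xFFFF) == 0x1234);			/* offset */
	assert(((roll_ptr >> 16) & 0xFFFFFFFF) == 0xABCD);	/* page number */
	assert((roll_ptr >> ROLL_PTR_INSERT_FLAG_POS) == 1);	/* insert flag */
}
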
/***********************************************************************//**
@@ -111,10 +99,8 @@ trx_undo_trx_id_is_insert(
/*======================*/
const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
{
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error
-#endif
- return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+ return bool(trx_id[DATA_TRX_ID_LEN] >> 7);
}
/*****************************************************************//**
@@ -129,9 +115,7 @@ trx_write_roll_ptr(
written */
roll_ptr_t roll_ptr) /*!< in: roll ptr */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
mach_write_to_7(ptr, roll_ptr);
}
@@ -146,9 +130,7 @@ trx_read_roll_ptr(
/*==============*/
const byte* ptr) /*!< in: pointer to memory from where to read */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
return(mach_read_from_7(ptr));
}
@@ -184,89 +166,24 @@ trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr)
return(buf_block_get_frame(block));
}
-/******************************************************************//**
-Returns the start offset of the undo log records of the specified undo
-log on the page.
-@return start offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- start = mach_read_from_2(offset + undo_page
- + TRX_UNDO_LOG_START);
- } else {
- start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
- }
-
- return(start);
-}
-
-/******************************************************************//**
-Returns the end offset of the undo log records of the specified undo
-log on the page.
+/** Determine the end offset of undo log records of an undo log page.
+@param[in] undo_page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset
@return end offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
+inline
+uint16_t
+trx_undo_page_get_end(const page_t* undo_page, ulint page_no, ulint offset)
{
- trx_ulogf_t* log_hdr;
- ulint end;
-
if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
-
- end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (end == 0) {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
+ if (uint16_t end = mach_read_from_2(TRX_UNDO_NEXT_LOG
+ + offset + undo_page)) {
+ return end;
}
- } else {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
-
- return(end);
-}
-
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint start;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
-
- if (start + undo_page == rec) {
-
- return(NULL);
}
- return(undo_page + mach_read_from_2(rec - 2));
+ return mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_page);
}
/******************************************************************//**
@@ -285,7 +202,7 @@ trx_undo_page_get_next_rec(
ulint end;
ulint next;
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
+ undo_page = (page_t*) ut_align_down(rec, srv_page_size);
end = trx_undo_page_get_end(undo_page, page_no, offset);
@@ -298,55 +215,3 @@ trx_undo_page_get_next_rec(
return(undo_page + next);
}
-
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + start);
-}
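
The rewritten trx_undo_page_get_end() above keeps the original fallback rule: when the undo log header sits on this page and its TRX_UNDO_NEXT_LOG field is nonzero, that offset ends this log's records; otherwise the records run up to the page's TRX_UNDO_PAGE_FREE offset. A self-contained model of just that decision, with the field reads reduced to plain parameters:

#include <cassert>
#include <cstdint>

static uint16_t undo_page_get_end(bool header_on_this_page,
				  uint16_t next_log, uint16_t page_free)
{
	if (header_on_this_page && next_log) {
		return next_log;	/* another log header follows on the page */
	}
	return page_free;		/* records extend to the free offset */
}

int main()
{
	assert(undo_page_get_end(true, 300, 500) == 300);
	assert(undo_page_get_end(true, 0, 500) == 500);
	assert(undo_page_get_end(false, 300, 500) == 500);
}
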
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 47d65052f16..a3b50950393 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -50,16 +50,14 @@ calculated in make_version_string() in sql/sql_show.cc like this:
because the version is shown with only one dot, we skip the last
component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
- (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+ (MYSQL_VERSION_MAJOR << 8 | MYSQL_VERSION_MINOR)
#define INNODB_VERSION_STR \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "." \
- IB_TO_STR(INNODB_VERSION_BUGFIX)
+ IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
+ IB_TO_STR(MYSQL_VERSION_MINOR) "." \
+ IB_TO_STR(MYSQL_VERSION_PATCH)
-#define REFMAN "http://dev.mysql.com/doc/refman/" \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
+#define REFMAN "http://dev.mysql.com/doc/refman/5.7/en/"
/** How far ahead should we tell the service manager the timeout
(time in seconds) */
@@ -171,9 +169,8 @@ for all cases. This is used by ut0lst.h related code. */
/* When this macro is defined then additional test functions will be
compiled. These functions live at the end of each relevant source file
and have "test_" prefix. These functions can be called from the end of
-innobase_init() or they can be called from gdb after
-innobase_start_or_create_for_mysql() has executed using the call
-command. */
+innodb_init() or they can be called from gdb after srv_start() has executed
+using the call command. */
/*
#define UNIV_COMPILE_TEST_FUNCS
#define UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
@@ -274,33 +271,6 @@ management to ensure correct alignment for doubles etc. */
========================
*/
-/** There are currently two InnoDB file formats which are used to group
-features with similar restrictions and dependencies. Using an enum allows
-switch statements to give a compiler warning when a new one is introduced. */
-enum innodb_file_formats_enum {
- /** Antelope File Format: InnoDB/MySQL up to 5.1.
- This format includes REDUNDANT and COMPACT row formats */
- UNIV_FORMAT_A = 0,
-
- /** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
- This format includes COMPRESSED and DYNAMIC row formats. It
- includes the ability to create secondary indexes from data that
- is not on the clustered index page and the ability to store more
- data off the clustered index page. */
- UNIV_FORMAT_B = 1
-};
-
-typedef enum innodb_file_formats_enum innodb_file_formats_t;
-
-/** Minimum supported file format */
-#define UNIV_FORMAT_MIN UNIV_FORMAT_A
-
-/** Maximum supported file format */
-#define UNIV_FORMAT_MAX UNIV_FORMAT_B
-
-/** The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
-
#ifdef HAVE_LZO
#define IF_LZO(A,B) A
#else
@@ -337,32 +307,29 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define IF_PUNCH_HOLE(A,B) B
#endif
-/** The universal page size of the database */
-#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
-
/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
Note: This must never change! */
-#define UNIV_ZIP_SIZE_SHIFT_MIN 10
+#define UNIV_ZIP_SIZE_SHIFT_MIN 10U
/** log2 of largest compressed page size (1<<14 == 16384 bytes).
A compressed page directory entry reserves 14 bits for the start offset
and 2 bits for flags. This limits the uncompressed page size to 16k.
*/
-#define UNIV_ZIP_SIZE_SHIFT_MAX 14
+#define UNIV_ZIP_SIZE_SHIFT_MAX 14U
/* Define the Min, Max, Default page sizes. */
/** Minimum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MIN 12
+#define UNIV_PAGE_SIZE_SHIFT_MIN 12U
/** log2 of largest page size (1<<16 == 65536 bytes). */
/** Maximum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MAX 16
+#define UNIV_PAGE_SIZE_SHIFT_MAX 16U
/** log2 of default page size (1<<14 == 16384 bytes). */
/** Default Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_DEF 14
+#define UNIV_PAGE_SIZE_SHIFT_DEF 14U
/** Original 16k InnoDB Page Size Shift, in case the default changes */
-#define UNIV_PAGE_SIZE_SHIFT_ORIG 14
+#define UNIV_PAGE_SIZE_SHIFT_ORIG 14U
/** Original 16k InnoDB Page Size as an ssize (log2 - 9) */
-#define UNIV_PAGE_SSIZE_ORIG (UNIV_PAGE_SIZE_SHIFT_ORIG - 9)
+#define UNIV_PAGE_SSIZE_ORIG (UNIV_PAGE_SIZE_SHIFT_ORIG - 9U)
/** Minimum page size InnoDB currently supports. */
#define UNIV_PAGE_SIZE_MIN (1U << UNIV_PAGE_SIZE_SHIFT_MIN)
@@ -382,13 +349,13 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
/** Largest possible ssize for an uncompressed page.
(The convention 'ssize' is used for 'log2 minus 9' or the number of
shifts starting with 512.)
-This max number varies depending on UNIV_PAGE_SIZE. */
+This max number varies depending on srv_page_size. */
#define UNIV_PAGE_SSIZE_MAX \
- static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+ ulint(srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
/** Smallest possible ssize for an uncompressed page. */
#define UNIV_PAGE_SSIZE_MIN \
- static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+ ulint(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
@@ -493,7 +460,7 @@ typedef ib_uint64_t lsn_t;
#define UINT64_UNDEFINED ((ib_uint64_t)(-1))
/** The bitmask of 32-bit unsigned integer */
-#define ULINT32_MASK 0xFFFFFFFF
+#define ULINT32_MASK 0xFFFFFFFFU
/** The undefined 32-bit unsigned integer */
#define ULINT32_UNDEFINED ULINT32_MASK
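
Two of the rewrites above and in trx0undo.h derive constants from srv_page_size_shift instead of UNIV_PAGE_SIZE: TRX_UNDO_PAGE_REUSE_LIMIT becomes 3 << (srv_page_size_shift - 2), and the ssize macros keep the 'log2 minus 9' convention. A quick, illustrative consistency check over the supported page-size shifts (12..16, per the macros above):

#include <cassert>

int main()
{
	for (unsigned shift = 12; shift <= 16; shift++) {
		unsigned long page_size = 1UL << shift;
		/* old: 3 * UNIV_PAGE_SIZE / 4; new: 3 << (srv_page_size_shift - 2) */
		assert(3 * page_size / 4 == (3UL << (shift - 2)));
	}

	/* 'ssize' = log2 minus 9, i.e. the number of doublings above 512 bytes */
	const unsigned UNIV_ZIP_SIZE_SHIFT_MIN = 10;
	const unsigned srv_page_size_shift = 14;	/* default 16KiB page */
	assert(srv_page_size_shift - 9 == 5);		/* UNIV_PAGE_SSIZE_ORIG */
	assert(srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1 == 5);	/* SSIZE_MAX */
	assert(12 - UNIV_ZIP_SIZE_SHIFT_MIN + 1 == 3);	/* SSIZE_MIN */
}
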
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 9c0cd6ee3c3..1ef90eca416 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -144,9 +144,6 @@ ut_bit_get_nth(
ulint n) /*!< in: nth bit requested */
{
ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
return(1 & (a >> n));
}
@@ -162,9 +159,6 @@ ut_bit_set_nth(
ibool val) /*!< in: value for the bit to set */
{
ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
if (val) {
return(((ulint) 1 << n) | a);
} else {
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 36b389b5bd2..32ad066f85a 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -47,14 +47,11 @@ typedef uint32_t (*ut_crc32_func_t)(const byte* ptr, ulint len);
/** Pointer to CRC32 calculation function. */
extern ut_crc32_func_t ut_crc32;
-/** Pointer to CRC32 calculation function, which uses big-endian byte order
+/** CRC32 calculation function, which uses big-endian byte order
when converting byte strings to integers internally. */
-extern ut_crc32_func_t ut_crc32_legacy_big_endian;
-
-/** Pointer to CRC32-byte-by-byte calculation function (byte order agnostic,
-but very slow). */
-extern ut_crc32_func_t ut_crc32_byte_by_byte;
+extern uint32_t ut_crc32_legacy_big_endian(const byte* buf, ulint len);
+/** Text description of CRC32 implementation */
extern const char* ut_crc32_implementation;
#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index 6622b7cd865..0c8328449db 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -59,8 +59,8 @@ ut_dbg_assertion_failed(
ut_dbg_assertion_failed(0, __FILE__, __LINE__)
/** Debug assertion */
-#define ut_ad DBUG_ASSERT
-#ifdef UNIV_DEBUG
+#define ut_ad DBUG_SLOW_ASSERT
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR) EXPR
#else
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 09733da20a0..f62d3744b96 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -426,7 +426,7 @@ Gets the last node in a two-way list.
@return last node, or NULL if the list is empty */
#define UT_LIST_GET_LAST(BASE) (BASE).end
-struct NullValidate { void operator()(const void* elem) { } };
+struct NullValidate { void operator()(const void*) { } };
/********************************************************************//**
Iterate over all the elements and call the functor for each element.
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
index 86fcbd59418..b79d03f1b0b 100644
--- a/storage/innobase/include/ut0new.h
+++ b/storage/innobase/include/ut0new.h
@@ -129,6 +129,10 @@ InnoDB:
#include <string.h> /* strlen(), strrchr(), strncmp() */
#include "my_global.h" /* needed for headers from mysql/psi/ */
+#if !defined(DBUG_OFF) && defined(HAVE_MADVISE)
+#include <sys/mman.h>
+#endif
+
/* JAN: TODO: missing 5.7 header */
#ifdef HAVE_MYSQL_MEMORY_H
#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
@@ -170,7 +174,6 @@ extern PSI_memory_key mem_key_other;
extern PSI_memory_key mem_key_row_log_buf;
extern PSI_memory_key mem_key_row_merge_sort;
extern PSI_memory_key mem_key_std;
-extern PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
extern PSI_memory_key mem_key_partitioning;
/** Setup the internal objects needed for UT_NEW() to operate.
@@ -233,6 +236,51 @@ struct ut_new_pfx_t {
#endif
};
+static inline void ut_allocate_trace_dontdump(void *ptr, size_t bytes,
+ bool
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+ dontdump
+#endif
+ , ut_new_pfx_t* pfx,
+ const char*
+#ifdef UNIV_PFS_MEMORY
+ file
+#endif
+
+ )
+{
+ ut_a(ptr != NULL);
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+ if (dontdump && madvise(ptr, bytes, MADV_DONTDUMP)) {
+ ib::warn() << "Failed to set memory to DONTDUMP: "
+ << strerror(errno)
+ << " ptr " << ptr
+ << " size " << bytes;
+ }
+#endif
+ if (pfx != NULL) {
+#ifdef UNIV_PFS_MEMORY
+ allocate_trace(bytes, file, pfx);
+#endif /* UNIV_PFS_MEMORY */
+ pfx->m_size = bytes;
+ }
+}
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
+static inline void ut_dodump(void* ptr, size_t m_size)
+{
+ if (ptr && madvise(ptr, m_size, MADV_DODUMP)) {
+ ib::warn() << "Failed to set memory to DODUMP: "
+ << strerror(errno)
+ << " ptr " << ptr
+ << " size " << m_size;
+ }
+}
+#else
+static inline void ut_dodump(void*, size_t) {}
+#endif
+
/** Allocator class for allocating memory from inside std::* containers.
@tparam T type of allocated object
@tparam oom_fatal whether to commit suicide when running out of memory */
@@ -247,19 +295,25 @@ public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
+#ifdef UNIV_PFS_MEMORY
/** Default constructor. */
explicit
ut_allocator(PSI_memory_key key = PSI_NOT_INSTRUMENTED)
-#ifdef UNIV_PFS_MEMORY
: m_key(key)
-#endif /* UNIV_PFS_MEMORY */
{
}
+#else
+ ut_allocator() {}
+ ut_allocator(PSI_memory_key) {}
+#endif /* UNIV_PFS_MEMORY */
/** Constructor from allocator of another type. */
template <class U>
- ut_allocator(
- const ut_allocator<U>& other)
+ ut_allocator(const ut_allocator<U>&
+#ifdef UNIV_PFS_MEMORY
+ other
+#endif
+ )
#ifdef UNIV_PFS_MEMORY
: m_key(other.m_key)
#endif /* UNIV_PFS_MEMORY */
@@ -280,6 +334,8 @@ public:
#endif /* UNIV_PFS_MEMORY */
}
+ pointer allocate(size_type n) { return allocate(n, NULL, NULL); }
+
/** Allocate a chunk of memory that can hold 'n_elements' objects of
type 'T' and trace the allocation.
If the allocation fails this method may throw an exception. This
@@ -288,17 +344,19 @@ public:
	After successful allocation the returned pointer must be passed
to ut_allocator::deallocate() when no longer needed.
@param[in] n_elements number of elements
- @param[in] hint pointer to a nearby memory location,
- unused by this implementation
- @param[in] file file name of the caller
@param[in] set_to_zero if true, then the returned memory is
initialized with 0x0 bytes.
+	@param[in]	throw_on_error	if true, raise an exception if the
+					request is too large
@return pointer to the allocated memory */
pointer
allocate(
size_type n_elements,
- const_pointer hint = NULL,
- const char* file = NULL,
+ const_pointer,
+ const char*
+#ifdef UNIV_PFS_MEMORY
+ file /*!< file name of the caller */
+#endif
+ ,
bool set_to_zero = false,
bool throw_on_error = true)
{
@@ -565,6 +623,8 @@ public:
/** Allocate a large chunk of memory that can hold 'n_elements'
objects of type 'T' and trace the allocation.
@param[in] n_elements number of elements
+	@param[in]	dontdump	if true, advise the OS not to include
+					this memory in core dumps.
@param[out] pfx storage for the description of the
allocated memory. The caller must provide space for this one and keep
it until the memory is no longer needed and then pass it to
@@ -573,7 +633,8 @@ public:
pointer
allocate_large(
size_type n_elements,
- ut_new_pfx_t* pfx)
+ ut_new_pfx_t* pfx,
+ bool dontdump = false)
{
if (n_elements == 0 || n_elements > max_size()) {
return(NULL);
@@ -584,13 +645,11 @@ public:
pointer ptr = reinterpret_cast<pointer>(
os_mem_alloc_large(&n_bytes));
-#ifdef UNIV_PFS_MEMORY
- if (ptr != NULL) {
- allocate_trace(n_bytes, NULL, pfx);
+ if (ptr == NULL) {
+ return NULL;
}
-#else
- pfx->m_size = n_bytes;
-#endif /* UNIV_PFS_MEMORY */
+
+ ut_allocate_trace_dontdump(ptr, n_bytes, dontdump, pfx, NULL);
return(ptr);
}
@@ -599,17 +658,30 @@ public:
deallocation.
@param[in,out] ptr pointer to memory to free
@param[in] pfx descriptor of the memory, as returned by
- allocate_large(). */
+ allocate_large().
+ @param[in] dodump if true, advise the OS to include this
+ memory again if a core dump occurs. */
void
deallocate_large(
pointer ptr,
- const ut_new_pfx_t* pfx)
+ const ut_new_pfx_t*
+#ifdef UNIV_PFS_MEMORY
+ pfx
+#endif
+ ,
+ size_t size,
+ bool dodump = false)
{
+ if (dodump) {
+ ut_dodump(ptr, size);
+ }
#ifdef UNIV_PFS_MEMORY
- deallocate_trace(pfx);
+ if (pfx) {
+ deallocate_trace(pfx);
+ }
#endif /* UNIV_PFS_MEMORY */
- os_mem_free_large(ptr, pfx->m_size);
+ os_mem_free_large(ptr, size);
}
#ifdef UNIV_PFS_MEMORY
@@ -723,12 +795,7 @@ could be freed by A2 even if the pfs mem key is different. */
template <typename T>
inline
bool
-operator==(
- const ut_allocator<T>& lhs,
- const ut_allocator<T>& rhs)
-{
- return(true);
-}
+operator==(const ut_allocator<T>&, const ut_allocator<T>&) { return(true); }
/** Compare two allocators of the same type. */
template <typename T>
@@ -841,6 +908,10 @@ ut_delete_array(
ut_allocator<byte>(key).allocate( \
n_bytes, NULL, __FILE__, false, false))
+#define ut_malloc_dontdump(n_bytes) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate_large( \
+ n_bytes, true))
+
#define ut_zalloc(n_bytes, key) static_cast<void*>( \
ut_allocator<byte>(key).allocate( \
n_bytes, NULL, __FILE__, true, false))
@@ -864,6 +935,10 @@ ut_delete_array(
#define ut_free(ptr) ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
reinterpret_cast<byte*>(ptr))
+#define ut_free_dodump(ptr, size) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate_large( \
+ ptr, NULL, size, true))
+
#else /* UNIV_PFS_MEMORY */
/* Fallbacks when memory tracing is disabled at compile time. */
@@ -886,6 +961,14 @@ ut_delete_array(
#define ut_malloc_nokey(n_bytes) ::malloc(n_bytes)
+static inline void *ut_malloc_dontdump(size_t n_bytes)
+{
+ void *ptr = os_mem_alloc_large(&n_bytes);
+
+ ut_allocate_trace_dontdump(ptr, n_bytes, true, NULL, NULL);
+ return ptr;
+}
+
#define ut_zalloc_nokey(n_bytes) ::calloc(1, n_bytes)
#define ut_zalloc_nokey_nofatal(n_bytes) ::calloc(1, n_bytes)
@@ -894,6 +977,12 @@ ut_delete_array(
#define ut_free(ptr) ::free(ptr)
+static inline void ut_free_dodump(void *ptr, size_t size)
+{
+ ut_dodump(ptr, size);
+ os_mem_free_large(ptr, size);
+}
+
#endif /* UNIV_PFS_MEMORY */
#endif /* ut0new_h */
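
The DONTDUMP/DODUMP helpers added above reduce to a pair of madvise() calls around a large allocation: exclude the buffer from core dumps while it is live, and re-include it before the memory is handed back. A hedged, Linux-only sketch of that round trip (mmap is used here only to get a page-aligned buffer; the real helpers go through os_mem_alloc_large()/os_mem_free_large()):

#include <sys/mman.h>
#include <cassert>
#include <cstdio>
#include <cstring>

int main()
{
	const size_t bytes = 1 << 20;	/* 1 MiB, page aligned via mmap */
	void* buf = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);

#ifdef MADV_DONTDUMP
	if (madvise(buf, bytes, MADV_DONTDUMP)) {	/* cf. ut_allocate_trace_dontdump() */
		perror("madvise(MADV_DONTDUMP)");
	}
#endif
	memset(buf, 0, bytes);				/* use the buffer as usual */
#ifdef MADV_DODUMP
	if (madvise(buf, bytes, MADV_DODUMP)) {		/* cf. ut_dodump() before freeing */
		perror("madvise(MADV_DODUMP)");
	}
#endif
	munmap(buf, bytes);
}
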
diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h
index c0237158ce5..d3ea733a440 100644
--- a/storage/innobase/include/ut0pool.h
+++ b/storage/innobase/include/ut0pool.h
@@ -115,7 +115,7 @@ struct Pool {
} else if (m_last < m_end) {
/* Initialise the remaining elements. */
- init(m_end - m_last);
+ init(size_t(m_end - m_last));
ut_ad(!m_pqueue.empty());
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 2ee0d98924b..280ee772589 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -58,16 +58,6 @@ UNIV_INLINE
ulint
ut_rnd_gen_ulint(void);
/*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high); /*!< in: high limit; can generate also this value */
-
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime or some
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index 16dccb545d8..1e4915dd0f9 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -97,30 +97,6 @@ ut_rnd_gen_ulint(void)
return(rnd);
}
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high) /*!< in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low)));
-}
-
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime
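
The removed ut_rnd_interval() computed low + (rnd % (high - low)). For reference only, an equivalent standalone helper with std::rand() standing in for ut_rnd_gen_ulint(); note that, despite the old comment, 'high' itself is only produced when low == high:

#include <cassert>
#include <cstdlib>

static unsigned long rnd_interval(unsigned long low, unsigned long high)
{
	assert(high >= low);
	if (low == high) {
		return low;
	}
	return low + (static_cast<unsigned long>(std::rand()) % (high - low));
}

int main()
{
	unsigned long v = rnd_interval(10, 20);
	assert(v >= 10 && v < 20);
}
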
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
index c39be3b90b5..e2e840afbc5 100644
--- a/storage/innobase/include/ut0stage.h
+++ b/storage/innobase/include/ut0stage.h
@@ -527,65 +527,28 @@ ut_stage_alter_t::change_phase(
class ut_stage_alter_t {
public:
- explicit
- ut_stage_alter_t(
- const dict_index_t* pk)
- {
- }
+ explicit ut_stage_alter_t(const dict_index_t*) {}
- void
- begin_phase_read_pk(
- ulint n_sort_indexes)
- {
- }
+ void begin_phase_read_pk(ulint) {}
- void
- n_pk_recs_inc()
- {
- }
+ void n_pk_recs_inc() {}
- void
- inc(
- ulint inc_val = 1)
- {
- }
+ void inc() {}
+ void inc(ulint) {}
- void
- end_phase_read_pk()
- {
- }
+ void end_phase_read_pk() {}
- void
- begin_phase_sort(
- double sort_multi_factor)
- {
- }
+ void begin_phase_sort(double) {}
- void
- begin_phase_insert()
- {
- }
+ void begin_phase_insert() {}
- void
- begin_phase_flush(
- ulint n_flush_pages)
- {
- }
+ void begin_phase_flush(ulint) {}
- void
- begin_phase_log_index()
- {
- }
+ void begin_phase_log_index() {}
- void
- begin_phase_log_table()
- {
- }
+ void begin_phase_log_table() {}
- void
- begin_phase_end()
- {
- }
+ void begin_phase_end() {}
};
#endif /* HAVE_PSI_STAGE_INTERFACE */
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..1614d3ead6d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -45,6 +45,7 @@ Created 1/20/1994 Heikki Tuuri
#include <stdarg.h>
#include <string>
+#include <my_atomic.h>
/** Index name prefix in fast index creation, as a string constant */
#define TEMP_INDEX_PREFIX_STR "\377"
@@ -52,35 +53,6 @@ Created 1/20/1994 Heikki Tuuri
/** Time stamp */
typedef time_t ib_time_t;
-#ifdef HAVE_PAUSE_INSTRUCTION
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
- volatile int32 volatile_var; \
- int32 oldval= 0; \
- my_atomic_cas32(&volatile_var, &oldval, 1); \
- } while (0)
-#endif
-
#if defined (__GNUC__)
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
#elif defined (_MSC_VER)
@@ -89,15 +61,6 @@ typedef time_t ib_time_t;
# define UT_COMPILER_BARRIER()
#endif
-#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
-# include <sys/platform/ppc.h>
-# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
-# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
-#else
-# define UT_LOW_PRIORITY_CPU() ((void)0)
-# define UT_RESUME_PRIORITY_CPU() ((void)0)
-#endif
-
/*********************************************************************//**
Delays execution for at most max_wait_us microseconds or returns earlier
if cond becomes true.
diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake
index 7272585dcce..a728dd08c0d 100644
--- a/storage/innobase/innodb.cmake
+++ b/storage/innobase/innodb.cmake
@@ -33,6 +33,8 @@ MYSQL_CHECK_BZIP2()
MYSQL_CHECK_SNAPPY()
MYSQL_CHECK_NUMA()
+INCLUDE(${MYSQL_CMAKE_SCRIPT_DIR}/compile_flags.cmake)
+
IF(CMAKE_CROSSCOMPILING)
# Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when
# cross-compiling. Not as precise, but usually good enough.
@@ -47,12 +49,6 @@ ELSE()
ENDMACRO()
ENDIF()
-## MySQL 5.7 LZ4 (not needed)
-##IF(LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
-## ADD_DEFINITIONS(-DHAVE_LZ4=1)
-## INCLUDE_DIRECTORIES(${LZ4_INCLUDE_DIR})
-##ENDIF()
-
# OS tests
IF(UNIX)
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
@@ -125,20 +121,7 @@ ENDIF()
OPTION(WITH_INNODB_EXTRA_DEBUG "Enable extra InnoDB debug checks" OFF)
IF(WITH_INNODB_EXTRA_DEBUG)
- IF(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
- MESSAGE(FATAL_ERROR "WITH_INNODB_EXTRA_DEBUG can be enabled only in debug builds")
- ENDIF()
-
- SET(EXTRA_DEBUG_FLAGS "")
- IF(WITH_INNODB_AHI)
- SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_AHI_DEBUG")
- ENDIF()
- SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_DDL_DEBUG")
- SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_DEBUG_FILE_ACCESSES")
- SET(EXTRA_DEBUG_FLAGS "${EXTRA_DEBUG_FLAGS} -DUNIV_ZIP_DEBUG")
-
- SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${EXTRA_DEBUG_FLAGS}")
- SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${EXTRA_DEBUG_FLAGS}")
+ ADD_DEFINITIONS(-DUNIV_ZIP_DEBUG)
ENDIF()
CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
@@ -156,13 +139,6 @@ IF(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
ENDIF()
IF(NOT MSVC)
- # workaround for old gcc on x86, gcc atomic ops only work under -march=i686
- IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND
- CMAKE_C_COMPILER_VERSION VERSION_LESS "4.4.0")
- SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686")
- ENDIF()
-
CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
IF(HAVE_POSIX_MEMALIGN)
ADD_DEFINITIONS(-DHAVE_POSIX_MEMALIGN)
@@ -246,13 +222,6 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro"
PROPERTIES COMPILE_FLAGS -xO3)
ENDIF()
-# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
-# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
-IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
- SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF()
-
# Avoid generating Hardware Capabilities due to crc32 instructions
IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i386")
MY_CHECK_CXX_COMPILER_FLAG("-Wa,-nH")
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 1d3e75e9740..c8707955a5d 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -72,8 +72,6 @@ extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
-extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
-
/** Print info of a table lock.
@param[in,out] file output stream
@param[in] lock table lock */
@@ -258,7 +256,7 @@ private:
ulint m_heap_no; /*!< heap number if rec lock */
};
- /** Used in deadlock tracking. Protected by lock_sys->mutex. */
+ /** Used in deadlock tracking. Protected by lock_sys.mutex. */
static ib_uint64_t s_lock_mark_counter;
/** Calculation steps thus far. It is the count of the nodes visited. */
@@ -314,7 +312,7 @@ lock_rec_validate_page(
#endif /* UNIV_DEBUG */
/* The lock system */
-lock_sys_t* lock_sys = NULL;
+lock_sys_t lock_sys;
/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
@@ -332,8 +330,11 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
+ trx_id_t max_trx_id) /*!< in: trx_sys.get_max_trx_id() */
{
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!rec_is_metadata(rec, index));
+
ib::error()
<< "Transaction id " << trx_id
<< " associated with record" << rec_offsets_print(rec, offsets)
@@ -346,11 +347,6 @@ lock_report_trx_id_insanity(
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
-#ifdef UNIV_DEBUG
-
-#else
-static MY_ATTRIBUTE((warn_unused_result))
-#endif
bool
lock_check_trx_id_sanity(
/*=====================*/
@@ -360,16 +356,17 @@ lock_check_trx_id_sanity(
const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
{
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!rec_is_metadata(rec, index));
- trx_id_t max_trx_id = trx_sys_get_max_trx_id();
- bool is_ok = trx_id < max_trx_id;
+ trx_id_t max_trx_id = trx_sys.get_max_trx_id();
+ ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);
- if (!is_ok) {
+ if (max_trx_id && trx_id >= max_trx_id) {
lock_report_trx_id_insanity(
trx_id, rec, index, offsets, max_trx_id);
+ return false;
}
-
- return(is_ok);
+ return(true);
}
/*********************************************************************//**
@@ -388,13 +385,13 @@ lock_clust_rec_cons_read_sees(
ut_ad(dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!rec_is_metadata(rec, index));
/* Temp-tables are not shared across connections and multiple
transactions from different connections cannot simultaneously
operate on same temp-table and so read of temp-table is
always consistent read. */
- if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
- ut_ad(view == 0 || dict_table_is_temporary(index->table));
+ if (index->table->is_temporary()) {
return(true);
}
@@ -426,15 +423,13 @@ lock_sec_rec_cons_read_sees(
const ReadView* view) /*!< in: consistent read view */
{
ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(!index->is_primary());
+ ut_ad(!rec_is_metadata(rec, index));
/* NOTE that we might call this function while holding the search
system latch. */
- if (recv_recovery_is_on()) {
-
- return(false);
-
- } else if (dict_table_is_temporary(index->table)) {
+ if (index->table->is_temporary()) {
/* Temp-tables are not shared across connections and multiple
transactions from different connections cannot simultaneously
@@ -451,37 +446,34 @@ lock_sec_rec_cons_read_sees(
return(view->sees(max_trx_id));
}
-/*********************************************************************//**
-Creates the lock system at database start. */
-void
-lock_sys_create(
-/*============*/
- ulint n_cells) /*!< in: number of slots in lock hash table */
-{
- ulint lock_sys_sz;
-
- lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);
- lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));
+/**
+ Creates the lock system at database start.
- void* ptr = &lock_sys[1];
+ @param[in] n_cells number of slots in lock hash table
+*/
+void lock_sys_t::create(ulint n_cells)
+{
+ ut_ad(this == &lock_sys);
- lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);
+ m_initialised= true;
- lock_sys->last_slot = lock_sys->waiting_threads;
+ waiting_threads = static_cast<srv_slot_t*>
+ (ut_zalloc_nokey(srv_max_n_threads * sizeof *waiting_threads));
+ last_slot = waiting_threads;
- mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);
+ mutex_create(LATCH_ID_LOCK_SYS, &mutex);
- mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
+ mutex_create(LATCH_ID_LOCK_SYS_WAIT, &wait_mutex);
- lock_sys->timeout_event = os_event_create(0);
+ timeout_event = os_event_create(0);
- lock_sys->rec_hash = hash_create(n_cells);
- lock_sys->prdt_hash = hash_create(n_cells);
- lock_sys->prdt_page_hash = hash_create(n_cells);
+ rec_hash = hash_create(n_cells);
+ prdt_hash = hash_create(n_cells);
+ prdt_page_hash = hash_create(n_cells);
if (!srv_read_only_mode) {
- lock_latest_err_file = os_file_create_tmpfile(NULL);
+ lock_latest_err_file = os_file_create_tmpfile();
ut_a(lock_latest_err_file);
}
}
@@ -498,31 +490,33 @@ lock_rec_lock_fold(
lock->un_member.rec_lock.page_no));
}
-/** Resize the lock hash tables.
-@param[in] n_cells number of slots in lock hash table */
-void
-lock_sys_resize(
- ulint n_cells)
+
+/**
+ Resize the lock hash table.
+
+ @param[in] n_cells number of slots in lock hash table
+*/
+void lock_sys_t::resize(ulint n_cells)
{
- hash_table_t* old_hash;
+ ut_ad(this == &lock_sys);
- lock_mutex_enter();
+ mutex_enter(&mutex);
- old_hash = lock_sys->rec_hash;
- lock_sys->rec_hash = hash_create(n_cells);
- HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
+ hash_table_t* old_hash = rec_hash;
+ rec_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, rec_hash, lock_t, hash,
lock_rec_lock_fold);
hash_table_free(old_hash);
- old_hash = lock_sys->prdt_hash;
- lock_sys->prdt_hash = hash_create(n_cells);
- HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
+ old_hash = prdt_hash;
+ prdt_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, prdt_hash, lock_t, hash,
lock_rec_lock_fold);
hash_table_free(old_hash);
- old_hash = lock_sys->prdt_page_hash;
- lock_sys->prdt_page_hash = hash_create(n_cells);
- HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
+ old_hash = prdt_page_hash;
+ prdt_page_hash = hash_create(n_cells);
+ HASH_MIGRATE(old_hash, prdt_page_hash, lock_t, hash,
lock_rec_lock_fold);
hash_table_free(old_hash);
@@ -551,40 +545,39 @@ lock_sys_resize(
buf_pool_mutex_exit(buf_pool);
}
- lock_mutex_exit();
+ mutex_exit(&mutex);
}
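
The resize above keeps the same migration idea as the old lock_sys_resize(): allocate a bigger bucket array and re-insert every lock under its (space, page_no) fold. A self-contained sketch of that re-bucketing step follows; the fold function here is a stand-in, not the real ut_fold_ulint_pair().

  #include <cstddef>
  #include <cstdint>
  #include <list>
  #include <utility>
  #include <vector>

  typedef std::pair<uint32_t, uint32_t> page_id;            // (space, page_no)
  typedef std::vector<std::list<page_id> > hash_table;

  static std::size_t fold(const page_id& id) {
    return id.first * 2654435761u + id.second;              // stand-in fold
  }

  static hash_table resized(const hash_table& old_hash, std::size_t n_cells) {
    hash_table new_hash(n_cells);
    for (const auto& cell : old_hash)
      for (const auto& id : cell)                           // HASH_MIGRATE analogue
        new_hash[fold(id) % n_cells].push_back(id);
    return new_hash;
  }

  int main() {
    hash_table h(8);
    h[fold(page_id(5, 7)) % 8].push_back(page_id(5, 7));
    hash_table bigger = resized(h, 1024);
    return bigger[fold(page_id(5, 7)) % 1024].size() == 1 ? 0 : 1;
  }
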
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-void
-lock_sys_close(void)
-/*================*/
+
+/** Closes the lock system at database shutdown. */
+void lock_sys_t::close()
{
+ ut_ad(this == &lock_sys);
+
+ if (!m_initialised) return;
+
if (lock_latest_err_file != NULL) {
fclose(lock_latest_err_file);
lock_latest_err_file = NULL;
}
- hash_table_free(lock_sys->rec_hash);
- hash_table_free(lock_sys->prdt_hash);
- hash_table_free(lock_sys->prdt_page_hash);
+ hash_table_free(rec_hash);
+ hash_table_free(prdt_hash);
+ hash_table_free(prdt_page_hash);
- os_event_destroy(lock_sys->timeout_event);
+ os_event_destroy(timeout_event);
- mutex_destroy(&lock_sys->mutex);
- mutex_destroy(&lock_sys->wait_mutex);
+ mutex_destroy(&mutex);
+ mutex_destroy(&wait_mutex);
- srv_slot_t* slot = lock_sys->waiting_threads;
-
- for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
- if (slot->event != NULL) {
- os_event_destroy(slot->event);
+ for (ulint i = srv_max_n_threads; i--; ) {
+ if (os_event_t& event = waiting_threads[i].event) {
+ os_event_destroy(event);
}
}
- ut_free(lock_sys);
-
- lock_sys = NULL;
+ ut_free(waiting_threads);
+ m_initialised= false;
}
/*********************************************************************//**
@@ -652,7 +645,7 @@ lock_rec_get_insert_intention(
Checks if a lock request for a new lock has to wait for request lock2.
@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
-ibool
+bool
lock_rec_has_to_wait(
/*=================*/
bool for_locking,
@@ -675,160 +668,162 @@ lock_rec_has_to_wait(
ut_ad(trx && lock2);
ut_ad(lock_get_type_low(lock2) == LOCK_REC);
- if (trx != lock2->trx
- && !lock_mode_compatible(static_cast<lock_mode>(
- LOCK_MODE_MASK & type_mode),
- lock_get_mode(lock2))) {
+ if (trx == lock2->trx
+ || lock_mode_compatible(
+ static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
+ lock_get_mode(lock2))) {
+ return false;
+ }
- /* We have somewhat complex rules when gap type record locks
- cause waits */
+ /* We have somewhat complex rules when gap type record locks
+ cause waits */
- if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
- && !(type_mode & LOCK_INSERT_INTENTION)) {
+ if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
+ && !(type_mode & LOCK_INSERT_INTENTION)) {
- /* Gap type locks without LOCK_INSERT_INTENTION flag
- do not need to wait for anything. This is because
- different users can have conflicting lock types
- on gaps. */
+ /* Gap type locks without LOCK_INSERT_INTENTION flag
+ do not need to wait for anything. This is because
+ different users can have conflicting lock types
+ on gaps. */
- return(FALSE);
- }
+ return false;
+ }
- if (!(type_mode & LOCK_INSERT_INTENTION)
- && lock_rec_get_gap(lock2)) {
+ if (!(type_mode & LOCK_INSERT_INTENTION) && lock_rec_get_gap(lock2)) {
- /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
- does not need to wait for a gap type lock */
+ /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
+ does not need to wait for a gap type lock */
- return(FALSE);
- }
+ return false;
+ }
- if ((type_mode & LOCK_GAP)
- && lock_rec_get_rec_not_gap(lock2)) {
+ if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {
- /* Lock on gap does not need to wait for
- a LOCK_REC_NOT_GAP type lock */
+ /* Lock on gap does not need to wait for
+ a LOCK_REC_NOT_GAP type lock */
- return(FALSE);
- }
+ return false;
+ }
- if (lock_rec_get_insert_intention(lock2)) {
+ if (lock_rec_get_insert_intention(lock2)) {
- /* No lock request needs to wait for an insert
- intention lock to be removed. This is ok since our
- rules allow conflicting locks on gaps. This eliminates
- a spurious deadlock caused by a next-key lock waiting
- for an insert intention lock; when the insert
- intention lock was granted, the insert deadlocked on
- the waiting next-key lock.
+ /* No lock request needs to wait for an insert
+ intention lock to be removed. This is ok since our
+ rules allow conflicting locks on gaps. This eliminates
+ a spurious deadlock caused by a next-key lock waiting
+ for an insert intention lock; when the insert
+ intention lock was granted, the insert deadlocked on
+ the waiting next-key lock.
- Also, insert intention locks do not disturb each
- other. */
+ Also, insert intention locks do not disturb each
+ other. */
- return(FALSE);
- }
+ return false;
+ }
- if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2)) &&
- !thd_need_ordering_with(trx->mysql_thd,
- lock2->trx->mysql_thd)) {
- /* If the upper server layer has already decided on the
- commit order between the transaction requesting the
- lock and the transaction owning the lock, we do not
- need to wait for gap locks. Such ordeering by the upper
- server layer happens in parallel replication, where the
- commit order is fixed to match the original order on the
- master.
-
- Such gap locks are mainly needed to get serialisability
- between transactions so that they will be binlogged in
- the correct order so that statement-based replication
- will give the correct results. Since the right order
- was already determined on the master, we do not need
- to enforce it again here.
-
- Skipping the locks is not essential for correctness,
- since in case of deadlock we will just kill the later
- transaction and retry it. But it can save some
- unnecessary rollbacks and retries. */
-
- return (FALSE);
- }
+ if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2))
+ && !thd_need_ordering_with(trx->mysql_thd, lock2->trx->mysql_thd)) {
+ /* If the upper server layer has already decided on the
+ commit order between the transaction requesting the
+ lock and the transaction owning the lock, we do not
+ need to wait for gap locks. Such ordering by the upper
+ server layer happens in parallel replication, where the
+ commit order is fixed to match the original order on the
+ master.
+
+ Such gap locks are mainly needed to get serialisability
+ between transactions so that they will be binlogged in
+ the correct order so that statement-based replication
+ will give the correct results. Since the right order
+ was already determined on the master, we do not need
+ to enforce it again here.
+
+ Skipping the locks is not essential for correctness,
+ since in case of deadlock we will just kill the later
+ transaction and retry it. But it can save some
+ unnecessary rollbacks and retries. */
+
+ return false;
+ }
#ifdef WITH_WSREP
- /* if BF thread is locking and has conflict with another BF
- thread, we need to look at trx ordering and lock types */
- if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
- wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
+ /* if BF thread is locking and has conflict with another BF
+ thread, we need to look at trx ordering and lock types */
+ if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)
+ && wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
- if (wsrep_debug) {
- ib::info() <<
- "BF-BF lock conflict, locking: " << for_locking;
+ if (wsrep_debug) {
+ ib::info() << "BF-BF lock conflict, locking: "
+ << for_locking;
+ lock_rec_print(stderr, lock2);
+ ib::info()
+ << " SQL1: " << wsrep_thd_query(trx->mysql_thd)
+ << " SQL2: "
+ << wsrep_thd_query(lock2->trx->mysql_thd);
+ }
+
+ if (wsrep_trx_order_before(trx->mysql_thd,
+ lock2->trx->mysql_thd)
+ && (type_mode & LOCK_MODE_MASK) == LOCK_X
+ && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
+ if (for_locking || wsrep_debug) {
+ /* exclusive lock conflicts are not
+ accepted */
+ ib::info()
+ << "BF-BF X lock conflict,mode: "
+ << type_mode
+ << " supremum: " << lock_is_on_supremum
+ << " conflicts states: my "
+ << wsrep_thd_conflict_state(
+ trx->mysql_thd, FALSE)
+ << " locked "
+ << wsrep_thd_conflict_state(
+ lock2->trx->mysql_thd,
+ FALSE);
lock_rec_print(stderr, lock2);
ib::info() << " SQL1: "
- << wsrep_thd_query(trx->mysql_thd);
- ib::info() << " SQL2: "
- << wsrep_thd_query(lock2->trx->mysql_thd);
- }
+ << wsrep_thd_query(trx->mysql_thd)
+ << " SQL2: "
+ << wsrep_thd_query(
+ lock2->trx->mysql_thd);
- if (wsrep_trx_order_before(trx->mysql_thd,
- lock2->trx->mysql_thd) &&
- (type_mode & LOCK_MODE_MASK) == LOCK_X &&
- (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
- if (for_locking || wsrep_debug) {
- /* exclusive lock conflicts are not
- accepted */
- ib::info() <<
- "BF-BF X lock conflict,"
- "mode: " << type_mode <<
- " supremum: " << lock_is_on_supremum;
- ib::info() <<
- "conflicts states: my "
- << wsrep_thd_conflict_state(trx->mysql_thd, FALSE)
- << " locked "
- << wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE);
- lock_rec_print(stderr, lock2);
- ib::info() << " SQL1: "
- << wsrep_thd_query(trx->mysql_thd);
- ib::info() << " SQL2: "
- << wsrep_thd_query(lock2->trx->mysql_thd);
-
- if (for_locking) {
- return FALSE;
- }
+ if (for_locking) {
+ return false;
}
- } else {
- /* if lock2->index->n_uniq <=
- lock2->index->n_user_defined_cols
- operation is on uniq index
- */
- if (wsrep_debug) {
- ib::info() <<
- "BF conflict, modes: "
- << type_mode << ":" << lock2->type_mode
- << " idx: " << lock2->index->name()
- << " table: " << lock2->index->table->name.m_name
- << " n_uniq: " << lock2->index->n_uniq
- << " n_user: " << lock2->index->n_user_defined_cols;
- ib::info() << " SQL1: "
- << wsrep_thd_query(trx->mysql_thd);
- ib::info() << " SQL2: "
- << wsrep_thd_query(lock2->trx->mysql_thd);
- }
- return FALSE;
}
+ } else {
+ /* If lock2->index->n_uniq <=
+ lock2->index->n_user_defined_cols,
+ the operation is on a unique index. */
+ if (wsrep_debug) {
+ ib::info()
+ << "BF conflict, modes: " << type_mode
+ << ":" << lock2->type_mode
+ << " idx: " << lock2->index->name()
+ << " table: "
+ << lock2->index->table->name.m_name
+ << " n_uniq: " << lock2->index->n_uniq
+ << " n_user: "
+ << lock2->index->n_user_defined_cols
+ << " SQL1: "
+ << wsrep_thd_query(trx->mysql_thd)
+ << " SQL2: "
+ << wsrep_thd_query(
+ lock2->trx->mysql_thd);
+ }
+ return false;
}
-#endif /* WITH_WSREP */
-
- return(TRUE);
}
+#endif /* WITH_WSREP */
- return(FALSE);
+ return true;
}
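
The early returns above encode the gap-locking compatibility rules. Below is a standalone illustration of that decision order, assuming the requesting and holding transactions differ and their basic modes already conflict; the flag names are simplified stand-ins for the type_mode bits, not the real lock_t interface.

  #include <iostream>

  struct request {            // incoming lock request
    bool gap;                 // LOCK_GAP
    bool insert_intention;    // LOCK_INSERT_INTENTION
    bool on_supremum;
  };
  struct held {               // lock2, already in the queue
    bool gap;                 // gap-only lock
    bool rec_not_gap;         // LOCK_REC_NOT_GAP
    bool insert_intention;
  };

  static bool has_to_wait(const request& r, const held& h) {
    if ((r.on_supremum || r.gap) && !r.insert_intention)
      return false;                       // plain gap requests never wait
    if (!r.insert_intention && h.gap)
      return false;                       // record request vs. gap-only lock
    if (r.gap && h.rec_not_gap)
      return false;                       // gap request vs. record-only lock
    if (h.insert_intention)
      return false;                       // nobody waits for insert intention
    return true;
  }

  int main() {
    // next-key request vs. granted insert-intention (gap) lock: no wait,
    // which avoids the spurious deadlock described in the comment above
    std::cout << has_to_wait({false, false, false}, {true, false, true}) << '\n';
    // insert intention vs. granted gap lock: must wait
    std::cout << has_to_wait({true, true, false}, {true, false, false}) << '\n';
  }
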
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
-ibool
+bool
lock_has_to_wait(
/*=============*/
const lock_t* lock1, /*!< in: waiting lock */
@@ -839,32 +834,27 @@ lock_has_to_wait(
{
ut_ad(lock1 && lock2);
- if (lock1->trx != lock2->trx
- && !lock_mode_compatible(lock_get_mode(lock1),
- lock_get_mode(lock2))) {
- if (lock_get_type_low(lock1) == LOCK_REC) {
- ut_ad(lock_get_type_low(lock2) == LOCK_REC);
-
- /* If this lock request is for a supremum record
- then the second bit on the lock bitmap is set */
-
- if (lock1->type_mode
- & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
- return(lock_prdt_has_to_wait(
- lock1->trx, lock1->type_mode,
- lock_get_prdt_from_lock(lock1),
- lock2));
- } else {
- return(lock_rec_has_to_wait(false,
- lock1->trx, lock1->type_mode, lock2,
- lock_rec_get_nth_bit(lock1, true)));
- }
- }
+ if (lock1->trx == lock2->trx
+ || lock_mode_compatible(lock_get_mode(lock1),
+ lock_get_mode(lock2))) {
+ return false;
+ }
- return(TRUE);
+ if (lock_get_type_low(lock1) != LOCK_REC) {
+ return true;
+ }
+
+ ut_ad(lock_get_type_low(lock2) == LOCK_REC);
+
+ if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
+ return lock_prdt_has_to_wait(lock1->trx, lock1->type_mode,
+ lock_get_prdt_from_lock(lock1),
+ lock2);
}
- return(FALSE);
+ return lock_rec_has_to_wait(
+ false, lock1->trx, lock1->type_mode, lock2,
+ lock_rec_get_nth_bit(lock1, PAGE_HEAP_NO_SUPREMUM));
}
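
One detail of the hunk above: the old code passed the literal true as the heap number, which only worked because the supremum pseudo-record occupies heap slot 1 (the "second bit" mentioned in the removed comment). The constants below reflect the InnoDB page heap layout as far as I recall; treat them as an assumption rather than a quotation of the header.

  #include <cassert>

  enum {
    PAGE_HEAP_NO_INFIMUM  = 0,  // infimum pseudo-record
    PAGE_HEAP_NO_SUPREMUM = 1,  // supremum pseudo-record
    PAGE_HEAP_NO_USER_LOW = 2   // first user record
  };

  int main() {
    assert(int(true) == PAGE_HEAP_NO_SUPREMUM);  // why the old literal worked
    return 0;
  }
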
/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
@@ -903,7 +893,7 @@ lock_rec_expl_exist_on_page(
lock_mutex_enter();
/* Only used in ibuf pages, so rec_hash is good enough */
- lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
+ lock = lock_rec_get_first_on_page_addr(lock_sys.rec_hash,
space, page_no);
lock_mutex_exit();
@@ -1021,7 +1011,7 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
- for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -1074,7 +1064,7 @@ lock_rec_other_has_expl_req(
return(NULL);
}
- for (lock_t* lock = lock_rec_get_first(lock_sys->rec_hash,
+ for (lock_t* lock = lock_rec_get_first(lock_sys.rec_hash,
block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -1129,14 +1119,14 @@ wsrep_kill_victim(
ib::info() << "*** Victim TRANSACTION:";
}
- wsrep_trx_print_locking(stderr, trx, 3000);
+ trx_print_latched(stderr, trx, 3000);
if (bf_other) {
ib::info() << "*** Priority TRANSACTION:";
} else {
ib::info() << "*** Victim TRANSACTION:";
}
- wsrep_trx_print_locking(stderr, lock->trx, 3000);
+ trx_print_latched(stderr, lock->trx, 3000);
ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";
@@ -1182,7 +1172,7 @@ lock_rec_other_has_conflicting(
bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
- for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -1215,6 +1205,7 @@ static
trx_t*
lock_sec_rec_some_has_impl(
/*=======================*/
+ trx_t* caller_trx, /*!< in/out: trx of current thread */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: secondary index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
@@ -1224,10 +1215,10 @@ lock_sec_rec_some_has_impl(
const page_t* page = page_align(rec);
ut_ad(!lock_mutex_own());
- ut_ad(!trx_sys_mutex_own());
ut_ad(!dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!rec_is_metadata(rec, index));
max_trx_id = page_get_max_trx_id(page);
@@ -1237,7 +1228,7 @@ lock_sec_rec_some_has_impl(
max trx id to the log, and therefore during recovery, this value
for a page may be incorrect. */
- if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
+ if (max_trx_id < trx_sys.get_min_trx_id()) {
trx = 0;
@@ -1250,68 +1241,17 @@ lock_sec_rec_some_has_impl(
x-lock. We have to look in the clustered index. */
} else {
- trx = row_vers_impl_x_locked(rec, index, offsets);
+ trx = row_vers_impl_x_locked(caller_trx, rec, index, offsets);
}
return(trx);
}
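
The simplified check above relies on one invariant: if the newest change recorded on the page is older than every active transaction, no active transaction can hold an implicit lock there, so the clustered-index lookup can be skipped. A toy illustration of that short-circuit, with trx_sys.get_min_trx_id() modelled as a plain number:

  #include <cstdint>
  #include <iostream>

  typedef uint64_t trx_id_t;

  // Returns true when the expensive clustered-index lookup
  // (row_vers_impl_x_locked()) is unnecessary.
  static bool no_implicit_lock_possible(trx_id_t page_max_trx_id,
                                        trx_id_t min_active_trx_id) {
    return page_max_trx_id < min_active_trx_id;
  }

  int main() {
    std::cout << no_implicit_lock_possible(90, 100) << '\n';   // 1: skip lookup
    std::cout << no_implicit_lock_possible(120, 100) << '\n';  // 0: must check
  }
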
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks if some transaction, other than given trx_id, has an explicit
-lock on the given rec, in the given precise_mode.
-@return the transaction, whose id is not equal to trx_id, that has an
-explicit lock on the given rec, in the given precise_mode or NULL.*/
-static
-trx_t*
-lock_rec_other_trx_holds_expl(
-/*==========================*/
- ulint precise_mode, /*!< in: LOCK_S or LOCK_X
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP. */
- trx_t* trx, /*!< in: trx holding implicit
- lock on rec */
- const rec_t* rec, /*!< in: user record */
- const buf_block_t* block) /*!< in: buffer block
- containing the record */
-{
- trx_t* holds = NULL;
-
- lock_mutex_enter();
-
- if (trx_t* impl_trx = trx_rw_is_active(trx->id, NULL, false)) {
- ulint heap_no = page_rec_get_heap_no(rec);
- mutex_enter(&trx_sys->mutex);
-
- for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- t != NULL;
- t = UT_LIST_GET_NEXT(trx_list, t)) {
-
- lock_t* expl_lock = lock_rec_has_expl(
- precise_mode, block, heap_no, t);
-
- if (expl_lock && expl_lock->trx != impl_trx) {
- /* An explicit lock is held by trx other than
- the trx holding the implicit lock. */
- holds = expl_lock->trx;
- break;
- }
- }
-
- mutex_exit(&trx_sys->mutex);
- }
-
- lock_mutex_exit();
-
- return(holds);
-}
-#endif /* UNIV_DEBUG */
-
/*********************************************************************//**
Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
@@ -1324,7 +1264,7 @@ lock_number_of_rows_locked(
/*********************************************************************//**
Return the number of table locks for a transaction.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
@@ -1551,7 +1491,7 @@ lock_rec_create_low(
&& innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !thd_is_replication_slave_thread(trx->mysql_thd)) {
- HASH_PREPEND(lock_t, hash, lock_sys->rec_hash,
+ HASH_PREPEND(lock_t, hash, lock_sys.rec_hash,
lock_rec_fold(space, page_no), lock);
} else {
HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
@@ -1674,7 +1614,7 @@ lock_queue_validate(
hash_table_t* hash;
hash_cell_t* cell;
lock_t* next;
- bool wait_lock = false;
+ bool wait_lock __attribute__((unused))= false;
if (in_lock == NULL) {
return true;
@@ -1777,6 +1717,11 @@ lock_rec_enqueue_waiting(
ut_ad(0);
}
+ if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
+ trx->error_state = DB_LOCK_WAIT_TIMEOUT;
+ return DB_LOCK_WAIT_TIMEOUT;
+ }
+
/* Enqueue the lock request that will wait to be granted, note that
we already own the trx mutex. */
lock_t* lock = lock_rec_create(
@@ -1830,7 +1775,7 @@ lock_rec_enqueue_waiting(
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !prdt
&& !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+ HASH_DELETE(lock_t, hash, lock_sys.rec_hash,
lock_rec_lock_fold(lock), lock);
dberr_t res = lock_rec_insert_by_trx_age(lock);
if (res != DB_SUCCESS) {
@@ -1971,166 +1916,6 @@ lock_rec_add_to_queue(
}
/*********************************************************************//**
-This is a fast routine for locking a record in the most common cases:
-there are no explicit locks on the page, or there is just one lock, owned
-by this transaction, and of the right type_mode. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock.
-@return whether the locking succeeded */
-UNIV_INLINE
-lock_rec_req_status
-lock_rec_lock_fast(
-/*===============*/
- bool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
- || srv_read_only_mode);
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
-
- lock_t* lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
-
- trx_t* trx = thr_get_trx(thr);
-
- lock_rec_req_status status = LOCK_REC_SUCCESS;
-
- if (lock == NULL) {
- if (!impl) {
- /* Note that we don't own the trx mutex. */
- lock = lock_rec_create(
-#ifdef WITH_WSREP
- NULL, NULL,
-#endif
- mode, block, heap_no, index, trx, false);
- }
-
- status = LOCK_REC_SUCCESS_CREATED;
- } else {
- trx_mutex_enter(trx);
-
- if (lock_rec_get_next_on_page(lock)
- || lock->trx != trx
- || lock->type_mode != (mode | LOCK_REC)
- || lock_rec_get_n_bits(lock) <= heap_no) {
-
- status = LOCK_REC_FAIL;
- } else if (!impl) {
- /* If the nth bit of the record lock is already set
- then we do not set a new lock bit, otherwise we do
- set */
- if (!lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_set_nth_bit(lock, heap_no);
- status = LOCK_REC_SUCCESS_CREATED;
- }
- }
-
- trx_mutex_exit(trx);
- }
-
- return(status);
-}
-
-/*********************************************************************//**
-This is the general, and slower, routine for locking a record. This is a
-low-level function which does NOT look at implicit locks! Checks lock
-compatibility within explicit locks. This function sets a normal next-key
-lock, or in the case of a page supremum record, a gap type lock.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
-static
-dberr_t
-lock_rec_lock_slow(
-/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
-
- dberr_t err;
- trx_t* trx = thr_get_trx(thr);
-
- trx_mutex_enter(trx);
-
- if (lock_rec_has_expl(mode, block, heap_no, trx)) {
- /* The trx already has a strong enough lock: do nothing */
- err = DB_SUCCESS;
- } else if (
-#ifdef WITH_WSREP
- lock_t* c_lock =
-#endif /* WITH_WSREP */
- lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(mode),
- block, heap_no, trx)) {
- /* If another transaction has a non-gap conflicting
- request in the queue, as this transaction does not
- have a lock strong enough already granted on the
- record, we have to wait. */
- err = lock_rec_enqueue_waiting(
-#ifdef WITH_WSREP
- c_lock,
-#endif /* WITH_WSREP */
- mode, block, heap_no, index, thr, NULL);
- } else if (!impl) {
- /* Set the requested lock on the record, note that
- we already own the transaction mutex. */
- lock_rec_add_to_queue(
- LOCK_REC | mode, block, heap_no, index, trx, TRUE);
- err = DB_SUCCESS_LOCKED_REC;
- } else {
- err = DB_SUCCESS;
- }
-
- trx_mutex_exit(trx);
-
- return(err);
-}
-
-/*********************************************************************//**
Tries to lock the specified record in the mode requested. If not immediately
possible, enqueues a waiting lock request. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
@@ -2154,33 +1939,93 @@ lock_rec_lock(
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- /* We try a simplified and faster subroutine for the most
- common cases */
- switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
- case LOCK_REC_SUCCESS:
- return(DB_SUCCESS);
- case LOCK_REC_SUCCESS_CREATED:
- return(DB_SUCCESS_LOCKED_REC);
- case LOCK_REC_FAIL:
- return(lock_rec_lock_slow(impl, mode, block,
- heap_no, index, thr));
- }
+ trx_t *trx= thr_get_trx(thr);
+ dberr_t err= DB_SUCCESS;
+
+ ut_ad(!srv_read_only_mode);
+ ut_ad((LOCK_MODE_MASK & mode) == LOCK_S ||
+ (LOCK_MODE_MASK & mode) == LOCK_X);
+ ut_ad((mode & LOCK_TYPE_MASK) == LOCK_GAP ||
+ (mode & LOCK_TYPE_MASK) == LOCK_REC_NOT_GAP ||
+ (mode & LOCK_TYPE_MASK) == 0);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
+ DBUG_EXECUTE_IF("innodb_report_deadlock", return DB_DEADLOCK;);
+
+ lock_mutex_enter();
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
+ lock_table_has(trx, index->table, LOCK_IS));
+ ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
+ lock_table_has(trx, index->table, LOCK_IX));
+
+ if (lock_t *lock= lock_rec_get_first_on_page(lock_sys.rec_hash, block))
+ {
+ trx_mutex_enter(trx);
+ if (lock_rec_get_next_on_page(lock) ||
+ lock->trx != trx ||
+ lock->type_mode != (ulint(mode) | LOCK_REC) ||
+ lock_rec_get_n_bits(lock) <= heap_no)
+ {
+ /* Do nothing if the trx already has a strong enough lock on rec */
+ if (!lock_rec_has_expl(mode, block, heap_no, trx))
+ {
+ if (
+#ifdef WITH_WSREP
+ lock_t *c_lock=
+#endif
+ lock_rec_other_has_conflicting(mode, block, heap_no, trx))
+ {
+ /*
+ If another transaction has a non-gap conflicting
+ request in the queue, as this transaction does not
+ have a lock strong enough already granted on the
+ record, we have to wait. */
+ err = lock_rec_enqueue_waiting(
+#ifdef WITH_WSREP
+ c_lock,
+#endif /* WITH_WSREP */
+ mode, block, heap_no, index, thr, NULL);
+ }
+ else if (!impl)
+ {
+ /* Set the requested lock on the record. */
+ lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx,
+ true);
+ err= DB_SUCCESS_LOCKED_REC;
+ }
+ }
+ }
+ else if (!impl)
+ {
+ /*
+ If the nth bit of the record lock is already set then we do not set
+ a new lock bit, otherwise we do set
+ */
+ if (!lock_rec_get_nth_bit(lock, heap_no))
+ {
+ lock_rec_set_nth_bit(lock, heap_no);
+ err= DB_SUCCESS_LOCKED_REC;
+ }
+ }
+ trx_mutex_exit(trx);
+ }
+ else
+ {
+ /*
+ Simplified and faster path for the most common cases
+ Note that we don't own the trx mutex.
+ */
+ if (!impl)
+ lock_rec_create(
+#ifdef WITH_WSREP
+ NULL, NULL,
+#endif
+ mode, block, heap_no, index, trx, false);
- ut_error;
- return(DB_ERROR);
+ err= DB_SUCCESS_LOCKED_REC;
+ }
+ lock_mutex_exit();
+ MONITOR_ATOMIC_INC(MONITOR_NUM_RECLOCK_REQ);
+ return err;
}
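
The rewrite above folds the former fast and slow paths into one function. The sketch below summarizes the possible outcomes, with booleans standing in for the checks made under lock_sys.mutex and trx->mutex and toy return codes standing in for the dberr_t values; it is an outline of the control flow, not the real implementation.

  #include <iostream>

  enum toy_err { TOY_SUCCESS, TOY_SUCCESS_LOCKED_REC, TOY_LOCK_WAIT };

  static toy_err toy_lock_rec_lock(bool page_has_locks,
                                   bool reusable_single_own_lock,
                                   bool bit_already_set,
                                   bool already_strong_enough,
                                   bool conflict,
                                   bool impl) {
    if (!page_has_locks)                   // empty queue: create unless implicit
      return TOY_SUCCESS_LOCKED_REC;
    if (reusable_single_own_lock)          // exactly one lock, ours, right mode
      return (!impl && !bit_already_set) ? TOY_SUCCESS_LOCKED_REC : TOY_SUCCESS;
    if (already_strong_enough)             // lock_rec_has_expl()
      return TOY_SUCCESS;
    if (conflict)                          // lock_rec_other_has_conflicting()
      return TOY_LOCK_WAIT;                // enqueue a waiting request
    return impl ? TOY_SUCCESS              // caller keeps the lock implicit
                : TOY_SUCCESS_LOCKED_REC;  // lock_rec_add_to_queue()
  }

  int main() {
    std::cout << toy_lock_rec_lock(true, false, false, false, true, false)
              << '\n';                     // prints 2: the request must wait
  }
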
/*********************************************************************//**
@@ -2334,8 +2179,8 @@ lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no)
{
lock_t* lock;
lock_t* previous = static_cast<lock_t*>(
- hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash))
+ hash_get_nth_cell(lock_sys.rec_hash,
+ hash_calc_hash(rec_fold, lock_sys.rec_hash))
->node);
if (previous == NULL) {
return;
@@ -2412,7 +2257,7 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock)
if (innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
- || lock_hash != lock_sys->rec_hash
+ || lock_hash != lock_sys.rec_hash
|| thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
/* Check if waiting locks in the queue can now be granted:
grant locks if there are no conflicting locks ahead. Stop at
@@ -2514,11 +2359,11 @@ lock_rec_free_all_from_discard_page(
page_no = block->page.id.page_no();
lock_rec_free_all_from_discard_page_low(
- space, page_no, lock_sys->rec_hash);
+ space, page_no, lock_sys.rec_hash);
lock_rec_free_all_from_discard_page_low(
- space, page_no, lock_sys->prdt_hash);
+ space, page_no, lock_sys.prdt_hash);
lock_rec_free_all_from_discard_page_low(
- space, page_no, lock_sys->prdt_page_hash);
+ space, page_no, lock_sys.prdt_page_hash);
}
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
@@ -2563,12 +2408,12 @@ lock_rec_reset_and_release_wait(
ulint heap_no)/*!< in: heap number of record */
{
lock_rec_reset_and_release_wait_low(
- lock_sys->rec_hash, block, heap_no);
+ lock_sys.rec_hash, block, heap_no);
lock_rec_reset_and_release_wait_low(
- lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
+ lock_sys.prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
lock_rec_reset_and_release_wait_low(
- lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
+ lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
}
/*************************************************************//**
@@ -2601,7 +2446,7 @@ lock_rec_inherit_to_gap(
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
- for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -2612,7 +2457,8 @@ lock_rec_inherit_to_gap(
&& lock_get_mode(lock) ==
(lock->trx->duplicates ? LOCK_S : LOCK_X))) {
lock_rec_add_to_queue(
- LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+ LOCK_REC | LOCK_GAP
+ | ulint(lock_get_mode(lock)),
heir_block, heir_heap_no, lock->index,
lock->trx, FALSE);
}
@@ -2639,7 +2485,7 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock_mutex_enter();
- for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@@ -2648,7 +2494,8 @@ lock_rec_inherit_to_gap_if_gap_lock(
|| !lock_rec_get_rec_not_gap(lock))) {
lock_rec_add_to_queue(
- LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+ LOCK_REC | LOCK_GAP
+ | ulint(lock_get_mode(lock)),
block, heir_heap_no, lock->index,
lock->trx, FALSE);
}
@@ -2683,8 +2530,8 @@ lock_rec_move_low(
/* If the lock is predicate lock, it resides on INFIMUM record */
ut_ad(lock_rec_get_first(
lock_hash, receiver, receiver_heap_no) == NULL
- || lock_hash == lock_sys->prdt_hash
- || lock_hash == lock_sys->prdt_page_hash);
+ || lock_hash == lock_sys.prdt_hash
+ || lock_hash == lock_sys.prdt_page_hash);
for (lock = lock_rec_get_first(lock_hash,
donator, donator_heap_no);
@@ -2707,7 +2554,7 @@ lock_rec_move_low(
lock->index, lock->trx, FALSE);
}
- ut_ad(lock_rec_get_first(lock_sys->rec_hash,
+ ut_ad(lock_rec_get_first(lock_sys.rec_hash,
donator, donator_heap_no) == NULL);
}
@@ -2762,7 +2609,7 @@ lock_rec_move(
ulint donator_heap_no)/*!< in: heap_no of the record
which gives the locks */
{
- lock_rec_move_low(lock_sys->rec_hash, receiver, donator,
+ lock_rec_move_low(lock_sys.rec_hash, receiver, donator,
receiver_heap_no, donator_heap_no);
}
@@ -2787,7 +2634,7 @@ lock_move_reorganize_page(
lock_mutex_enter();
/* FIXME: This needs to deal with predicate lock too */
- lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
+ lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block);
if (lock == NULL) {
lock_mutex_exit();
@@ -2844,6 +2691,9 @@ lock_move_reorganize_page(
for (;;) {
ulint old_heap_no;
ulint new_heap_no;
+ ut_d(const rec_t* const orec = rec1);
+ ut_ad(page_rec_is_metadata(rec1)
+ == page_rec_is_metadata(rec2));
if (comp) {
old_heap_no = rec_get_heap_no_new(rec2);
@@ -2864,6 +2714,8 @@ lock_move_reorganize_page(
/* Clear the bit in old_lock. */
if (old_heap_no < lock->un_member.rec_lock.n_bits
&& lock_rec_reset_nth_bit(lock, old_heap_no)) {
+ ut_ad(!page_rec_is_metadata(orec));
+
/* NOTE that the old lock bitmap could be too
small for the new heap number! */
@@ -2915,7 +2767,7 @@ lock_move_rec_list_end(
table to the end of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */
- for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
+ for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
lock = lock_rec_get_next_on_page(lock)) {
const rec_t* rec1 = rec;
const rec_t* rec2;
@@ -2943,6 +2795,10 @@ lock_move_rec_list_end(
reset the lock bits on the old */
for (;;) {
+ ut_ad(page_rec_is_metadata(rec1)
+ == page_rec_is_metadata(rec2));
+ ut_d(const rec_t* const orec = rec1);
+
ulint rec1_heap_no;
ulint rec2_heap_no;
@@ -2965,8 +2821,11 @@ lock_move_rec_list_end(
rec2_heap_no = rec_get_heap_no_old(rec2);
+ ut_ad(rec_get_data_size_old(rec1)
+ == rec_get_data_size_old(rec2));
+
ut_ad(!memcmp(rec1, rec2,
- rec_get_data_size_old(rec2)));
+ rec_get_data_size_old(rec1)));
rec1 = page_rec_get_next_low(rec1, FALSE);
rec2 = page_rec_get_next_low(rec2, FALSE);
@@ -2974,6 +2833,8 @@ lock_move_rec_list_end(
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
+ ut_ad(!page_rec_is_metadata(orec));
+
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
}
@@ -3017,10 +2878,11 @@ lock_move_rec_list_start(
ut_ad(block->frame == page_align(rec));
ut_ad(new_block->frame == page_align(old_end));
ut_ad(comp == page_rec_is_comp(old_end));
+ ut_ad(!page_rec_is_metadata(rec));
lock_mutex_enter();
- for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
+ for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
lock = lock_rec_get_next_on_page(lock)) {
const rec_t* rec1;
const rec_t* rec2;
@@ -3042,6 +2904,10 @@ lock_move_rec_list_start(
reset the lock bits on the old */
while (rec1 != rec) {
+ ut_ad(page_rec_is_metadata(rec1)
+ == page_rec_is_metadata(rec2));
+ ut_d(const rec_t* const prev = rec1);
+
ulint rec1_heap_no;
ulint rec2_heap_no;
@@ -3064,6 +2930,8 @@ lock_move_rec_list_start(
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
+ ut_ad(!page_rec_is_metadata(prev));
+
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
}
@@ -3126,7 +2994,7 @@ lock_rtr_move_rec_list(
lock_mutex_enter();
- for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
+ for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
lock = lock_rec_get_next_on_page(lock)) {
ulint moved = 0;
const rec_t* rec1;
@@ -3142,6 +3010,8 @@ lock_rtr_move_rec_list(
rec1 = rec_move[moved].old_rec;
rec2 = rec_move[moved].new_rec;
+ ut_ad(!page_rec_is_metadata(rec1));
+ ut_ad(!page_rec_is_metadata(rec2));
if (comp) {
rec1_heap_no = rec_get_heap_no_new(rec1);
@@ -3220,6 +3090,8 @@ lock_update_merge_right(
page which will be
discarded */
{
+ ut_ad(!page_rec_is_metadata(orig_succ));
+
lock_mutex_enter();
/* Inherit the locks from the supremum of the left page to the
@@ -3234,21 +3106,17 @@ lock_update_merge_right(
waiting transactions */
lock_rec_reset_and_release_wait_low(
- lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
+ lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
-#ifdef UNIV_DEBUG
/* there should exist no page lock on the left page,
otherwise, it will be blocked from merge */
- ulint space = left_block->page.id.space();
- ulint page_no = left_block->page.id.page_no();
- ut_ad(lock_rec_get_first_on_page_addr(
- lock_sys->prdt_page_hash, space, page_no) == NULL);
-#endif /* UNIV_DEBUG */
+ ut_ad(!lock_rec_get_first_on_page_addr(lock_sys.prdt_page_hash,
+ left_block->page.id.space(),
+ left_block->page.id.page_no()));
lock_rec_free_all_from_discard_page(left_block);
lock_mutex_exit();
-
}
/*************************************************************//**
@@ -3352,7 +3220,7 @@ lock_update_merge_left(
releasing waiting transactions */
lock_rec_reset_and_release_wait_low(
- lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
+ lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
}
/* Move the locks from the supremum of right page to the supremum
@@ -3361,15 +3229,12 @@ lock_update_merge_left(
lock_rec_move(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
-#ifdef UNIV_DEBUG
/* there should exist no page lock on the right page,
otherwise, it will be blocked from merge */
- ulint space = right_block->page.id.space();
- ulint page_no = right_block->page.id.page_no();
- lock_t* lock_test = lock_rec_get_first_on_page_addr(
- lock_sys->prdt_page_hash, space, page_no);
- ut_ad(!lock_test);
-#endif /* UNIV_DEBUG */
+ ut_ad(!lock_rec_get_first_on_page_addr(
+ lock_sys.prdt_page_hash,
+ right_block->page.id.space(),
+ right_block->page.id.page_no()));
lock_rec_free_all_from_discard_page(right_block);
@@ -3420,9 +3285,9 @@ lock_update_discard(
lock_mutex_enter();
- if (lock_rec_get_first_on_page(lock_sys->rec_hash, block)) {
- ut_ad(!lock_rec_get_first_on_page(lock_sys->prdt_hash, block));
- ut_ad(!lock_rec_get_first_on_page(lock_sys->prdt_page_hash,
+ if (lock_rec_get_first_on_page(lock_sys.rec_hash, block)) {
+ ut_ad(!lock_rec_get_first_on_page(lock_sys.prdt_hash, block));
+ ut_ad(!lock_rec_get_first_on_page(lock_sys.prdt_page_hash,
block));
/* Inherit all the locks on the page to the record and
reset all the locks on the page */
@@ -3459,14 +3324,14 @@ lock_update_discard(
lock_rec_free_all_from_discard_page_low(
block->page.id.space(), block->page.id.page_no(),
- lock_sys->rec_hash);
+ lock_sys.rec_hash);
} else {
lock_rec_free_all_from_discard_page_low(
block->page.id.space(), block->page.id.page_no(),
- lock_sys->prdt_hash);
+ lock_sys.prdt_hash);
lock_rec_free_all_from_discard_page_low(
block->page.id.space(), block->page.id.page_no(),
- lock_sys->prdt_page_hash);
+ lock_sys.prdt_page_hash);
}
lock_mutex_exit();
@@ -3484,6 +3349,7 @@ lock_update_insert(
ulint donator_heap_no;
ut_ad(block->frame == page_align(rec));
+ ut_ad(!page_rec_is_metadata(rec));
/* Inherit the gap-locking locks for rec, in gap mode, from the next
record */
@@ -3515,6 +3381,7 @@ lock_update_delete(
ulint next_heap_no;
ut_ad(page == page_align(rec));
+ ut_ad(!page_rec_is_metadata(rec));
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(rec);
@@ -3900,7 +3767,7 @@ lock_table_enqueue_waiting(
#endif /* WITH_WSREP */
/* Enqueue the lock request that will wait to be granted */
- lock = lock_table_create(table, mode | LOCK_WAIT, trx
+ lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx
#ifdef WITH_WSREP
, c_lock
#endif
@@ -4011,7 +3878,7 @@ lock_table(
locking overhead */
if ((flags & BTR_NO_LOCKING_FLAG)
|| srv_read_only_mode
- || dict_table_is_temporary(table)) {
+ || table->is_temporary()) {
return(DB_SUCCESS);
}
@@ -4059,13 +3926,14 @@ lock_table(
mode: this trx may have to wait */
if (wait_for != NULL) {
- err = lock_table_enqueue_waiting(mode | flags, table, thr
+ err = lock_table_enqueue_waiting(ulint(mode) | flags, table,
+ thr
#ifdef WITH_WSREP
, wait_for
#endif
);
} else {
- lock_table_create(table, mode | flags, trx);
+ lock_table_create(table, ulint(mode) | flags, trx);
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
@@ -4311,13 +4179,14 @@ lock_rec_unlock(
ut_ad(block->frame == page_align(rec));
ut_ad(!trx->lock.wait_lock);
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(!page_rec_is_metadata(rec));
heap_no = page_rec_get_heap_no(rec);
lock_mutex_enter();
trx_mutex_enter(trx);
- first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ first_lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
/* Find the last lock with the same lock_mode and transaction
on the record. */
@@ -4366,7 +4235,7 @@ released:
}
}
} else {
- lock_grant_and_move_on_rec(lock_sys->rec_hash, first_lock, heap_no);
+ lock_grant_and_move_on_rec(lock_sys.rec_hash, first_lock, heap_no);
}
lock_mutex_exit();
@@ -4425,7 +4294,7 @@ lock_release(
{
lock_t* lock;
ulint count = 0;
- trx_id_t max_trx_id = trx_sys_get_max_trx_id();
+ trx_id_t max_trx_id = trx_sys.get_max_trx_id();
ut_ad(lock_mutex_own());
ut_ad(!trx_mutex_own(trx));
@@ -4523,195 +4392,6 @@ lock_trx_table_locks_remove(
ut_error;
}
-/*********************************************************************//**
-Removes locks of a transaction on a table to be dropped.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock that is going to be removed is allowed to be a wait lock. */
-static
-void
-lock_remove_all_on_table_for_trx(
-/*=============================*/
- dict_table_t* table, /*!< in: table to be dropped */
- trx_t* trx, /*!< in: a transaction */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
- lock_t* prev_lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
- lock != NULL;
- lock = prev_lock) {
-
- prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
-
- if (lock_get_type_low(lock) == LOCK_REC
- && lock->index->table == table) {
- ut_a(!lock_get_wait(lock));
-
- lock_rec_discard(lock);
- } else if (lock_get_type_low(lock) & LOCK_TABLE
- && lock->un_member.tab_lock.table == table
- && (remove_also_table_sx_locks
- || !IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
-
- lock_trx_table_locks_remove(lock);
- lock_table_remove_low(lock);
- }
- }
-}
-
-/*******************************************************************//**
-Remove any explicit record locks held by recovering transactions on
-the table.
-@return number of recovered transactions examined */
-static
-ulint
-lock_remove_recovered_trx_record_locks(
-/*===================================*/
- dict_table_t* table) /*!< in: check if there are any locks
- held on records in this table or on the
- table itself */
-{
- ut_a(table != NULL);
- ut_ad(lock_mutex_own());
-
- ulint n_recovered_trx = 0;
-
- mutex_enter(&trx_sys->mutex);
-
- for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
-
- if (!trx->is_recovered) {
- continue;
- }
-
- /* Because we are holding the lock_sys->mutex,
- implicit locks cannot be converted to explicit ones
- while we are scanning the explicit locks. */
-
- lock_t* next_lock;
-
- for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = next_lock) {
-
- ut_a(lock->trx == trx);
-
- /* Recovered transactions can't wait on a lock. */
-
- ut_a(!lock_get_wait(lock));
-
- next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
-
- switch (lock_get_type_low(lock)) {
- default:
- ut_error;
- case LOCK_TABLE:
- if (lock->un_member.tab_lock.table == table) {
- lock_trx_table_locks_remove(lock);
- lock_table_remove_low(lock);
- }
- break;
- case LOCK_REC:
- if (lock->index->table == table) {
- lock_rec_discard(lock);
- }
- }
- }
-
- ++n_recovered_trx;
- }
-
- mutex_exit(&trx_sys->mutex);
-
- return(n_recovered_trx);
-}
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or discarded.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or discarded */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
-
- lock_mutex_enter();
-
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock != NULL;
- /* No op */) {
-
- lock_t* prev_lock;
-
- prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
-
- /* If we should remove all locks (remove_also_table_sx_locks
- is TRUE), or if the lock is not table-level S or X lock,
- then check we are not going to remove a wait lock. */
- if (remove_also_table_sx_locks
- || !(lock_get_type(lock) == LOCK_TABLE
- && IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
- }
-
- lock_remove_all_on_table_for_trx(
- table, lock->trx, remove_also_table_sx_locks);
-
- if (prev_lock == NULL) {
- if (lock == UT_LIST_GET_FIRST(table->locks)) {
- /* lock was not removed, pick its successor */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- } else {
- /* lock was removed, pick the first one */
- lock = UT_LIST_GET_FIRST(table->locks);
- }
- } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
- prev_lock) != lock) {
- /* If lock was removed by
- lock_remove_all_on_table_for_trx() then pick the
- successor of prev_lock ... */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, prev_lock);
- } else {
- /* ... otherwise pick the successor of lock. */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- }
- }
-
- /* Note: Recovered transactions don't have table level IX or IS locks
- but can have implicit record locks that have been converted to explicit
- record locks. Such record locks cannot be freed by traversing the
- transaction lock list in dict_table_t (as above). */
-
- if (!lock_sys->rollback_complete
- && lock_remove_recovered_trx_record_locks(table) == 0) {
-
- lock_sys->rollback_complete = TRUE;
- }
-
- lock_mutex_exit();
-}
-
/*===================== VALIDATION AND DEBUGGING ====================*/
/** Print info of a table lock.
@@ -4779,7 +4459,7 @@ lock_rec_print(FILE* file, const lock_t* lock)
(ulong) space, (ulong) page_no,
(ulong) lock_rec_get_n_bits(lock),
lock->index->name());
- ut_print_name(file, lock->trx, lock->index->table_name);
+ ut_print_name(file, lock->trx, lock->index->table->name.m_name);
fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
if (lock_get_mode(lock) == LOCK_S) {
@@ -4828,6 +4508,7 @@ lock_rec_print(FILE* file, const lock_t* lock)
rec = page_find_rec_with_heap_no(
buf_block_get_frame(block), i);
+ ut_ad(!page_rec_is_metadata(rec));
offsets = rec_get_offsets(
rec, lock->index, offsets, true,
@@ -4868,11 +4549,11 @@ lock_get_n_rec_locks(void)
ut_ad(lock_mutex_own());
- for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
+ for (i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
const lock_t* lock;
for (lock = static_cast<const lock_t*>(
- HASH_GET_FIRST(lock_sys->rec_hash, i));
+ HASH_GET_FIRST(lock_sys.rec_hash, i));
lock != 0;
lock = static_cast<const lock_t*>(
HASH_GET_NEXT(hash, lock))) {
@@ -4921,49 +4602,19 @@ lock_print_info_summary(
"------------\n", file);
fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
- trx_sys_get_max_trx_id());
+ trx_sys.get_max_trx_id());
fprintf(file,
"Purge done for trx's n:o < " TRX_ID_FMT
- " undo n:o < " TRX_ID_FMT " state: ",
- purge_sys->iter.trx_no,
- purge_sys->iter.undo_no);
-
- /* Note: We are reading the state without the latch. One because it
- will violate the latching order and two because we are merely querying
- the state of the variable for display. */
-
- switch (purge_sys->state){
- case PURGE_STATE_INIT:
- /* Should never be in this state while the system is running. */
- ut_error;
-
- case PURGE_STATE_EXIT:
- fprintf(file, "exited");
- break;
-
- case PURGE_STATE_DISABLED:
- fprintf(file, "disabled");
- break;
-
- case PURGE_STATE_RUN:
- fprintf(file, "running");
- /* Check if it is waiting for more data to arrive. */
- if (!purge_sys->running) {
- fprintf(file, " but idle");
- }
- break;
-
- case PURGE_STATE_STOP:
- fprintf(file, "stopped");
- break;
- }
-
- fprintf(file, "\n");
-
- fprintf(file,
- "History list length %lu\n",
- (ulong) trx_sys->rseg_history_len);
+ " undo n:o < " TRX_ID_FMT " state: %s\n"
+ "History list length " ULINTPF "\n",
+ purge_sys.tail.trx_no(),
+ purge_sys.tail.undo_no,
+ purge_sys.enabled()
+ ? (purge_sys.running() ? "running"
+ : purge_sys.paused() ? "stopped" : "running but idle")
+ : "disabled",
+ trx_sys.history_size());
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
fprintf(file,
@@ -4973,7 +4624,7 @@ lock_print_info_summary(
return(TRUE);
}
-/** Functor to print not-started transaction from the mysql_trx_list. */
+/** Functor to print not-started transaction from the trx_list. */
struct PrintNotStarted {
@@ -4981,12 +4632,12 @@ struct PrintNotStarted {
void operator()(const trx_t* trx)
{
- ut_ad(trx->in_mysql_trx_list);
- ut_ad(mutex_own(&trx_sys->mutex));
+ ut_ad(mutex_own(&trx_sys.mutex));
/* See state transitions and locking rules in trx0trx.h */
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
+ if (trx->mysql_thd
+ && trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
fputs("---", m_file);
trx_print_latched(m_file, trx, 600);
@@ -4996,116 +4647,6 @@ struct PrintNotStarted {
FILE* m_file;
};
-/** Iterate over a transaction's locks. Keeping track of the
-iterator using an ordinal value. */
-
-class TrxLockIterator {
-public:
- TrxLockIterator() { rewind(); }
-
- /** Get the m_index(th) lock of a transaction.
- @return current lock or 0 */
- const lock_t* current(const trx_t* trx) const
- {
- lock_t* lock;
- ulint i = 0;
-
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL && i < m_index;
- lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
-
- /* No op */
- }
-
- return(lock);
- }
-
- /** Set the ordinal value to 0 */
- void rewind()
- {
- m_index = 0;
- }
-
- /** Increment the ordinal value.
- @retun the current index value */
- ulint next()
- {
- return(++m_index);
- }
-
-private:
- /** Current iterator position */
- ulint m_index;
-};
-
-/** This iterates over both the RW and RO trx_sys lists. We need to keep
-track where the iterator was up to and we do that using an ordinal value. */
-
-class TrxListIterator {
-public:
- TrxListIterator() : m_index()
- {
- /* We iterate over the RW trx list first. */
-
- m_trx_list = &trx_sys->rw_trx_list;
- }
-
- /** Get the current transaction whose ordinality is m_index.
- @return current transaction or 0 */
-
- const trx_t* current()
- {
- return(reposition());
- }
-
- /** Advance the transaction current ordinal value and reset the
- transaction lock ordinal value */
-
- void next()
- {
- ++m_index;
- m_lock_iter.rewind();
- }
-
- TrxLockIterator& lock_iter()
- {
- return(m_lock_iter);
- }
-
-private:
- /** Reposition the "cursor" on the current transaction. If it
- is the first time then the "cursor" will be positioned on the
- first transaction.
-
- @return transaction instance or 0 */
- const trx_t* reposition() const
- {
- ulint i;
- trx_t* trx;
-
- /* Make the transaction at the ordinal value of m_index
- the current transaction. ie. reposition/restore */
-
- for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
- trx != NULL && (i < m_index);
- trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
-
- check_trx_state(trx);
- }
-
- return(trx);
- }
-
- /** Ordinal value of the transaction in the current transaction list */
- ulint m_index;
-
- /** Current transaction list */
- trx_ut_list_t* m_trx_list;
-
- /** For iterating over a transaction's locks */
- TrxLockIterator m_lock_iter;
-};
-
/** Prints transaction lock wait and MVCC state.
@param[in,out] file file where to print
@param[in] trx transaction */
@@ -5118,10 +4659,13 @@ lock_trx_print_wait_and_mvcc_state(
trx_print_latched(file, trx, 600);
- const ReadView* read_view = trx_get_read_view(trx);
+ /* Note: the read_view->get_state() check is a race condition. But it
+ should "kind of work" because read_view is freed only at shutdown.
+ Worst thing that may happen is that it'll get transferred to
+ another thread and print wrong values. */
- if (read_view != NULL) {
- read_view->print_limits(file);
+ if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) {
+ trx->read_view.print_limits(file);
}
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
@@ -5142,118 +4686,29 @@ lock_trx_print_wait_and_mvcc_state(
}
/*********************************************************************//**
-Prints info of locks for a transaction. This function will release the
-lock mutex and the trx_sys_t::mutex if the page was read from disk.
-@return true if page was read from the tablespace */
+Prints info of locks for a transaction. */
static
-bool
-lock_rec_fetch_page(
-/*================*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- ulint space_id = lock->un_member.rec_lock.space;
- fil_space_t* space;
- bool found;
- const page_size_t& page_size = fil_space_get_page_size(space_id,
- &found);
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- /* Check if the .ibd file exists. */
- if (found) {
- mtr_t mtr;
-
- lock_mutex_exit();
-
- mutex_exit(&trx_sys->mutex);
-
- DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
-
- /* Check if the space is exists or not. only
- when the space is valid, try to get the page. */
- space = fil_space_acquire(space_id);
- if (space) {
- dberr_t err = DB_SUCCESS;
- mtr_start(&mtr);
- buf_page_get_gen(
- page_id_t(space_id, page_no), page_size,
- RW_NO_LATCH, NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr, &err);
- mtr_commit(&mtr);
- fil_space_release(space);
- }
-
- lock_mutex_enter();
-
- mutex_enter(&trx_sys->mutex);
-
- return(true);
- }
-
- return(false);
-}
-
-/*********************************************************************//**
-Prints info of locks for a transaction.
-@return true if all printed, false if latches were released. */
-static
-bool
+void
lock_trx_print_locks(
/*=================*/
FILE* file, /*!< in/out: File to write */
- const trx_t* trx, /*!< in: current transaction */
- TrxLockIterator&iter, /*!< in: transaction lock iterator */
- bool load_block) /*!< in: if true then read block
- from disk */
+ const trx_t* trx) /*!< in: current transaction */
{
- const lock_t* lock;
-
+ uint32_t i= 0;
/* Iterate over the transaction's locks. */
- while ((lock = iter.current(trx)) != 0) {
-
+ for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
+ lock != NULL;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
if (lock_get_type_low(lock) == LOCK_REC) {
- if (load_block) {
-
- /* Note: lock_rec_fetch_page() will
- release both the lock mutex and the
- trx_sys_t::mutex if it does a read
- from disk. */
-
- if (lock_rec_fetch_page(lock)) {
- /* We need to resync the
- current transaction. */
- return(false);
- }
-
- /* It is a single table tablespace
- and the .ibd file is missing:
- just print the lock without
- attempting to load the page in the
- buffer pool. */
-
- fprintf(file,
- "RECORD LOCKS on non-existing"
- " space %u\n",
- lock->un_member.rec_lock.space);
- }
-
- /* Print all the record locks on the page from
- the record lock bitmap */
-
lock_rec_print(file, lock);
-
- load_block = true;
-
} else {
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
lock_table_print(file, lock);
}
- if (iter.next() >= 10) {
+ if (++i == 10) {
fprintf(file,
"10 LOCKS PRINTED FOR THIS TRX:"
@@ -5262,10 +4717,32 @@ lock_trx_print_locks(
break;
}
}
+}
- return(true);
+
+static my_bool lock_print_info_all_transactions_callback(
+ rw_trx_hash_element_t *element, FILE *file)
+{
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ check_trx_state(trx);
+ lock_trx_print_wait_and_mvcc_state(file, trx);
+
+ if (srv_print_innodb_lock_monitor)
+ {
+ trx->reference();
+ mutex_exit(&element->mutex);
+ lock_trx_print_locks(file, trx);
+ trx->release_reference();
+ return 0;
+ }
+ }
+ mutex_exit(&element->mutex);
+ return 0;
}
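
The callback above uses a pin-then-unlock pattern: the trx is reference-counted while the element mutex is held, so the slow lock printing can run without that mutex and the trx still cannot disappear. A standalone sketch of the same idea with stand-in types:

  #include <atomic>
  #include <iostream>
  #include <mutex>

  struct toy_trx {
    std::atomic<int> refs{0};
    void reference() { ++refs; }
    void release_reference() { --refs; }
  };

  struct toy_element {
    std::mutex mutex;
    toy_trx* trx;
  };

  static void print_callback(toy_element& e) {
    std::unique_lock<std::mutex> guard(e.mutex);
    if (toy_trx* trx = e.trx) {
      trx->reference();               // keep trx alive past the element mutex
      guard.unlock();
      std::cout << "printing locks of a pinned trx\n";  // slow work, no mutex held
      trx->release_reference();
    }
  }

  int main() {
    toy_trx t;
    toy_element e{};
    e.trx = &t;
    print_callback(e);
  }
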
+
/*********************************************************************//**
Prints info of locks for each transaction. This function assumes that the
caller holds the lock mutex and more importantly it will release the lock
@@ -5279,8 +4756,6 @@ lock_print_info_all_transactions(
fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
- mutex_enter(&trx_sys->mutex);
-
/* First print info on non-active transactions */
/* NOTE: information of auto-commit non-locking read-only
@@ -5288,62 +4763,14 @@ lock_print_info_all_transactions(
available from INFORMATION_SCHEMA.INNODB_TRX. */
PrintNotStarted print_not_started(file);
- ut_list_map(trx_sys->mysql_trx_list, print_not_started);
-
- const trx_t* trx;
- TrxListIterator trx_iter;
- const trx_t* prev_trx = 0;
-
- /* Control whether a block should be fetched from the buffer pool. */
- bool load_block = true;
- bool monitor = srv_print_innodb_lock_monitor;
-
- while ((trx = trx_iter.current()) != 0) {
-
- check_trx_state(trx);
-
- if (trx != prev_trx) {
- lock_trx_print_wait_and_mvcc_state(file, trx);
- prev_trx = trx;
-
- /* The transaction that read in the page is no
- longer the one that read the page in. We need to
- force a page read. */
- load_block = true;
- }
-
- /* If we need to print the locked record contents then we
- need to fetch the containing block from the buffer pool. */
- if (monitor) {
-
- /* Print the locks owned by the current transaction. */
- TrxLockIterator& lock_iter = trx_iter.lock_iter();
-
- if (!lock_trx_print_locks(
- file, trx, lock_iter, load_block)) {
-
- /* Resync trx_iter, the trx_sys->mutex and
- the lock mutex were released. A page was
- successfully read in. We need to print its
- contents on the next call to
- lock_trx_print_locks(). On the next call to
- lock_trx_print_locks() we should simply print
- the contents of the page just read in.*/
- load_block = false;
-
- continue;
- }
- }
-
- load_block = true;
-
- /* All record lock details were printed without fetching
- a page from disk, or we didn't need to print the detail. */
- trx_iter.next();
- }
+ mutex_enter(&trx_sys.mutex);
+ ut_list_map(trx_sys.trx_list, print_not_started);
+ mutex_exit(&trx_sys.mutex);
+ trx_sys.rw_trx_hash.iterate_no_dups(
+ reinterpret_cast<my_hash_walk_action>
+ (lock_print_info_all_transactions_callback), file);
lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
ut_ad(lock_validate());
}
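Note on the callback above: the per-transaction printing now runs from a rw_trx_hash iteration, and the transaction is pinned with reference()/release_reference() so the element mutex can be dropped while printing. A minimal standalone C++ sketch of that pin-then-print shape, with illustrative names only (not InnoDB APIs):

#include <atomic>
#include <cstdio>
#include <mutex>

struct Trx {
    unsigned long long id = 0;
    std::atomic<int>   n_ref{0};
    void reference()         { n_ref.fetch_add(1); }
    void release_reference() { n_ref.fetch_sub(1); }
};

struct HashElement {
    std::mutex mutex;
    Trx*       trx = nullptr;   // may be null while the slot is being reused
};

// Analogue of lock_print_info_all_transactions_callback(): pin the trx so it
// stays valid, drop the element mutex, then do the potentially slow printing.
static bool print_callback(HashElement& e, std::FILE* file)
{
    std::unique_lock<std::mutex> guard(e.mutex);
    if (Trx* trx = e.trx) {
        trx->reference();               // keeps trx alive past the element mutex
        guard.unlock();
        std::fprintf(file, "TRANSACTION %llu\n", trx->id);
        trx->release_reference();
    }
    return false;                       // false = keep iterating (0 in the patch)
}

int main()
{
    Trx t1, t2;
    t1.id = 10;
    t2.id = 11;
    HashElement e1, e2;
    e1.trx = &t1;
    e2.trx = &t2;
    HashElement* hash[] = { &e1, &e2 }; // stands in for rw_trx_hash.iterate_no_dups()
    for (HashElement* e : hash)
        print_callback(*e, stdout);
}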
@@ -5401,17 +4828,18 @@ lock_table_queue_validate(
const lock_t* lock;
ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
for (lock = UT_LIST_GET_FIRST(table->locks);
lock != NULL;
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
- /* lock->trx->state cannot change from or to NOT_STARTED
- while we are holding the trx_sys->mutex. It may change
- from ACTIVE to PREPARED, but it may not change to
- COMMITTED, because we are holding the lock_sys->mutex. */
- ut_ad(trx_assert_started(lock->trx));
+ /* Transaction state may change from ACTIVE to PREPARED.
+ State change to COMMITTED is not possible while we are
+ holding lock_sys.mutex: it is done by lock_trx_release_locks()
+ under lock_sys.mutex protection.
+ Transaction in NOT_STARTED state cannot hold locks, and
+ lock->trx->state can only move to NOT_STARTED from COMMITTED. */
+ check_trx_state(lock->trx);
if (!lock_get_wait(lock)) {
@@ -5433,10 +4861,10 @@ lock_table_queue_validate(
Validates the lock queue on a single record.
@return TRUE if ok */
static
-ibool
+bool
lock_rec_queue_validate(
/*====================*/
- ibool locked_lock_trx_sys,
+ bool locked_lock_trx_sys,
/*!< in: if the caller holds
both the lock mutex and
trx_sys_t->lock. */
@@ -5445,7 +4873,6 @@ lock_rec_queue_validate(
const dict_index_t* index, /*!< in: index, or NULL if not known */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- const trx_t* impl_trx;
const lock_t* lock;
ulint heap_no;
@@ -5453,6 +4880,7 @@ lock_rec_queue_validate(
ut_a(block->frame == page_align(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+ ut_ad(page_rec_is_leaf(rec));
ut_ad(lock_mutex_own() == locked_lock_trx_sys);
ut_ad(!index || dict_index_is_clust(index)
|| !dict_index_is_online_ddl(index));
@@ -5461,12 +4889,11 @@ lock_rec_queue_validate(
if (!locked_lock_trx_sys) {
lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
}
if (!page_rec_is_user_rec(rec)) {
- for (lock = lock_rec_get_first(lock_sys->rec_hash,
+ for (lock = lock_rec_get_first(lock_sys.rec_hash,
block, heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
@@ -5492,17 +4919,19 @@ lock_rec_queue_validate(
/* Nothing we can do */
} else if (dict_index_is_clust(index)) {
- trx_id_t trx_id;
-
/* Unlike the non-debug code, this invariant can only succeed
if the check and assertion are covered by the lock mutex. */
- trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- impl_trx = trx_rw_is_active_low(trx_id, NULL);
+ const trx_id_t impl_trx_id = lock_clust_rec_some_has_impl(
+ rec, index, offsets);
+
+ const trx_t *impl_trx = impl_trx_id
+ ? trx_sys.find(current_trx(), impl_trx_id, false)
+ : 0;
ut_ad(lock_mutex_own());
/* impl_trx cannot be committed until lock_mutex_exit()
- because lock_trx_release_locks() acquires lock_sys->mutex */
+ because lock_trx_release_locks() acquires lock_sys.mutex */
if (!impl_trx) {
} else if (const lock_t* other_lock
@@ -5548,11 +4977,12 @@ lock_rec_queue_validate(
}
}
- for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
ut_ad(!trx_is_ac_nl_ro(lock->trx));
+ ut_ad(!page_rec_is_metadata(rec));
if (index) {
ut_a(lock->index == index);
@@ -5592,7 +5022,6 @@ lock_rec_queue_validate(
func_exit:
if (!locked_lock_trx_sys) {
lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
}
return(TRUE);
@@ -5620,10 +5049,9 @@ lock_rec_validate_page(
ut_ad(!lock_mutex_own());
lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
loop:
lock = lock_rec_get_first_on_page_addr(
- lock_sys->rec_hash,
+ lock_sys.rec_hash,
block->page.id.space(), block->page.id.page_no());
if (!lock) {
@@ -5680,7 +5108,6 @@ loop:
function_exit:
lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
if (heap != NULL) {
mem_heap_free(heap);
@@ -5689,61 +5116,21 @@ function_exit:
}
/*********************************************************************//**
-Validates the table locks.
-@return TRUE if ok */
-static
-ibool
-lock_validate_table_locks(
-/*======================*/
- const trx_ut_list_t* trx_list) /*!< in: trx list */
-{
- const trx_t* trx;
-
- ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
-
- ut_ad(trx_list == &trx_sys->rw_trx_list);
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- const lock_t* lock;
-
- check_trx_state(trx);
-
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
- if (lock_get_type_low(lock) & LOCK_TABLE) {
-
- lock_table_queue_validate(
- lock->un_member.tab_lock.table);
- }
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
Validate record locks up to a limit.
@return lock at limit or NULL if no more locks in the hash bucket */
static MY_ATTRIBUTE((warn_unused_result))
const lock_t*
lock_rec_validate(
/*==============*/
- ulint start, /*!< in: lock_sys->rec_hash
+ ulint start, /*!< in: lock_sys.rec_hash
bucket */
ib_uint64_t* limit) /*!< in/out: upper limit of
(space, page_no) */
{
ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
for (const lock_t* lock = static_cast<const lock_t*>(
- HASH_GET_FIRST(lock_sys->rec_hash, start));
+ HASH_GET_FIRST(lock_sys.rec_hash, start));
lock != NULL;
lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
@@ -5782,9 +5169,12 @@ lock_rec_block_validate(
buf_block_t* block;
mtr_t mtr;
- /* Make sure that the tablespace is not deleted while we are
- trying to access the page. */
- if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
+ /* Transactional locks should never refer to dropped
+ tablespaces, because all DDL operations that would drop or
+ discard or rebuild a tablespace do hold an exclusive table
+ lock, which would conflict with any locks referring to the
+ tablespace from other transactions. */
+ if (fil_space_t* space = fil_space_acquire(space_id)) {
dberr_t err = DB_SUCCESS;
mtr_start(&mtr);
@@ -5810,10 +5200,31 @@ lock_rec_block_validate(
mtr_commit(&mtr);
- fil_space_release(space);
+ space->release();
}
}
+
+static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*)
+{
+ ut_ad(lock_mutex_own());
+ mutex_enter(&element->mutex);
+ if (element->trx)
+ {
+ check_trx_state(element->trx);
+ for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
+ lock != NULL;
+ lock= UT_LIST_GET_NEXT(trx_locks, lock))
+ {
+ if (lock_get_type_low(lock) & LOCK_TABLE)
+ lock_table_queue_validate(lock->un_member.tab_lock.table);
+ }
+ }
+ mutex_exit(&element->mutex);
+ return 0;
+}
+
+
/*********************************************************************//**
Validates the lock system.
@return TRUE if ok */
@@ -5831,15 +5242,16 @@ lock_validate()
page_addr_set pages;
lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
- ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
+ /* Validate table locks */
+ trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (lock_validate_table_locks), 0);
/* Iterate over all the record locks and validate the locks. We
don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
Release both mutexes during the validation check. */
- for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
+ for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
ib_uint64_t limit = 0;
while (const lock_t* lock = lock_rec_validate(i, &limit)) {
@@ -5852,7 +5264,6 @@ lock_validate()
}
}
- mutex_exit(&trx_sys->mutex);
lock_mutex_exit();
for (page_addr_set::const_iterator it = pages.begin();
@@ -5883,7 +5294,7 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
+ bool* inherit)/*!< out: set to true if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
@@ -5892,7 +5303,8 @@ lock_rec_insert_check_and_lock(
ut_ad(!dict_index_is_online_ddl(index)
|| index->is_primary()
|| (flags & BTR_CREATE_FLAG));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
+ ut_ad(page_rec_is_leaf(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -5904,10 +5316,11 @@ lock_rec_insert_check_and_lock(
dberr_t err;
lock_t* lock;
- ibool inherit_in = *inherit;
+ bool inherit_in = *inherit;
trx_t* trx = thr_get_trx(thr);
const rec_t* next_rec = page_rec_get_next_const(rec);
ulint heap_no = page_rec_get_heap_no(next_rec);
+ ut_ad(!rec_is_metadata(next_rec, index));
lock_mutex_enter();
/* Because this code is invoked for a running transaction by
@@ -5919,7 +5332,7 @@ lock_rec_insert_check_and_lock(
BTR_NO_LOCKING_FLAG and skip the locking altogether. */
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
- lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+ lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
if (lock == NULL) {
/* We optimize CPU time usage in the simplest case */
@@ -5933,7 +5346,7 @@ lock_rec_insert_check_and_lock(
trx->id, mtr);
}
- *inherit = FALSE;
+ *inherit = false;
return(DB_SUCCESS);
}
@@ -5944,7 +5357,7 @@ lock_rec_insert_check_and_lock(
return(DB_SUCCESS);
}
- *inherit = TRUE;
+ *inherit = true;
/* If another transaction has an explicit lock request which locks
the gap, waiting or granted, on the successor, the insert has to wait.
@@ -6030,11 +5443,12 @@ lock_rec_convert_impl_to_expl_for_trx(
const buf_block_t* block, /*!< in: buffer block of rec */
const rec_t* rec, /*!< in: user record on page */
dict_index_t* index, /*!< in: index of record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
trx_t* trx, /*!< in/out: active transaction */
ulint heap_no)/*!< in: rec heap number to lock */
{
- ut_ad(trx_is_referenced(trx));
+ ut_ad(trx->is_referenced());
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(!rec_is_metadata(rec, index));
DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
@@ -6056,22 +5470,100 @@ lock_rec_convert_impl_to_expl_for_trx(
lock_mutex_exit();
- trx_release_reference(trx);
+ trx->release_reference();
DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
}
-/*********************************************************************//**
-If a transaction has an implicit x-lock on a record, but no explicit x-lock
-set on the record, sets one for it. */
+
+#ifdef UNIV_DEBUG
+struct lock_rec_other_trx_holds_expl_arg
+{
+ const ulint heap_no;
+ const buf_block_t * const block;
+ const trx_t *impl_trx;
+};
+
+
+static my_bool lock_rec_other_trx_holds_expl_callback(
+ rw_trx_hash_element_t *element,
+ lock_rec_other_trx_holds_expl_arg *arg)
+{
+ mutex_enter(&element->mutex);
+ if (element->trx)
+ {
+ lock_t *expl_lock= lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, arg->block,
+ arg->heap_no, element->trx);
+    /*
+      If an explicit lock exists here, it must belong to the trx that
+      holds the implicit lock on this record.
+    */
+ ut_ad(!expl_lock || expl_lock->trx == arg->impl_trx);
+ }
+ mutex_exit(&element->mutex);
+ return 0;
+}
+
+
+/**
+  Assert that no transaction other than the trx holding the implicit lock
+  holds an explicit (S, REC_NOT_GAP) lock on the given rec.
+
+ FIXME: if the current transaction holds implicit lock from INSERT, a
+ subsequent locking read should not convert it to explicit. See also
+ MDEV-11215.
+
+ @param caller_trx trx of current thread
+ @param[in] trx trx holding implicit lock on rec
+ @param[in] rec user record
+ @param[in] block buffer block containing the record
+*/
+
+static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx,
+ const rec_t *rec,
+ const buf_block_t *block)
+{
+ if (trx)
+ {
+ ut_ad(!page_rec_is_metadata(rec));
+ lock_mutex_enter();
+ lock_rec_other_trx_holds_expl_arg arg= { page_rec_get_heap_no(rec), block,
+ trx };
+ trx_sys.rw_trx_hash.iterate(caller_trx,
+ reinterpret_cast<my_hash_walk_action>
+ (lock_rec_other_trx_holds_expl_callback),
+ &arg);
+ lock_mutex_exit();
+ }
+}
+#endif /* UNIV_DEBUG */
+
+
+/** If an implicit x-lock exists on a record, convert it to an explicit one.
+
+Often, this is called by a transaction that is about to enter a lock wait
+due to the lock conflict. Two explicit locks would be created: first the
+exclusive lock on behalf of the lock-holder transaction in this function,
+and then a wait request on behalf of caller_trx, in the calling function.
+
+This may also be called by the same transaction that is already holding
+an implicit exclusive lock on the record. In this case, no explicit lock
+should be created.
+
+@param[in,out] caller_trx current transaction
+@param[in] block index tree leaf page
+@param[in] rec record on the leaf page
+@param[in] index the index of the record
+@param[in] offsets rec_get_offsets(rec,index)
+@return whether caller_trx already holds an exclusive lock on rec */
static
-void
+bool
lock_rec_convert_impl_to_expl(
-/*==========================*/
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record on page */
- dict_index_t* index, /*!< in: index of record */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ trx_t* caller_trx,
+ const buf_block_t* block,
+ const rec_t* rec,
+ dict_index_t* index,
+ const ulint* offsets)
{
trx_t* trx;
@@ -6079,34 +5571,50 @@ lock_rec_convert_impl_to_expl(
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(!rec_is_metadata(rec, index));
if (dict_index_is_clust(index)) {
trx_id_t trx_id;
trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- trx = trx_rw_is_active(trx_id, NULL, true);
+ if (trx_id == 0) {
+ return false;
+ }
+ if (UNIV_UNLIKELY(trx_id == caller_trx->id)) {
+ return true;
+ }
+
+ trx = trx_sys.find(caller_trx, trx_id);
} else {
ut_ad(!dict_index_is_online_ddl(index));
- trx = lock_sec_rec_some_has_impl(rec, index, offsets);
+ trx = lock_sec_rec_some_has_impl(caller_trx, rec, index,
+ offsets);
+ if (trx == caller_trx) {
+ trx->release_reference();
+ return true;
+ }
- ut_ad(!trx || !lock_rec_other_trx_holds_expl(
- LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
+ ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec,
+ block));
}
if (trx != 0) {
ulint heap_no = page_rec_get_heap_no(rec);
- ut_ad(trx_is_referenced(trx));
+ ut_ad(trx->is_referenced());
/* If the transaction is still active and has no
explicit x-lock set on the record, set one for it.
trx cannot be committed until the ref count is zero. */
lock_rec_convert_impl_to_expl_for_trx(
- block, rec, index, offsets, trx, heap_no);
+ block, rec, index, trx, heap_no);
}
+
+ return false;
}
/*********************************************************************//**
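The hunk above changes lock_rec_convert_impl_to_expl() to report whether the caller already owns the record exclusively, so the callers can skip lock_rec_lock() entirely. A hedged, standalone sketch of just that decision flow; the names are hypothetical and the real function also handles secondary indexes, metadata records, and reference counting:

#include <cstdint>
#include <iostream>
#include <vector>

struct RecordLockQueue {
    // ids of transactions holding an explicit X, REC_NOT_GAP lock on the record
    std::vector<uint64_t> x_holders;
};

// impl_holder_id == 0 means "no implicit lock" (for example, a record whose
// clustered-index trx id belongs to a committed transaction).
static bool convert_impl_to_expl(uint64_t caller_id, uint64_t impl_holder_id,
                                 RecordLockQueue& queue)
{
    if (impl_holder_id == 0)
        return false;               // nothing to convert
    if (impl_holder_id == caller_id)
        return true;                // caller already holds the implicit X lock
    // Another active transaction holds the implicit lock: materialise it as an
    // explicit lock on that transaction's behalf, so that the caller can then
    // enqueue a conflicting (waiting) request against it.
    queue.x_holders.push_back(impl_holder_id);
    return false;
}

int main()
{
    RecordLockQueue q;
    std::cout << convert_impl_to_expl(7, 7, q) << '\n';  // 1: caller owns it
    std::cout << convert_impl_to_expl(7, 9, q) << '\n';  // 0: lock created for trx 9
    std::cout << q.x_holders.size() << '\n';             // 1
}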
@@ -6133,6 +5641,7 @@ lock_clust_rec_modify_check_and_lock(
ulint heap_no;
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(page_rec_is_leaf(rec));
ut_ad(dict_index_is_clust(index));
ut_ad(block->frame == page_align(rec));
@@ -6140,7 +5649,8 @@ lock_clust_rec_modify_check_and_lock(
return(DB_SUCCESS);
}
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!rec_is_metadata(rec, index));
+ ut_ad(!index->table->is_temporary());
heap_no = rec_offs_comp(offsets)
? rec_get_heap_no_new(rec)
@@ -6149,19 +5659,15 @@ lock_clust_rec_modify_check_and_lock(
/* If a transaction has no explicit x-lock set on the record, set one
for it */
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-
- lock_mutex_enter();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ if (lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec, index,
+ offsets)) {
+ /* We already hold an implicit exclusive lock. */
+ return DB_SUCCESS;
+ }
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
if (err == DB_SUCCESS_LOCKED_REC) {
@@ -6197,13 +5703,15 @@ lock_sec_rec_modify_check_and_lock(
ut_ad(!dict_index_is_clust(index));
ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
ut_ad(block->frame == page_align(rec));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(!rec_is_metadata(rec, index));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
heap_no = page_rec_get_heap_no(rec);
@@ -6212,17 +5720,9 @@ lock_sec_rec_modify_check_and_lock(
index record, and this would not have been possible if another active
transaction had modified this secondary index record. */
- lock_mutex_enter();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, thr);
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
#ifdef UNIV_DEBUG
{
mem_heap_t* heap = NULL;
@@ -6289,42 +5789,34 @@ lock_sec_rec_read_check_and_lock(
ut_ad(block->frame == page_align(rec));
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(page_rec_is_leaf(rec));
ut_ad(mode == LOCK_X || mode == LOCK_S);
if ((flags & BTR_NO_LOCKING_FLAG)
|| srv_read_only_mode
- || dict_table_is_temporary(index->table)) {
+ || index->table->is_temporary()) {
return(DB_SUCCESS);
}
+ ut_ad(!rec_is_metadata(rec, index));
heap_no = page_rec_get_heap_no(rec);
/* Some transaction may have an implicit x-lock on the record only
if the max trx id for the page >= min trx id for the trx list or a
database recovery is running. */
- if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
- || recv_recovery_is_on())
- && !page_rec_is_supremum(rec)) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
+ if (!page_rec_is_supremum(rec)
+ && page_get_max_trx_id(block->frame) >= trx_sys.get_min_trx_id()
+ && lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
+ index, offsets)) {
+ /* We already hold an implicit exclusive lock. */
+ return DB_SUCCESS;
}
- lock_mutex_enter();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- err = lock_rec_lock(FALSE, mode | gap_mode,
+ err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
block, heap_no, index, thr);
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
return(err);
@@ -6368,33 +5860,27 @@ lock_clust_rec_read_check_and_lock(
ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
|| gap_mode == LOCK_REC_NOT_GAP);
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(page_rec_is_leaf(rec));
+ ut_ad(!rec_is_metadata(rec, index));
if ((flags & BTR_NO_LOCKING_FLAG)
|| srv_read_only_mode
- || dict_table_is_temporary(index->table)) {
+ || index->table->is_temporary()) {
return(DB_SUCCESS);
}
heap_no = page_rec_get_heap_no(rec);
- if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
+ if (heap_no != PAGE_HEAP_NO_SUPREMUM
+ && lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
+ index, offsets)) {
+ /* We already hold an implicit exclusive lock. */
+ return DB_SUCCESS;
}
- lock_mutex_enter();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
-
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
+ err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
+ block, heap_no, index, thr);
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
@@ -6806,41 +6292,19 @@ lock_trx_release_locks(
trx_t* trx) /*!< in/out: transaction */
{
check_trx_state(trx);
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)
+ || trx_state_eq(trx, TRX_STATE_ACTIVE));
- if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
+ bool release_lock = UT_LIST_GET_LEN(trx->lock.trx_locks) > 0;
- mutex_enter(&trx_sys->mutex);
-
- ut_a(trx_sys->n_prepared_trx > 0);
- --trx_sys->n_prepared_trx;
-
- if (trx->is_recovered) {
- ut_a(trx_sys->n_prepared_recovered_trx > 0);
- trx_sys->n_prepared_recovered_trx--;
- }
-
- mutex_exit(&trx_sys->mutex);
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)
- || (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
- && trx->is_recovered
- && !UT_LIST_GET_LEN(trx->lock.trx_locks)));
- }
-
- bool release_lock;
-
- release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0);
-
- /* Don't take lock_sys mutex if trx didn't acquire any lock. */
+ /* Don't take lock_sys.mutex if trx didn't acquire any lock. */
if (release_lock) {
/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
- is protected by both the lock_sys->mutex and the trx->mutex. */
+ is protected by both the lock_sys.mutex and the trx->mutex. */
lock_mutex_enter();
}
- trx_mutex_enter(trx);
-
/* The following assignment makes the transaction committed in memory
and makes its changes to data visible to other transactions.
NOTE that there is a small discrepancy from the strict formal
@@ -6856,53 +6320,33 @@ lock_trx_release_locks(
committed. */
/*--------------------------------------*/
+ trx_mutex_enter(trx);
trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
+ /* Ensure that rw_trx_hash_t::find() will no longer find
+ this transaction. */
+ trx->id = 0;
+ trx_mutex_exit(trx);
/*--------------------------------------*/
- if (trx_is_referenced(trx)) {
+ if (trx->is_referenced()) {
ut_a(release_lock);
lock_mutex_exit();
- while (trx_is_referenced(trx)) {
-
- trx_mutex_exit(trx);
+ while (trx->is_referenced()) {
DEBUG_SYNC_C("waiting_trx_is_not_referenced");
/** Doing an implicit to explicit conversion
should not be expensive. */
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
-
- trx_mutex_enter(trx);
+ ut_delay(srv_spin_wait_delay);
}
- trx_mutex_exit(trx);
-
lock_mutex_enter();
-
- trx_mutex_enter(trx);
}
- ut_ad(!trx_is_referenced(trx));
-
- /* If the background thread trx_rollback_or_clean_recovered()
- is still active then there is a chance that the rollback
- thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
- to clean it up calling trx_cleanup_at_db_startup(). This can
- happen in the case we are committing a trx here that is left
- in PREPARED state during the crash. Note that commit of the
- rollback of a PREPARED trx happens in the recovery thread
- while the rollback of other transactions happen in the
- background thread. To avoid this race we unconditionally unset
- the is_recovered flag. */
-
- trx->is_recovered = false;
- /* Ensure that trx_reference() will not find this transaction. */
- trx->id = 0;
-
- trx_mutex_exit(trx);
+ ut_ad(!trx->is_referenced());
if (release_lock) {
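The rewritten commit path above first makes the transaction unreachable (state change plus trx->id = 0 under trx->mutex) and only then spins until is_referenced() drops to zero, so that readers who already pinned the trx for implicit-to-explicit conversion can finish. A simplified standalone sketch of that ordering, not InnoDB code:

#include <atomic>
#include <chrono>
#include <cstdint>
#include <thread>

struct Trx {
    std::atomic<uint64_t> id{42};
    std::atomic<int>      n_ref{0};
    std::atomic<bool>     committed{false};

    bool is_referenced() const { return n_ref.load() > 0; }
};

int main()
{
    Trx trx;

    std::thread reader([&] {
        // Analogue of trx_sys.find(): pin the trx, use it briefly, unpin.
        trx.n_ref.fetch_add(1);
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
        trx.n_ref.fetch_sub(1);
    });

    // Commit side: make the trx invisible to future lookups first...
    trx.committed.store(true);
    trx.id.store(0);
    // ...then wait for existing references to drain before tearing it down.
    while (trx.is_referenced())
        std::this_thread::yield();   // stands in for ut_delay(srv_spin_wait_delay)

    reader.join();
}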
@@ -6981,52 +6425,38 @@ lock_table_get_n_locks(
}
#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Do an exhaustive check for any locks (table or rec) against the table.
-@return lock if found */
-static
-const lock_t*
-lock_table_locks_lookup(
-/*====================*/
- const dict_table_t* table, /*!< in: check if there are
- any locks held on records in
- this table or on the table
- itself */
- const trx_ut_list_t* trx_list) /*!< in: trx list to check */
-{
- trx_t* trx;
-
- ut_a(table != NULL);
- ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- const lock_t* lock;
-
- check_trx_state(trx);
-
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
- ut_a(lock->trx == trx);
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- ut_ad(!dict_index_is_online_ddl(lock->index)
- || dict_index_is_clust(lock->index));
- if (lock->index->table == table) {
- return(lock);
- }
- } else if (lock->un_member.tab_lock.table == table) {
- return(lock);
- }
- }
- }
-
- return(NULL);
+/**
+ Do an exhaustive check for any locks (table or rec) against the table.
+
+ @param[in] table check if there are any locks held on records in this table
+ or on the table itself
+*/
+
+static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element,
+ const dict_table_t *table)
+{
+ ut_ad(lock_mutex_own());
+ mutex_enter(&element->mutex);
+ if (element->trx)
+ {
+ check_trx_state(element->trx);
+ for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
+ lock != NULL;
+ lock= UT_LIST_GET_NEXT(trx_locks, lock))
+ {
+ ut_ad(lock->trx == element->trx);
+ if (lock_get_type_low(lock) == LOCK_REC)
+ {
+ ut_ad(!dict_index_is_online_ddl(lock->index) ||
+ dict_index_is_clust(lock->index));
+ ut_ad(lock->index->table != table);
+ }
+ else
+ ut_ad(lock->un_member.tab_lock.table != table);
+ }
+ }
+ mutex_exit(&element->mutex);
+ return 0;
}
#endif /* UNIV_DEBUG */
@@ -7042,17 +6472,17 @@ lock_table_has_locks(
{
ibool has_locks;
+ ut_ad(table != NULL);
lock_mutex_enter();
has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
#ifdef UNIV_DEBUG
if (!has_locks) {
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
-
- mutex_exit(&trx_sys->mutex);
+ trx_sys.rw_trx_hash.iterate(
+ reinterpret_cast<my_hash_walk_action>
+ (lock_table_locks_lookup),
+ const_cast<dict_table_t*>(table));
}
#endif /* UNIV_DEBUG */
@@ -7087,7 +6517,7 @@ void
lock_set_timeout_event()
/*====================*/
{
- os_event_set(lock_sys->timeout_event);
+ os_event_set(lock_sys.timeout_event);
}
#ifdef UNIV_DEBUG
@@ -7154,12 +6584,14 @@ lock_trx_has_sys_table_locks(
return(strongest_lock);
}
-/*******************************************************************//**
-Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
+/** Check if the transaction holds an explicit exclusive lock on a record.
+@param[in] trx transaction
+@param[in] table table
+@param[in] block leaf page
+@param[in] heap_no heap number identifying the record
+@return whether an explicit X-lock is held */
bool
-lock_trx_has_rec_x_lock(
-/*====================*/
+lock_trx_has_expl_x_lock(
const trx_t* trx, /*!< in: transaction to check */
const dict_table_t* table, /*!< in: table to check */
const buf_block_t* block, /*!< in: buffer block of the record */
@@ -7168,11 +6600,9 @@ lock_trx_has_rec_x_lock(
ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
lock_mutex_enter();
- ut_a(lock_table_has(trx, table, LOCK_IX)
- || dict_table_is_temporary(table));
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, trx)
- || dict_table_is_temporary(table));
+ ut_ad(lock_table_has(trx, table, LOCK_IX));
+ ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no,
+ trx));
lock_mutex_exit();
return(true);
}
@@ -7219,8 +6649,6 @@ DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
- mutex_enter(&trx_sys->mutex);
-
trx_print_low(lock_latest_err_file, trx, max_query_len,
n_rec_locks, n_trx_locks, heap_size);
@@ -7228,8 +6656,6 @@ DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
trx_print_low(stderr, trx, max_query_len,
n_rec_locks, n_trx_locks, heap_size);
}
-
- mutex_exit(&trx_sys->mutex);
}
/** Print lock data to the deadlock file and possibly to stderr.
@@ -7313,8 +6739,8 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const
hash_table_t* lock_hash;
lock_hash = lock->type_mode & LOCK_PREDICATE
- ? lock_sys->prdt_hash
- : lock_sys->rec_hash;
+ ? lock_sys.prdt_hash
+ : lock_sys.rec_hash;
/* We are only interested in records that match the heap_no. */
*heap_no = lock_rec_find_set_bit(lock);
@@ -7675,12 +7101,14 @@ lock_update_split_and_merge(
{
const rec_t* left_next_rec;
- ut_a(left_block && right_block);
- ut_a(orig_pred);
+ ut_ad(page_is_leaf(left_block->frame));
+ ut_ad(page_is_leaf(right_block->frame));
+ ut_ad(page_align(orig_pred) == left_block->frame);
lock_mutex_enter();
left_next_rec = page_rec_get_next_const(orig_pred);
+ ut_ad(!page_rec_is_metadata(left_next_rec));
/* Inherit the locks on the supremum of the left page to the
first record which was moved from the right page */
diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc
index a179b0719cf..6ee97ee3ec7 100644
--- a/storage/innobase/lock/lock0prdt.cc
+++ b/storage/innobase/lock/lock0prdt.cc
@@ -525,7 +525,7 @@ lock_prdt_insert_check_and_lock(
return(DB_SUCCESS);
}
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
ut_ad(!dict_index_is_clust(index));
trx_t* trx = thr_get_trx(thr);
@@ -541,7 +541,7 @@ lock_prdt_insert_check_and_lock(
lock_t* lock;
/* Only need to check locks on prdt_hash */
- lock = lock_rec_get_first(lock_sys->prdt_hash, block, PRDT_HEAPNO);
+ lock = lock_rec_get_first(lock_sys.prdt_hash, block, PRDT_HEAPNO);
if (lock == NULL) {
lock_mutex_exit();
@@ -619,7 +619,6 @@ lock_prdt_update_parent(
buf_block_t* right_block, /*!< in/out: the new half page */
lock_prdt_t* left_prdt, /*!< in: MBR on the old page */
lock_prdt_t* right_prdt, /*!< in: MBR on the new page */
- lock_prdt_t* parent_prdt, /*!< in: original parent MBR */
ulint space, /*!< in: parent space id */
ulint page_no) /*!< in: parent page number */
{
@@ -629,7 +628,7 @@ lock_prdt_update_parent(
/* Get all locks in parent */
for (lock = lock_rec_get_first_on_page_addr(
- lock_sys->prdt_hash, space, page_no);
+ lock_sys.prdt_hash, space, page_no);
lock;
lock = lock_rec_get_next_on_page(lock)) {
lock_prdt_t* lock_prdt;
@@ -673,7 +672,6 @@ static
void
lock_prdt_update_split_low(
/*=======================*/
- buf_block_t* block, /*!< in/out: page to be split */
buf_block_t* new_block, /*!< in/out: the new half page */
lock_prdt_t* prdt, /*!< in: MBR on the old page */
lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
@@ -750,17 +748,16 @@ Update predicate lock when page splits */
void
lock_prdt_update_split(
/*===================*/
- buf_block_t* block, /*!< in/out: page to be split */
buf_block_t* new_block, /*!< in/out: the new half page */
lock_prdt_t* prdt, /*!< in: MBR on the old page */
lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
ulint space, /*!< in: space id */
ulint page_no) /*!< in: page number */
{
- lock_prdt_update_split_low(block, new_block, prdt, new_prdt,
+ lock_prdt_update_split_low(new_block, prdt, new_prdt,
space, page_no, LOCK_PREDICATE);
- lock_prdt_update_split_low(block, new_block, NULL, NULL,
+ lock_prdt_update_split_low(new_block, NULL, NULL,
space, page_no, LOCK_PRDT_PAGE);
}
@@ -802,15 +799,14 @@ lock_prdt_lock(
SELECT FOR UPDATE */
ulint type_mode,
/*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
- que_thr_t* thr, /*!< in: query thread
+ que_thr_t* thr) /*!< in: query thread
(can be NULL if BTR_NO_LOCKING_FLAG) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
{
trx_t* trx = thr_get_trx(thr);
dberr_t err = DB_SUCCESS;
lock_rec_req_status status = LOCK_REC_SUCCESS;
- if (trx->read_only || dict_table_is_temporary(index->table)) {
+ if (trx->read_only || index->table->is_temporary()) {
return(DB_SUCCESS);
}
@@ -819,8 +815,8 @@ lock_prdt_lock(
ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
hash_table_t* hash = type_mode == LOCK_PREDICATE
- ? lock_sys->prdt_hash
- : lock_sys->prdt_page_hash;
+ ? lock_sys.prdt_hash
+ : lock_sys.prdt_page_hash;
/* Another transaction cannot have an implicit lock on the record,
because when we come here, we already have modified the clustered
@@ -829,7 +825,7 @@ lock_prdt_lock(
lock_mutex_enter();
- const ulint prdt_mode = mode | type_mode;
+ const ulint prdt_mode = ulint(mode) | type_mode;
lock_t* lock = lock_rec_get_first_on_page(hash, block);
if (lock == NULL) {
@@ -837,7 +833,7 @@ lock_prdt_lock(
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
- mode | type_mode, block, PRDT_HEAPNO,
+ ulint(mode) | type_mode, block, PRDT_HEAPNO,
index, trx, FALSE);
status = LOCK_REC_SUCCESS_CREATED;
@@ -869,7 +865,7 @@ lock_prdt_lock(
NULL, /* FIXME: replicate
SPATIAL INDEX locks */
#endif
- mode | type_mode,
+ ulint(mode) | type_mode,
block, PRDT_HEAPNO,
index, thr, prdt);
} else {
@@ -929,7 +925,7 @@ lock_place_prdt_page_lock(
lock_mutex_enter();
const lock_t* lock = lock_rec_get_first_on_page_addr(
- lock_sys->prdt_page_hash, space, page_no);
+ lock_sys.prdt_page_hash, space, page_no);
const ulint mode = LOCK_S | LOCK_PRDT_PAGE;
trx_t* trx = thr_get_trx(thr);
@@ -985,7 +981,7 @@ lock_test_prdt_page_lock(
lock_mutex_enter();
lock = lock_rec_get_first_on_page_addr(
- lock_sys->prdt_page_hash, space, page_no);
+ lock_sys.prdt_page_hash, space, page_no);
lock_mutex_exit();
@@ -1005,13 +1001,13 @@ lock_prdt_rec_move(
{
lock_t* lock;
- if (!lock_sys->prdt_hash) {
+ if (!lock_sys.prdt_hash) {
return;
}
lock_mutex_enter();
- for (lock = lock_rec_get_first(lock_sys->prdt_hash,
+ for (lock = lock_rec_get_first(lock_sys.prdt_hash,
donator, PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index 055c67bacc3..721f6151c36 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -46,9 +46,9 @@ lock_wait_table_print(void)
{
ut_ad(lock_wait_mutex_own());
- const srv_slot_t* slot = lock_sys->waiting_threads;
+ const srv_slot_t* slot = lock_sys.waiting_threads;
- for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
+ for (ulint i = 0; i < srv_max_n_threads; i++, ++slot) {
fprintf(stderr,
"Slot %lu: thread type %lu,"
@@ -72,7 +72,7 @@ lock_wait_table_release_slot(
srv_slot_t* slot) /*!< in: slot to release */
{
#ifdef UNIV_DEBUG
- srv_slot_t* upper = lock_sys->waiting_threads + OS_THREAD_MAX_N;
+ srv_slot_t* upper = lock_sys.waiting_threads + srv_max_n_threads;
#endif /* UNIV_DEBUG */
lock_wait_mutex_enter();
@@ -83,7 +83,7 @@ lock_wait_table_release_slot(
ut_ad(slot->thr->slot == slot);
/* Must be within the array boundaries. */
- ut_ad(slot >= lock_sys->waiting_threads);
+ ut_ad(slot >= lock_sys.waiting_threads);
ut_ad(slot < upper);
/* Note: When we reserve the slot we use the trx_t::mutex to update
@@ -102,23 +102,23 @@ lock_wait_table_release_slot(
lock_mutex_exit();
/* Scan backwards and adjust the last free slot pointer. */
- for (slot = lock_sys->last_slot;
- slot > lock_sys->waiting_threads && !slot->in_use;
+ for (slot = lock_sys.last_slot;
+ slot > lock_sys.waiting_threads && !slot->in_use;
--slot) {
/* No op */
}
/* Either the array is empty or the last scanned slot is in use. */
- ut_ad(slot->in_use || slot == lock_sys->waiting_threads);
+ ut_ad(slot->in_use || slot == lock_sys.waiting_threads);
- lock_sys->last_slot = slot + 1;
+ lock_sys.last_slot = slot + 1;
/* The last slot is either outside of the array boundary or it's
on an empty slot. */
- ut_ad(lock_sys->last_slot == upper || !lock_sys->last_slot->in_use);
+ ut_ad(lock_sys.last_slot == upper || !lock_sys.last_slot->in_use);
- ut_ad(lock_sys->last_slot >= lock_sys->waiting_threads);
- ut_ad(lock_sys->last_slot <= upper);
+ ut_ad(lock_sys.last_slot >= lock_sys.waiting_threads);
+ ut_ad(lock_sys.last_slot <= upper);
lock_wait_mutex_exit();
}
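The slot bookkeeping used here (reserve scans forward for a free slot and may bump last_slot; release scans backwards to shrink it past trailing free slots) can be shown in a few lines. A simplified, single-threaded sketch with made-up names; the real code performs these steps under lock_wait_mutex and asserts the last_slot invariants:

#include <cassert>

enum { MAX_SLOTS = 8 };               // stands in for srv_max_n_threads

struct Slot { bool in_use = false; };

static Slot  slots[MAX_SLOTS];
static Slot* last_slot = slots;       // one past the last possibly-used slot

static Slot* reserve_slot()
{
    for (Slot* s = slots; s != slots + MAX_SLOTS; ++s) {
        if (!s->in_use) {
            s->in_use = true;
            if (s == last_slot)
                ++last_slot;          // grew the used prefix by one
            return s;
        }
    }
    return nullptr;                   // table full
}

static void release_slot(Slot* s)
{
    s->in_use = false;
    // Scan backwards and shrink last_slot past any trailing free slots.
    Slot* p = last_slot;
    while (p > slots && !p[-1].in_use)
        --p;
    last_slot = p;
}

int main()
{
    Slot* a = reserve_slot();
    Slot* b = reserve_slot();
    release_slot(b);
    assert(last_slot == slots + 1);   // only slot 'a' is still in use
    release_slot(a);
    assert(last_slot == slots);
}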
@@ -140,9 +140,9 @@ lock_wait_table_reserve_slot(
ut_ad(lock_wait_mutex_own());
ut_ad(trx_mutex_own(thr_get_trx(thr)));
- slot = lock_sys->waiting_threads;
+ slot = lock_sys.waiting_threads;
- for (i = OS_THREAD_MAX_N; i--; ++slot) {
+ for (i = srv_max_n_threads; i--; ++slot) {
if (!slot->in_use) {
slot->in_use = TRUE;
slot->thr = thr;
@@ -158,18 +158,18 @@ lock_wait_table_reserve_slot(
slot->suspend_time = ut_time();
slot->wait_timeout = wait_timeout;
- if (slot == lock_sys->last_slot) {
- ++lock_sys->last_slot;
+ if (slot == lock_sys.last_slot) {
+ ++lock_sys.last_slot;
}
- ut_ad(lock_sys->last_slot
- <= lock_sys->waiting_threads + OS_THREAD_MAX_N);
+ ut_ad(lock_sys.last_slot
+ <= lock_sys.waiting_threads + srv_max_n_threads);
return(slot);
}
}
- ib::error() << "There appear to be " << OS_THREAD_MAX_N << " user"
+ ib::error() << "There appear to be " << srv_max_n_threads << " user"
" threads currently waiting inside InnoDB, which is the upper"
" limit. Cannot continue operation. Before aborting, we print"
" a list of waiting threads.";
@@ -184,7 +184,7 @@ lock_wait_table_reserve_slot(
check if lock timeout was for priority thread,
as a side effect trigger lock monitor
@param[in] trx transaction owning the lock
-@param[in] locked true if trx and lock_sys_mutex is ownd
+@param[in]	locked	true if trx and lock_sys.mutex is owned
@return false for regular lock timeout */
static
bool
@@ -203,7 +203,7 @@ wsrep_is_BF_lock_timeout(
ut_ad(lock_mutex_own());
- wsrep_trx_print_locking(stderr, trx, 3000);
+ trx_print_latched(stderr, trx, 3000);
if (!locked) {
lock_mutex_exit();
@@ -290,7 +290,7 @@ lock_wait_suspend_thread(
if (ut_usectime(&sec, &ms) == -1) {
start_time = -1;
} else {
- start_time = static_cast<int64_t>(sec) * 1000000 + ms;
+ start_time = int64_t(sec) * 1000000 + int64_t(ms);
}
}
@@ -378,31 +378,27 @@ lock_wait_suspend_thread(
lock_wait_table_release_slot(slot);
if (thr->lock_state == QUE_THR_LOCK_ROW) {
- ulint diff_time;
-
- if (ut_usectime(&sec, &ms) == -1) {
+ int64_t diff_time;
+ if (start_time == -1 || ut_usectime(&sec, &ms) == -1) {
finish_time = -1;
+ diff_time = 0;
} else {
- finish_time = static_cast<int64_t>(sec) * 1000000 + ms;
+ finish_time = int64_t(sec) * 1000000 + int64_t(ms);
+ diff_time = std::max<int64_t>(
+ 0, finish_time - start_time);
+ srv_stats.n_lock_wait_time.add(diff_time);
+
+ /* Only update the variable if we successfully
+ retrieved the start and finish times. See Bug#36819. */
+ if (ulint(diff_time) > lock_sys.n_lock_max_wait_time) {
+ lock_sys.n_lock_max_wait_time
+ = ulint(diff_time);
+ }
+ /* Record the lock wait time for this thread */
+ thd_storage_lock_wait(trx->mysql_thd, diff_time);
}
- diff_time = (finish_time > start_time) ?
- (ulint) (finish_time - start_time) : 0;
-
srv_stats.n_lock_wait_current_count.dec();
- srv_stats.n_lock_wait_time.add(diff_time);
-
- /* Only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- if (diff_time > lock_sys->n_lock_max_wait_time
- && start_time != -1
- && finish_time != -1) {
-
- lock_sys->n_lock_max_wait_time = diff_time;
- }
-
- /* Record the lock wait time for this thread */
- thd_set_lock_wait_time(trx->mysql_thd, diff_time);
DBUG_EXECUTE_IF("lock_instrument_slow_query_log",
os_thread_sleep(1000););
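The timing hunk above keeps both timestamps in microseconds, clamps the difference at zero, and records nothing when either clock read failed. A small standalone sketch of that arithmetic, with illustrative names:

#include <algorithm>
#include <cstdint>
#include <iostream>

static uint64_t max_wait_us = 0;     // analogue of lock_sys.n_lock_max_wait_time

// start_us/finish_us == -1 means "clock read failed"; in that case nothing is
// recorded (compare the Bug#36819 comment in the patch).
static int64_t record_wait(int64_t start_us, int64_t finish_us)
{
    if (start_us == -1 || finish_us == -1)
        return 0;
    const int64_t diff = std::max<int64_t>(0, finish_us - start_us);
    max_wait_us = std::max<uint64_t>(max_wait_us, uint64_t(diff));
    return diff;
}

int main()
{
    // sec = 12, usec = 345678  ->  12 * 1000000 + 345678
    const int64_t start  = int64_t(12) * 1000000 + 345678;
    const int64_t finish = int64_t(13) * 1000000 + 1000;
    std::cout << record_wait(start, finish) << "\n";  // 655322 microseconds
    std::cout << record_wait(finish, start) << "\n";  // clamped to 0
}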
@@ -530,7 +526,7 @@ os_thread_ret_t
DECLARE_THREAD(lock_wait_timeout_thread)(void*)
{
int64_t sig_count = 0;
- os_event_t event = lock_sys->timeout_event;
+ os_event_t event = lock_sys.timeout_event;
ut_ad(!srv_read_only_mode);
@@ -556,8 +552,8 @@ DECLARE_THREAD(lock_wait_timeout_thread)(void*)
/* Check all slots for user threads that are waiting
on locks, and if they have exceeded the time limit. */
- for (slot = lock_sys->waiting_threads;
- slot < lock_sys->last_slot;
+ for (slot = lock_sys.waiting_threads;
+ slot < lock_sys.last_slot;
++slot) {
/* We are doing a read without the lock mutex
@@ -576,7 +572,7 @@ DECLARE_THREAD(lock_wait_timeout_thread)(void*)
} while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
- lock_sys->timeout_thread_active = false;
+ lock_sys.timeout_thread_active = false;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc
index 478f021cbe4..dff9661c6eb 100644
--- a/storage/innobase/log/log0crypt.cc
+++ b/storage/innobase/log/log0crypt.cc
@@ -24,6 +24,7 @@ Created 11/25/2013 Minli Zhu Google
Modified Jan Lindström jan.lindstrom@mariadb.com
MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
*******************************************************/
+#include <my_global.h>
#include "m_string.h"
#include "log0crypt.h"
#include <mysql/service_my_crypt.h>
@@ -196,7 +197,7 @@ bool
log_crypt_init()
{
ut_ad(log_mutex_own());
- ut_ad(log_sys->is_encrypted());
+ ut_ad(log_sys.is_encrypted());
info.key_version = encryption_key_get_latest_version(
LOG_DEFAULT_ENCRYPTION_KEY);
@@ -392,9 +393,9 @@ log_tmp_block_encrypt(
aes_ctr_iv[1] = offs;
int rc = encryption_crypt(
- src, size, dst, &dst_len,
- const_cast<byte*>(info.crypt_key.bytes), sizeof info.crypt_key,
- reinterpret_cast<byte*>(aes_ctr_iv), sizeof aes_ctr_iv,
+ src, (uint)size, dst, &dst_len,
+ const_cast<byte*>(info.crypt_key.bytes), (uint)(sizeof info.crypt_key),
+ reinterpret_cast<byte*>(aes_ctr_iv), (uint)(sizeof aes_ctr_iv),
encrypt
? ENCRYPTION_FLAG_ENCRYPT|ENCRYPTION_FLAG_NOPAD
: ENCRYPTION_FLAG_DECRYPT|ENCRYPTION_FLAG_NOPAD,
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 4a789382004..3119a110f74 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -69,7 +69,7 @@ c-function and its parameters are written to the log to
reduce the size of the log.
3a) You should not add parameters to these kind of functions
- (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
+ (e.g. trx_undo_header_create())
3b) You should not add such functionality which either change
working when compared with the old or are dependent on data
@@ -81,7 +81,7 @@ reduce the size of the log.
*/
/** Redo log system */
-log_t* log_sys = NULL;
+log_t log_sys;
/** Whether to generate and require checksums on the redo log pages */
my_bool innodb_log_checksums;
@@ -106,7 +106,8 @@ static time_t log_last_margine_warning_time;
/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO 2
-#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
+#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN \
+ + (4U << srv_page_size_shift))
/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
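To make the LOG_BUF_FLUSH_MARGIN change above concrete, here is the margin arithmetic as a standalone sketch. The value of LOG_BUF_WRITE_MARGIN (four 512-byte log blocks) and the 16 KiB page size are assumptions for the example; the patch only replaces 4 * UNIV_PAGE_SIZE with the equivalent 4U << srv_page_size_shift:

#include <cstdio>

int main()
{
    const unsigned block_size      = 512;                 // OS_FILE_LOG_BLOCK_SIZE
    const unsigned page_size_shift = 14;                  // 16 KiB pages
    const unsigned write_margin    = 4 * block_size;      // assumed LOG_BUF_WRITE_MARGIN
    const unsigned flush_margin    = write_margin + (4U << page_size_shift);
    const unsigned flush_ratio     = 2;                   // LOG_BUF_FLUSH_RATIO
    const unsigned log_buffer_size = 16u << 20;           // innodb_log_buffer_size = 16 MiB

    // max_buf_free = srv_log_buffer_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN
    std::printf("flush margin = %u bytes\n", flush_margin);              // 67584
    std::printf("max_buf_free = %u bytes\n",
                log_buffer_size / flush_ratio - flush_margin);           // 8321024
}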
@@ -133,15 +134,8 @@ extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(log_scrub_thread)(void*);
-/******************************************************//**
-Completes a checkpoint write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void);
-/*============================*/
-
/****************************************************************//**
-Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
+Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
exists.
@return LSN of oldest modification */
static
@@ -157,7 +151,7 @@ log_buf_pool_get_oldest_modification(void)
if (!lsn) {
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
}
return(lsn);
@@ -165,17 +159,13 @@ log_buf_pool_get_oldest_modification(void)
/** Extends the log buffer.
@param[in] len requested minimum size in bytes */
-void
-log_buffer_extend(
- ulint len)
+void log_buffer_extend(ulong len)
{
- ulint move_start;
- ulint move_end;
byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
log_mutex_enter_all();
- while (log_sys->is_extending) {
+ while (log_sys.is_extending) {
/* Another thread is trying to extend already.
Needs to wait for. */
log_mutex_exit_all();
@@ -184,28 +174,28 @@ log_buffer_extend(
log_mutex_enter_all();
- if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
+ if (srv_log_buffer_size > len) {
/* Already extended enough by the others */
log_mutex_exit_all();
return;
}
}
- if (len >= log_sys->buf_size / 2) {
+ if (len >= srv_log_buffer_size / 2) {
DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
DBUG_SUICIDE(););
/* log_buffer is too small. try to extend instead of crash. */
- ib::warn() << "The transaction log size is too large"
- " for innodb_log_buffer_size (" << len << " >= "
- << LOG_BUFFER_SIZE << " / 2). Trying to extend it.";
+ ib::warn() << "The redo log transaction size " << len <<
+			" exceeds half of innodb_log_buffer_size="
+			<< srv_log_buffer_size << ". Trying to extend it.";
}
- log_sys->is_extending = true;
+ log_sys.is_extending = true;
- while (ut_calc_align_down(log_sys->buf_free,
+ while (ut_calc_align_down(log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE)
- != ut_calc_align_down(log_sys->buf_next_to_write,
+ != ut_calc_align_down(log_sys.buf_next_to_write,
OS_FILE_LOG_BLOCK_SIZE)) {
/* Buffer might have >1 blocks to write still. */
log_mutex_exit_all();
@@ -215,46 +205,46 @@ log_buffer_extend(
log_mutex_enter_all();
}
- move_start = ut_calc_align_down(
- log_sys->buf_free,
+ ulong move_start = ut_calc_align_down(
+ log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE);
- move_end = log_sys->buf_free;
+ ulong move_end = log_sys.buf_free;
/* store the last log block in buffer */
- ut_memcpy(tmp_buf, log_sys->buf + move_start,
+ ut_memcpy(tmp_buf, log_sys.buf + move_start,
move_end - move_start);
- log_sys->buf_free -= move_start;
- log_sys->buf_next_to_write -= move_start;
+ log_sys.buf_free -= move_start;
+ log_sys.buf_next_to_write -= move_start;
- /* reallocate log buffer */
- srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
- ut_free(log_sys->buf_ptr);
+ /* free previous after getting the right address */
+ if (!log_sys.first_in_use) {
+ log_sys.buf -= srv_log_buffer_size;
+ }
+ ut_free_dodump(log_sys.buf, srv_log_buffer_size * 2);
- log_sys->buf_size = LOG_BUFFER_SIZE;
+ /* reallocate log buffer */
+ srv_log_buffer_size = len;
- log_sys->buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
- TRASH_ALLOC(log_sys->buf_ptr,
- log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
+ log_sys.buf = static_cast<byte*>(
+ ut_malloc_dontdump(srv_log_buffer_size * 2));
+ TRASH_ALLOC(log_sys.buf, srv_log_buffer_size * 2);
- log_sys->first_in_use = true;
+ log_sys.first_in_use = true;
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
+ log_sys.max_buf_free = srv_log_buffer_size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN;
/* restore the last log block */
- ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
+ ut_memcpy(log_sys.buf, tmp_buf, move_end - move_start);
- ut_ad(log_sys->is_extending);
- log_sys->is_extending = false;
+ ut_ad(log_sys.is_extending);
+ log_sys.is_extending = false;
log_mutex_exit_all();
ib::info() << "innodb_log_buffer_size was extended to "
- << LOG_BUFFER_SIZE << ".";
+ << srv_log_buffer_size << ".";
}
/** Calculate actual length in redo buffer and file including
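log_buffer_extend() above preserves the current partial log block across the reallocation: it aligns buf_free down to a block boundary, saves that tail, reallocates, and restores the tail at the start of the new buffer so buf_free and buf_next_to_write stay valid. A simplified standalone sketch of that dance; the real function first waits until buf_next_to_write lies in the same block and uses ut_malloc_dontdump for the allocation:

#include <cstddef>
#include <cstring>
#include <vector>

enum { BLOCK_SIZE = 512 };           // OS_FILE_LOG_BLOCK_SIZE

struct LogBuffer {
    std::vector<unsigned char> buf;
    size_t buf_free          = 0;    // first free byte offset
    size_t buf_next_to_write = 0;    // first byte not yet written to disk
};

static void buffer_extend(LogBuffer& log, size_t new_size)
{
    // Save the last, possibly partial, block (everything from the block
    // boundary below buf_free up to buf_free).
    const size_t move_start = log.buf_free / BLOCK_SIZE * BLOCK_SIZE;
    const size_t move_end   = log.buf_free;
    unsigned char tmp[BLOCK_SIZE];
    std::memcpy(tmp, log.buf.data() + move_start, move_end - move_start);

    // Offsets become relative to the start of that block in the new buffer
    // (the real code has already ensured buf_next_to_write is in this block).
    log.buf_free          -= move_start;
    log.buf_next_to_write -= move_start;

    // Reallocate and restore the saved block at the new buffer's start.
    std::vector<unsigned char>(new_size).swap(log.buf);
    std::memcpy(log.buf.data(), tmp, move_end - move_start);
}

int main()
{
    LogBuffer log;
    log.buf.resize(4 * BLOCK_SIZE, 0xAA);
    log.buf_free = log.buf_next_to_write = 2 * BLOCK_SIZE + 100;
    buffer_extend(log, 16 * BLOCK_SIZE);
    // The 100 bytes of the partial block now live at the start of the buffer.
    return log.buf_free == 100 ? 0 : 1;
}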
@@ -273,7 +263,7 @@ log_calculate_actual_len(
- (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
/* actual data length in last block already written */
- ulint extra_len = (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE);
+ ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE);
ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
extra_len -= LOG_BLOCK_HDR_SIZE;
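A worked example of the length accounting shown above may help. The block layout and the remainder of the function are assumptions based on stock InnoDB (512-byte log blocks with a 12-byte header and a 4-byte trailer, so 496 payload bytes per block):

#include <cstdio>

int main()
{
    const unsigned long block = 512, hdr = 12, trl = 4;
    const unsigned long len_per_blk = block - (hdr + trl);          // 496

    // Example: the current block already holds 100 payload bytes
    // (buf_free % block == hdr + 100), and we append 1000 more bytes.
    const unsigned long extra_in_last_block = 112 - hdr;            // 100
    const unsigned long len = 1000;

    // Assumed to mirror the rest of log_calculate_actual_len(): every full
    // payload block crossed adds one more header and trailer on disk.
    const unsigned long extra = (len + extra_in_last_block) / len_per_blk
                                * (hdr + trl);                      // 2 * 16 = 32
    std::printf("actual length = %lu bytes\n", len + extra);        // 1032
}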
@@ -298,7 +288,7 @@ log_margin_checkpoint_age(
ut_ad(log_mutex_own());
- if (margin > log_sys->log_group_capacity) {
+ if (margin > log_sys.log_group_capacity) {
/* return with warning output to avoid deadlock */
if (!log_has_printed_chkp_margine_warning
|| difftime(time(NULL),
@@ -310,7 +300,7 @@ log_margin_checkpoint_age(
" small for the single transaction log (size="
<< len << "). So, the last checkpoint age"
" might exceed the log group capacity "
- << log_sys->log_group_capacity << ".";
+ << log_sys.log_group_capacity << ".";
}
return;
@@ -319,20 +309,20 @@ log_margin_checkpoint_age(
/* Our margin check should ensure that we never reach this condition.
Try to do checkpoint once. We cannot keep waiting here as it might
result in hang in case the current mtr has latch on oldest lsn */
- if (log_sys->lsn - log_sys->last_checkpoint_lsn + margin
- > log_sys->log_group_capacity) {
+ if (log_sys.lsn - log_sys.last_checkpoint_lsn + margin
+ > log_sys.log_group_capacity) {
/* The log write of 'len' might overwrite the transaction log
after the last checkpoint. Makes checkpoint. */
bool flushed_enough = false;
- if (log_sys->lsn - log_buf_pool_get_oldest_modification()
+ if (log_sys.lsn - log_buf_pool_get_oldest_modification()
+ margin
- <= log_sys->log_group_capacity) {
+ <= log_sys.log_group_capacity) {
flushed_enough = true;
}
- log_sys->check_flush_or_checkpoint = true;
+ log_sys.check_flush_or_checkpoint = true;
log_mutex_exit();
DEBUG_SYNC_C("margin_checkpoint_age_rescue");
@@ -363,7 +353,7 @@ log_reserve_and_open(
loop:
ut_ad(log_mutex_own());
- if (log_sys->is_extending) {
+ if (log_sys.is_extending) {
log_mutex_exit();
/* Log buffer size is extending. Writing up to the next block
@@ -383,7 +373,7 @@ loop:
len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
+ (5 * len) / 4;
- if (log_sys->buf_free + len_upper_limit > log_sys->buf_size) {
+ if (log_sys.buf_free + len_upper_limit > srv_log_buffer_size) {
log_mutex_exit();
DEBUG_SYNC_C("log_buf_size_exceeded");
@@ -399,7 +389,7 @@ loop:
goto loop;
}
- return(log_sys->lsn);
+ return(log_sys.lsn);
}
/************************************************************//**
@@ -411,7 +401,6 @@ log_write_low(
const byte* str, /*!< in: string */
ulint str_len) /*!< in: string length */
{
- log_t* log = log_sys;
ulint len;
ulint data_len;
byte* log_block;
@@ -420,7 +409,7 @@ log_write_low(
part_loop:
/* Calculate a part length */
- data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
+ data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
@@ -431,18 +420,18 @@ part_loop:
data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
len = OS_FILE_LOG_BLOCK_SIZE
- - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
- LOG_BLOCK_TRL_SIZE;
}
- ut_memcpy(log->buf + log->buf_free, str, len);
+ memcpy(log_sys.buf + log_sys.buf_free, str, len);
str_len -= len;
str = str + len;
log_block = static_cast<byte*>(
- ut_align_down(
- log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
+ ut_align_down(log_sys.buf + log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
log_block_set_data_len(log_block, data_len);
@@ -450,20 +439,21 @@ part_loop:
/* This block became full */
log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
log_block_set_checkpoint_no(log_block,
- log_sys->next_checkpoint_no);
+ log_sys.next_checkpoint_no);
len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
- log->lsn += len;
+ log_sys.lsn += len;
/* Initialize the next block header */
- log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
+ log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE,
+ log_sys.lsn);
} else {
- log->lsn += len;
+ log_sys.lsn += len;
}
- log->buf_free += len;
+ log_sys.buf_free += ulong(len);
- ut_ad(log->buf_free <= log->buf_size);
+ ut_ad(log_sys.buf_free <= srv_log_buffer_size);
if (str_len > 0) {
goto part_loop;
@@ -483,16 +473,15 @@ log_close(void)
ulint first_rec_group;
lsn_t oldest_lsn;
lsn_t lsn;
- log_t* log = log_sys;
lsn_t checkpoint_age;
ut_ad(log_mutex_own());
- lsn = log->lsn;
+ lsn = log_sys.lsn;
log_block = static_cast<byte*>(
- ut_align_down(
- log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
+ ut_align_down(log_sys.buf + log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
first_rec_group = log_block_get_first_rec_group(log_block);
@@ -505,14 +494,13 @@ log_close(void)
log_block, log_block_get_data_len(log_block));
}
- if (log->buf_free > log->max_buf_free) {
-
- log->check_flush_or_checkpoint = true;
+ if (log_sys.buf_free > log_sys.max_buf_free) {
+ log_sys.check_flush_or_checkpoint = true;
}
- checkpoint_age = lsn - log->last_checkpoint_lsn;
+ checkpoint_age = lsn - log_sys.last_checkpoint_lsn;
- if (checkpoint_age >= log->log_group_capacity) {
+ if (checkpoint_age >= log_sys.log_group_capacity) {
DBUG_EXECUTE_IF(
"print_all_chkp_warnings",
log_has_printed_chkp_warning = false;);
@@ -525,131 +513,28 @@ log_close(void)
ib::error() << "The age of the last checkpoint is "
<< checkpoint_age << ", which exceeds the log"
- " group capacity " << log->log_group_capacity
+ " group capacity "
+ << log_sys.log_group_capacity
<< ".";
}
}
- if (checkpoint_age <= log->max_modified_age_sync) {
-
+ if (checkpoint_age <= log_sys.max_modified_age_sync) {
goto function_exit;
}
oldest_lsn = buf_pool_get_oldest_modification();
if (!oldest_lsn
- || lsn - oldest_lsn > log->max_modified_age_sync
- || checkpoint_age > log->max_checkpoint_age_async) {
-
- log->check_flush_or_checkpoint = true;
+ || lsn - oldest_lsn > log_sys.max_modified_age_sync
+ || checkpoint_age > log_sys.max_checkpoint_age_async) {
+ log_sys.check_flush_or_checkpoint = true;
}
function_exit:
return(lsn);
}
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are not
-included.
-@return size offset (<= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_size_offset(
-/*=======================*/
- lsn_t offset, /*!< in: real offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
-}
-
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are
-included.
-@return real offset (>= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_real_offset(
-/*=======================*/
- lsn_t offset, /*!< in: size offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- return(offset + LOG_FILE_HDR_SIZE
- * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
-}
-
-/** Calculate the offset of an lsn within a log group.
-@param[in] lsn log sequence number
-@param[in] group log group
-@return offset within the log group */
-lsn_t
-log_group_calc_lsn_offset(
- lsn_t lsn,
- const log_group_t* group)
-{
- lsn_t gr_lsn;
- lsn_t gr_lsn_size_offset;
- lsn_t difference;
- lsn_t group_size;
- lsn_t offset;
-
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
- ut_ad(log_mutex_own() || log_write_mutex_own());
-
- gr_lsn = group->lsn;
-
- gr_lsn_size_offset = log_group_calc_size_offset(
- group->lsn_offset, group);
-
- group_size = group->capacity();
-
- if (lsn >= gr_lsn) {
-
- difference = lsn - gr_lsn;
- } else {
- difference = gr_lsn - lsn;
-
- difference = difference % group_size;
-
- difference = group_size - difference;
- }
-
- offset = (gr_lsn_size_offset + difference) % group_size;
-
- /* fprintf(stderr,
- "Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
- " difference is " LSN_PF "\n",
- offset, gr_lsn_size_offset, difference);
- */
-
- return(log_group_calc_real_offset(offset, group));
-}
-
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- lsn_t lsn) /*!< in: lsn for which the values should be
- set */
-{
- group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
- group->lsn = lsn;
-}
-
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -689,186 +574,123 @@ log_set_capacity(ulonglong file_size)
log_mutex_enter();
- log_sys->log_group_capacity = smallest_capacity;
+ log_sys.log_group_capacity = smallest_capacity;
- log_sys->max_modified_age_async = margin
+ log_sys.max_modified_age_async = margin
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
- log_sys->max_modified_age_sync = margin
+ log_sys.max_modified_age_sync = margin
- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
- log_sys->max_checkpoint_age_async = margin - margin
+ log_sys.max_checkpoint_age_async = margin - margin
/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
- log_sys->max_checkpoint_age = margin;
+ log_sys.max_checkpoint_age = margin;
log_mutex_exit();
return(true);
}
-/** Initializes the redo logging subsystem. */
-void
-log_sys_init()
+/** Initialize the redo log subsystem. */
+void log_t::create()
{
- log_sys = static_cast<log_t*>(ut_zalloc_nokey(sizeof(log_t)));
-
- mutex_create(LATCH_ID_LOG_SYS, &log_sys->mutex);
- mutex_create(LATCH_ID_LOG_WRITE, &log_sys->write_mutex);
-
- mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_sys->log_flush_order_mutex);
-
- /* Start the lsn from one log block from zero: this way every
- log record has a start lsn != zero, a fact which we will use */
-
- log_sys->lsn = LOG_START_LSN;
-
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
-
- log_sys->buf_size = LOG_BUFFER_SIZE;
-
- log_sys->buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
- TRASH_ALLOC(log_sys->buf_ptr,
- log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->first_in_use = true;
-
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = true;
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
- /*----------------------------*/
-
- log_sys->write_lsn = log_sys->lsn;
-
- log_sys->flush_event = os_event_create(0);
-
- os_event_set(log_sys->flush_event);
-
- /*----------------------------*/
-
- log_sys->last_checkpoint_lsn = log_sys->lsn;
-
- rw_lock_create(
- checkpoint_lock_key, &log_sys->checkpoint_lock,
- SYNC_NO_ORDER_CHECK);
-
- log_sys->checkpoint_buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->checkpoint_buf = static_cast<byte*>(
- ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- /*----------------------------*/
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN?
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
- log_scrub_thread_active = !srv_read_only_mode && srv_scrub_log;
- if (log_scrub_thread_active) {
- log_scrub_event = os_event_create("log_scrub_event");
- os_thread_create(log_scrub_thread, NULL, NULL);
- }
+ ut_ad(this == &log_sys);
+ ut_ad(!is_initialised());
+ m_initialised= true;
+
+ mutex_create(LATCH_ID_LOG_SYS, &mutex);
+ mutex_create(LATCH_ID_LOG_WRITE, &write_mutex);
+ mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_flush_order_mutex);
+
+ /* Start the lsn from one log block from zero: this way every
+ log record has a non-zero start lsn, a fact which we will use */
+
+ lsn= LOG_START_LSN;
+
+ ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
+ ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);
+
+ buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size * 2));
+ TRASH_ALLOC(buf, srv_log_buffer_size * 2);
+
+ first_in_use= true;
+
+ max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
+ LOG_BUF_FLUSH_MARGIN;
+ check_flush_or_checkpoint= true;
+
+ n_log_ios_old= n_log_ios;
+ last_printout_time= time(NULL);
+
+ buf_next_to_write= 0;
+ is_extending= false;
+ write_lsn= lsn;
+ flushed_to_disk_lsn= 0;
+ n_pending_flushes= 0;
+ flush_event = os_event_create("log_flush_event");
+ os_event_set(flush_event);
+ n_log_ios= 0;
+ n_log_ios_old= 0;
+ log_group_capacity= 0;
+ max_modified_age_async= 0;
+ max_modified_age_sync= 0;
+ max_checkpoint_age_async= 0;
+ max_checkpoint_age= 0;
+ next_checkpoint_no= 0;
+ next_checkpoint_lsn= 0;
+ append_on_checkpoint= NULL;
+ n_pending_checkpoint_writes= 0;
+
+ last_checkpoint_lsn= lsn;
+ rw_lock_create(checkpoint_lock_key, &checkpoint_lock, SYNC_NO_ORDER_CHECK);
+
+ log_block_init(buf, lsn);
+ log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
+
+ buf_free= LOG_BLOCK_HDR_SIZE;
+ lsn= LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
+
+ MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, lsn - last_checkpoint_lsn);
+
+ log_scrub_thread_active= !srv_read_only_mode && srv_scrub_log;
+ if (log_scrub_thread_active) {
+ log_scrub_event= os_event_create("log_scrub_event");
+ os_thread_create(log_scrub_thread, NULL, NULL);
+ }
}
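
Instead of a heap-allocated log_sys pointer, the log system is now a statically constructed object whose heavy setup happens in log_t::create() and is guarded by is_initialised(); the call sites themselves are outside this hunk. A minimal model of that pattern, with placeholder names rather than InnoDB code:

    #include <cassert>

    struct subsystem_t {
        bool m_initialised = false;
        bool is_initialised() const { return m_initialised; }

        void create()   // acquire mutexes, buffers, ...
        { assert(!is_initialised()); m_initialised = true; }

        void close()    // safe to call even if create() never ran
        { if (!is_initialised()) return; m_initialised = false; }
    };

    subsystem_t subsys; // replaces "subsystem_t* subsys" plus a heap allocation

    int main()
    {
        subsys.create();
        assert(subsys.is_initialised());
        subsys.close();
        return 0;
    }
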
/** Initialize the redo log.
@param[in] n_files number of files */
-void
-log_init(ulint n_files)
+void log_t::files::create(ulint n_files)
{
- ulint i;
- log_group_t* group = &log_sys->log;
-
- group->n_files = n_files;
- group->subformat = srv_safe_truncate;
- if (srv_safe_truncate) {
- group->format = srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_3;
- } else {
- group->format = srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_2;
- }
- group->file_size = srv_log_file_size;
- group->state = LOG_GROUP_OK;
- group->lsn = LOG_START_LSN;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-
- group->file_header_bufs_ptr = static_cast<byte**>(
- ut_zalloc_nokey(sizeof(byte*) * n_files));
-
- group->file_header_bufs = static_cast<byte**>(
- ut_zalloc_nokey(sizeof(byte**) * n_files));
-
- for (i = 0; i < n_files; i++) {
- group->file_header_bufs_ptr[i] = static_cast<byte*>(
- ut_zalloc_nokey(LOG_FILE_HDR_SIZE
- + OS_FILE_LOG_BLOCK_SIZE));
-
- group->file_header_bufs[i] = static_cast<byte*>(
- ut_align(group->file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE));
- }
-
- group->checkpoint_buf_ptr = static_cast<byte*>(
- ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- group->checkpoint_buf = static_cast<byte*>(
- ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
-}
-
-/******************************************************//**
-Completes an i/o to a log file. */
-void
-log_io_complete(
-/*============*/
- log_group_t* group) /*!< in: log group or a dummy pointer */
-{
- if ((ulint) group & 0x1UL) {
- /* It was a checkpoint write */
- group = (log_group_t*)((ulint) group - 1);
-
- switch (srv_file_flush_method) {
- case SRV_O_DSYNC:
- case SRV_NOSYNC:
- break;
- case SRV_FSYNC:
- case SRV_LITTLESYNC:
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
- case SRV_ALL_O_DIRECT_FSYNC:
- fil_flush(SRV_LOG_SPACE_FIRST_ID);
- }
-
-
- DBUG_PRINT("ib_log", ("checkpoint info written"));
- log_io_complete_checkpoint();
-
- return;
- }
-
- ut_error; /*!< We currently use synchronous writing of the
- logs and cannot end up here! */
+ ut_ad(n_files <= SRV_N_LOG_FILES_MAX);
+ ut_ad(this == &log_sys.log);
+ ut_ad(log_sys.is_initialised());
+
+ this->n_files= n_files;
+ format= srv_encrypt_log
+ ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_CURRENT;
+ subformat= 2;
+ file_size= srv_log_file_size;
+ lsn= LOG_START_LSN;
+ lsn_offset= LOG_FILE_HDR_SIZE;
+
+ byte* ptr= static_cast<byte*>(ut_zalloc_nokey(LOG_FILE_HDR_SIZE * n_files
+ + OS_FILE_LOG_BLOCK_SIZE));
+ file_header_bufs_ptr= ptr;
+ ptr= static_cast<byte*>(ut_align(ptr, OS_FILE_LOG_BLOCK_SIZE));
+
+ memset(file_header_bufs, 0, sizeof file_header_bufs);
+
+ for (ulint i = 0; i < n_files; i++, ptr += LOG_FILE_HDR_SIZE)
+ file_header_bufs[i] = ptr;
}
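
log_t::files::create() now carves all per-file header buffers out of a single zero-filled allocation: the block is aligned once to OS_FILE_LOG_BLOCK_SIZE and then sliced at LOG_FILE_HDR_SIZE strides, which keeps every slice aligned as long as the header size is a multiple of the block size. A standalone sketch of the same allocate-align-carve pattern; the constants and names below are illustrative only:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    static const size_t BLOCK = 512;      // stand-in for OS_FILE_LOG_BLOCK_SIZE
    static const size_t HDR = 4 * BLOCK;  // stand-in for LOG_FILE_HDR_SIZE

    int main()
    {
        const size_t n_files = 2;
        // one zero-filled allocation, over-sized by a block for manual alignment
        unsigned char* raw = static_cast<unsigned char*>(
            calloc(HDR * n_files + BLOCK, 1));
        uintptr_t p = reinterpret_cast<uintptr_t>(raw);
        unsigned char* aligned = reinterpret_cast<unsigned char*>(
            (p + BLOCK - 1) & ~uintptr_t(BLOCK - 1));

        unsigned char* header_bufs[2];
        for (size_t i = 0; i < n_files; i++, aligned += HDR) {
            // each slice stays BLOCK-aligned because HDR % BLOCK == 0
            header_bufs[i] = aligned;
            assert(uintptr_t(header_bufs[i]) % BLOCK == 0);
        }

        free(raw);
        return 0;
    }
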
/******************************************************//**
Writes a log file header to a log file space. */
static
void
-log_group_file_header_flush(
-/*========================*/
- log_group_t* group, /*!< in: log group */
+log_file_header_flush(
ulint nth_file, /*!< in: header to the nth file in the
log file space */
lsn_t start_lsn) /*!< in: log file data starts at this
@@ -879,17 +701,15 @@ log_group_file_header_flush(
ut_ad(log_write_mutex_own());
ut_ad(!recv_no_log_write);
- ut_a(nth_file < group->n_files);
- ut_ad((group->format & ~LOG_HEADER_FORMAT_ENCRYPTED)
- == (srv_safe_truncate
- ? LOG_HEADER_FORMAT_10_3
- : LOG_HEADER_FORMAT_10_2));
+ ut_a(nth_file < log_sys.log.n_files);
+ ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ == LOG_HEADER_FORMAT_CURRENT);
- buf = *(group->file_header_bufs + nth_file);
+ buf = log_sys.log.file_header_bufs[nth_file];
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
- mach_write_to_4(buf + LOG_HEADER_FORMAT, group->format);
- mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, srv_safe_truncate);
+ mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
+ mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
LOG_HEADER_CREATOR_CURRENT);
@@ -897,26 +717,25 @@ log_group_file_header_flush(
>= sizeof LOG_HEADER_CREATOR_CURRENT);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
- dest_offset = nth_file * group->file_size;
+ dest_offset = nth_file * log_sys.log.file_size;
DBUG_PRINT("ib_log", ("write " LSN_PF
" file " ULINTPF " header",
start_lsn, nth_file));
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
srv_stats.os_log_pending_writes.inc();
- const ulint page_no
- = (ulint) (dest_offset / univ_page_size.physical());
+ const ulint page_no = ulint(dest_offset >> srv_page_size_shift);
fil_io(IORequestLogWrite, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (dest_offset % univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, group);
+ ulint(dest_offset & (srv_page_size - 1)),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
srv_stats.os_log_pending_writes.dec();
}
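
Throughout the patch, division and modulo by univ_page_size.physical() become a shift and a mask; the two forms agree because the page size is a power of two equal to 1 << srv_page_size_shift. A tiny self-check of that equivalence, with placeholder values:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const unsigned shift = 14;                     // e.g. 16KiB pages
        const uint64_t page_size = uint64_t(1) << shift;
        const uint64_t dest_offset = 5 * page_size + 777;

        assert(dest_offset / page_size == (dest_offset >> shift));
        assert(dest_offset % page_size == (dest_offset & (page_size - 1)));
        return 0;
    }
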
@@ -935,12 +754,10 @@ log_block_store_checksum(
}
/******************************************************//**
-Writes a buffer to a log file group. */
+Writes a buffer to a log file. */
static
void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
+log_write_buf(
byte* buf, /*!< in: buffer */
ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
@@ -971,28 +788,27 @@ loop:
return;
}
- next_offset = log_group_calc_lsn_offset(start_lsn, group);
+ next_offset = log_sys.log.calc_lsn_offset(start_lsn);
if (write_header
- && next_offset % group->file_size == LOG_FILE_HDR_SIZE) {
+ && next_offset % log_sys.log.file_size == LOG_FILE_HDR_SIZE) {
/* We start to write a new log file instance in the group */
- ut_a(next_offset / group->file_size <= ULINT_MAX);
+ ut_a(next_offset / log_sys.log.file_size <= ULINT_MAX);
- log_group_file_header_flush(group, (ulint)
- (next_offset / group->file_size),
- start_lsn);
+ log_file_header_flush(
+ ulint(next_offset / log_sys.log.file_size), start_lsn);
srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
srv_stats.log_writes.inc();
}
- if ((next_offset % group->file_size) + len > group->file_size) {
-
+ if ((next_offset % log_sys.log.file_size) + len
+ > log_sys.log.file_size) {
/* if the above condition holds, then the below expression
is < len which is ulint, so the typecast is ok */
- write_len = (ulint)
- (group->file_size - (next_offset % group->file_size));
+ write_len = ulint(log_sys.log.file_size
+ - (next_offset % log_sys.log.file_size));
} else {
write_len = len;
}
@@ -1024,22 +840,20 @@ loop:
log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
}
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
srv_stats.os_log_pending_writes.inc();
- ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ ut_a((next_offset >> srv_page_size_shift) <= ULINT_MAX);
- const ulint page_no
- = (ulint) (next_offset / univ_page_size.physical());
+ const ulint page_no = ulint(next_offset >> srv_page_size_shift);
fil_io(IORequestLogWrite, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group);
+ ulint(next_offset & (srv_page_size - 1)), write_len, buf, NULL);
srv_stats.os_log_pending_writes.dec();
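
The renamed log_write_buf() keeps the old control flow: when a write would cross the end of one log file, only the part up to the boundary is written and the loop restarts with the remainder (flushing the next file's header first when needed). A small standalone model of that split, with illustrative constants:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t file_size = 4096;   // stand-in for srv_log_file_size
        uint64_t next_offset = 3584;       // close to the end of one file
        uint64_t len = 1024;               // spans the file boundary

        while (len) {
            const uint64_t in_file = next_offset % file_size;
            const uint64_t write_len = in_file + len > file_size
                ? file_size - in_file : len;
            std::printf("write %llu bytes at offset %llu\n",
                        (unsigned long long) write_len,
                        (unsigned long long) next_offset);
            next_offset += write_len;
            len -= write_len;
        }
        return 0;
    }
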
@@ -1063,9 +877,9 @@ static
void
log_write_flush_to_disk_low()
{
- /* FIXME: This is not holding log_sys->mutex while
+ /* FIXME: This is not holding log_sys.mutex while
calling os_event_set()! */
- ut_a(log_sys->n_pending_flushes == 1); /* No other threads here */
+ ut_a(log_sys.n_pending_flushes == 1); /* No other threads here */
bool do_flush = srv_file_flush_method != SRV_O_DSYNC;
@@ -1077,12 +891,12 @@ log_write_flush_to_disk_low()
log_mutex_enter();
if (do_flush) {
- log_sys->flushed_to_disk_lsn = log_sys->current_flush_lsn;
+ log_sys.flushed_to_disk_lsn = log_sys.current_flush_lsn;
}
- log_sys->n_pending_flushes--;
+ log_sys.n_pending_flushes--;
- os_event_set(log_sys->flush_event);
+ os_event_set(log_sys.flush_event);
}
/** Switch the log buffer in use, and copy the content of last block
@@ -1095,29 +909,29 @@ log_buffer_switch()
ut_ad(log_mutex_own());
ut_ad(log_write_mutex_own());
- const byte* old_buf = log_sys->buf;
- ulint area_end = ut_calc_align(log_sys->buf_free,
+ const byte* old_buf = log_sys.buf;
+ ulint area_end = ut_calc_align(log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE);
- if (log_sys->first_in_use) {
- log_sys->first_in_use = false;
- ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ if (log_sys.first_in_use) {
+ log_sys.first_in_use = false;
+ ut_ad(log_sys.buf == ut_align(log_sys.buf,
OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf += log_sys->buf_size;
+ log_sys.buf += srv_log_buffer_size;
} else {
- log_sys->first_in_use = true;
- log_sys->buf -= log_sys->buf_size;
- ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
+ log_sys.first_in_use = true;
+ log_sys.buf -= srv_log_buffer_size;
+ ut_ad(log_sys.buf == ut_align(log_sys.buf,
OS_FILE_LOG_BLOCK_SIZE));
}
/* Copy the last block to new buf */
- ut_memcpy(log_sys->buf,
+ ut_memcpy(log_sys.buf,
old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf_free %= OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
+ log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
+ log_sys.buf_next_to_write = log_sys.buf_free;
}
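
The rewritten log_buffer_switch() flips between the two halves of the doubled log buffer (buf moves by srv_log_buffer_size, first_in_use tracks which half is active) after copying over the last, possibly incomplete block, so the old half can be written out while new records are appended to the other half. A minimal model of that flip; the names and sizes are illustrative, not the InnoDB definitions:

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <vector>

    static const size_t BLOCK = 512;   // stand-in for OS_FILE_LOG_BLOCK_SIZE

    struct log_buf_model {
        size_t half;                         // stand-in for srv_log_buffer_size
        std::vector<unsigned char> storage;  // both halves, back to back
        unsigned char* buf;                  // half currently used for appending
        bool first_in_use;
        size_t buf_free;                     // append position inside buf

        explicit log_buf_model(size_t h)
            : half(h), storage(2 * h), buf(storage.data()),
              first_in_use(true), buf_free(0) {}

        void switch_buffer()
        {
            assert(buf_free > 0 && buf_free <= half);
            unsigned char* old_buf = buf;
            // round the used area up to a whole block
            const size_t area_end = (buf_free + BLOCK - 1) / BLOCK * BLOCK;
            buf = first_in_use ? storage.data() + half : storage.data();
            first_in_use = !first_in_use;
            // carry the last, possibly incomplete block into the new half
            std::memcpy(buf, old_buf + area_end - BLOCK, BLOCK);
            buf_free %= BLOCK;   // keep appending inside that block
        }
    };

    int main()
    {
        log_buf_model m(4096);
        m.buf_free = 700;        // pretend some records were appended
        m.switch_buffer();       // the old half can now be written out
        assert(!m.first_in_use && m.buf_free == 700 % BLOCK);
        return 0;
    }
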
/** Ensure that the log has been written to the log file up to a given
@@ -1156,7 +970,7 @@ loop:
(flush_to_disk == true) case, because the log_mutex
contention also works as the arbitrator for write-IO
(fsync) bandwidth between log files and data files. */
- if (!flush_to_disk && log_sys->write_lsn >= lsn) {
+ if (!flush_to_disk && log_sys.write_lsn >= lsn) {
return;
}
#endif
@@ -1165,8 +979,8 @@ loop:
ut_ad(!recv_no_log_write);
lsn_t limit_lsn = flush_to_disk
- ? log_sys->flushed_to_disk_lsn
- : log_sys->write_lsn;
+ ? log_sys.flushed_to_disk_lsn
+ : log_sys.write_lsn;
if (limit_lsn >= lsn) {
log_write_mutex_exit();
@@ -1179,15 +993,15 @@ loop:
pending flush and based on that we wait for it to finish
before proceeding further. */
if (flush_to_disk
- && (log_sys->n_pending_flushes > 0
- || !os_event_is_set(log_sys->flush_event))) {
+ && (log_sys.n_pending_flushes > 0
+ || !os_event_is_set(log_sys.flush_event))) {
/* Figure out if the current flush will do the job
for us. */
- bool work_done = log_sys->current_flush_lsn >= lsn;
+ bool work_done = log_sys.current_flush_lsn >= lsn;
log_write_mutex_exit();
- os_event_wait(log_sys->flush_event);
+ os_event_wait(log_sys.flush_event);
if (work_done) {
return;
@@ -1198,7 +1012,7 @@ loop:
log_mutex_enter();
if (!flush_to_disk
- && log_sys->buf_free == log_sys->buf_next_to_write) {
+ && log_sys.buf_free == log_sys.buf_next_to_write) {
/* Nothing to write and no flush to disk requested */
log_mutex_exit_all();
return;
@@ -1212,15 +1026,15 @@ loop:
ulint pad_size;
DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
- log_sys->write_lsn,
- log_sys->lsn));
+ log_sys.write_lsn,
+ log_sys.lsn));
if (flush_to_disk) {
- log_sys->n_pending_flushes++;
- log_sys->current_flush_lsn = log_sys->lsn;
+ log_sys.n_pending_flushes++;
+ log_sys.current_flush_lsn = log_sys.lsn;
MONITOR_INC(MONITOR_PENDING_LOG_FLUSH);
- os_event_reset(log_sys->flush_event);
+ os_event_reset(log_sys.flush_event);
- if (log_sys->buf_free == log_sys->buf_next_to_write) {
+ if (log_sys.buf_free == log_sys.buf_next_to_write) {
/* Nothing to write, flush only */
log_mutex_exit_all();
log_write_flush_to_disk_low();
@@ -1229,25 +1043,25 @@ loop:
}
}
- start_offset = log_sys->buf_next_to_write;
- end_offset = log_sys->buf_free;
+ start_offset = log_sys.buf_next_to_write;
+ end_offset = log_sys.buf_free;
area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
ut_ad(area_end - area_start > 0);
- log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
+ log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
log_block_set_checkpoint_no(
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- log_sys->next_checkpoint_no);
+ log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+ log_sys.next_checkpoint_no);
- write_lsn = log_sys->lsn;
- write_buf = log_sys->buf;
+ write_lsn = log_sys.lsn;
+ write_buf = log_sys.buf;
log_buffer_switch();
- log_group_set_fields(&log_sys->log, log_sys->write_lsn);
+ log_sys.log.set_fields(log_sys.write_lsn);
log_mutex_exit();
/* Erase the end of the last log block. */
@@ -1257,12 +1071,9 @@ loop:
/* Calculate pad_size if needed. */
pad_size = 0;
if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
- lsn_t end_offset;
ulint end_offset_in_unit;
- end_offset = log_group_calc_lsn_offset(
- ut_uint64_align_up(write_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &log_sys->log);
+ lsn_t end_offset = log_sys.log.calc_lsn_offset(
+ ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
if (end_offset_in_unit > 0
@@ -1270,9 +1081,9 @@ loop:
/* The first block in the unit was initialized
after the last writing.
Needs to be written padded data once. */
- pad_size = std::min(
+ pad_size = std::min<ulint>(
ulint(write_ahead_size) - end_offset_in_unit,
- log_sys->buf_size - area_end);
+ srv_log_buffer_size - area_end);
::memset(write_buf + area_end, 0, pad_size);
}
}
@@ -1281,43 +1092,41 @@ loop:
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"InnoDB log write: "
LSN_PF "," LSN_PF,
- log_sys->write_lsn, lsn);
+ log_sys.write_lsn, lsn);
}
- if (log_sys->is_encrypted()) {
- log_crypt(write_buf + area_start, log_sys->write_lsn,
+ if (log_sys.is_encrypted()) {
+ log_crypt(write_buf + area_start, log_sys.write_lsn,
area_end - area_start);
}
/* Do the write to the log files */
- log_group_write_buf(
- &log_sys->log, write_buf + area_start,
- area_end - area_start + pad_size,
+ log_write_buf(
+ write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
pad_size,
#endif /* UNIV_DEBUG */
- ut_uint64_align_down(log_sys->write_lsn,
+ ut_uint64_align_down(log_sys.write_lsn,
OS_FILE_LOG_BLOCK_SIZE),
start_offset - area_start);
srv_stats.log_padded.add(pad_size);
- log_sys->write_lsn = write_lsn;
+ log_sys.write_lsn = write_lsn;
if (srv_file_flush_method == SRV_O_DSYNC) {
/* O_SYNC means the OS did not buffer the log file at all:
so we have also flushed to disk what we have written */
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
+ log_sys.flushed_to_disk_lsn = log_sys.write_lsn;
}
log_write_mutex_exit();
if (flush_to_disk) {
log_write_flush_to_disk_low();
- ib_uint64_t write_lsn = log_sys->write_lsn;
- ib_uint64_t flush_lsn = log_sys->flushed_to_disk_lsn;
+ ib_uint64_t flush_lsn = log_sys.flushed_to_disk_lsn;
log_mutex_exit();
- innobase_mysql_log_notify(write_lsn, flush_lsn);
+ innobase_mysql_log_notify(flush_lsn);
}
}
@@ -1346,11 +1155,11 @@ log_buffer_sync_in_background(
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
if (flush
- && log_sys->n_pending_flushes > 0
- && log_sys->current_flush_lsn >= lsn) {
+ && log_sys.n_pending_flushes > 0
+ && log_sys.current_flush_lsn >= lsn) {
/* The write + flush will write enough */
log_mutex_exit();
return;
@@ -1370,14 +1179,13 @@ void
log_flush_margin(void)
/*==================*/
{
- log_t* log = log_sys;
lsn_t lsn = 0;
log_mutex_enter();
- if (log->buf_free > log->max_buf_free) {
+ if (log_sys.buf_free > log_sys.max_buf_free) {
/* We can write during flush */
- lsn = log->lsn;
+ lsn = log_sys.lsn;
}
log_mutex_exit();
@@ -1457,36 +1265,33 @@ log_complete_checkpoint(void)
/*=========================*/
{
ut_ad(log_mutex_own());
- ut_ad(log_sys->n_pending_checkpoint_writes == 0);
+ ut_ad(log_sys.n_pending_checkpoint_writes == 0);
- log_sys->next_checkpoint_no++;
+ log_sys.next_checkpoint_no++;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
", flushed to " LSN_PF,
- log_sys->last_checkpoint_lsn,
- log_sys->flushed_to_disk_lsn));
+ log_sys.last_checkpoint_lsn,
+ log_sys.flushed_to_disk_lsn));
- rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ rw_lock_x_unlock_gen(&(log_sys.checkpoint_lock), LOG_CHECKPOINT);
}
-/******************************************************//**
-Completes an asynchronous checkpoint info write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void)
-/*============================*/
+/** Complete an asynchronous checkpoint write. */
+void log_t::complete_checkpoint()
{
+ ut_ad(this == &log_sys);
MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
log_mutex_enter();
- ut_ad(log_sys->n_pending_checkpoint_writes > 0);
+ ut_ad(n_pending_checkpoint_writes > 0);
- if (--log_sys->n_pending_checkpoint_writes == 0) {
+ if (!--n_pending_checkpoint_writes) {
log_complete_checkpoint();
}
@@ -1500,91 +1305,78 @@ void
log_group_checkpoint(lsn_t end_lsn)
{
lsn_t lsn_offset;
- byte* buf;
ut_ad(!srv_read_only_mode);
ut_ad(log_mutex_own());
- ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
- ut_ad(end_lsn <= log_sys->lsn);
- ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
+ ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
+ ut_ad(end_lsn <= log_sys.lsn);
+ ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys.lsn
|| srv_shutdown_state != SRV_SHUTDOWN_NONE);
DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
" written",
- log_sys->next_checkpoint_no,
- log_sys->next_checkpoint_lsn));
-
- log_group_t* group = &log_sys->log;
+ log_sys.next_checkpoint_no,
+ log_sys.next_checkpoint_lsn));
- buf = group->checkpoint_buf;
+ byte* buf = log_sys.checkpoint_buf;
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+ mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);
- if (log_sys->is_encrypted()) {
+ if (log_sys.is_encrypted()) {
log_crypt_write_checkpoint_buf(buf);
}
- lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
- group);
+ lsn_offset = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
- mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
+ srv_log_buffer_size);
mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
- ut_ad(LOG_CHECKPOINT_1 < univ_page_size.physical());
- ut_ad(LOG_CHECKPOINT_2 < univ_page_size.physical());
+ ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
+ ut_ad(LOG_CHECKPOINT_2 < srv_page_size);
- if (log_sys->n_pending_checkpoint_writes++ == 0) {
- rw_lock_x_lock_gen(&log_sys->checkpoint_lock,
+ if (log_sys.n_pending_checkpoint_writes++ == 0) {
+ rw_lock_x_lock_gen(&log_sys.checkpoint_lock,
LOG_CHECKPOINT);
}
/* Note: We alternate the physical place of the checkpoint info.
See the (next_checkpoint_no & 1) below. */
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write */
-
fil_io(IORequestLogWrite, false,
page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
univ_page_size,
- (log_sys->next_checkpoint_no & 1)
+ (log_sys.next_checkpoint_no & 1)
? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
OS_FILE_LOG_BLOCK_SIZE,
- buf, (byte*) group + 1);
-
- ut_ad(((ulint) group & 0x1UL) == 0);
+ buf, reinterpret_cast<void*>(1) /* checkpoint write */);
}
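
log_group_checkpoint() still alternates between the two checkpoint header slots, selected by the low bit of next_checkpoint_no, so a torn checkpoint write can never destroy the only valid checkpoint; the non-NULL cookie passed to fil_io() now merely tags the request as a checkpoint write instead of encoding the former group pointer. An illustrative check of the alternation (slot values are placeholders):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const unsigned SLOT_1 = 512, SLOT_2 = 1536;  // LOG_CHECKPOINT_1/2 stand-ins
        unsigned prev = 0;
        for (uint64_t checkpoint_no = 0; checkpoint_no < 6; checkpoint_no++) {
            const unsigned slot = (checkpoint_no & 1) ? SLOT_2 : SLOT_1;
            assert(slot != prev);   // never overwrite the latest valid slot
            prev = slot;
        }
        return 0;
    }
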
-/** Read a log group header page to log_sys->checkpoint_buf.
-@param[in] group log group
-@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
-void
-log_group_header_read(
- const log_group_t* group,
- ulint header)
+/** Read a log group header page to log_sys.checkpoint_buf.
+@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
+void log_header_read(ulint header)
{
ut_ad(log_mutex_own());
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID,
- header / univ_page_size.physical()),
- univ_page_size, header % univ_page_size.physical(),
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+ header >> srv_page_size_shift),
+ univ_page_size, header & (srv_page_size - 1),
+ OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL);
}
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@@ -1604,8 +1396,8 @@ log_write_checkpoint_info(bool sync, lsn_t end_lsn)
if (sync) {
/* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&log_sys->checkpoint_lock);
- rw_lock_s_unlock(&log_sys->checkpoint_lock);
+ rw_lock_s_lock(&log_sys.checkpoint_lock);
+ rw_lock_s_unlock(&log_sys.checkpoint_lock);
DBUG_EXECUTE_IF(
"crash_after_checkpoint",
@@ -1621,8 +1413,8 @@ log_append_on_checkpoint(
mtr_buf_t* buf)
{
log_mutex_enter();
- mtr_buf_t* old = log_sys->append_on_checkpoint;
- log_sys->append_on_checkpoint = buf;
+ mtr_buf_t* old = log_sys.append_on_checkpoint;
+ log_sys.append_on_checkpoint = buf;
log_mutex_exit();
return(old);
}
@@ -1661,7 +1453,9 @@ log_checkpoint(
case SRV_LITTLESYNC:
case SRV_O_DIRECT:
case SRV_O_DIRECT_NO_FSYNC:
+#ifdef _WIN32
case SRV_ALL_O_DIRECT_FSYNC:
+#endif
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
@@ -1671,24 +1465,24 @@ log_checkpoint(
oldest_lsn = log_buf_pool_get_oldest_modification();
/* Because log also contains headers and dummy log records,
- log_buf_pool_get_oldest_modification() will return log_sys->lsn
+ log_buf_pool_get_oldest_modification() will return log_sys.lsn
if the buffer pool contains no dirty buffers.
We must make sure that the log is flushed up to that lsn.
If there are dirty buffers in the buffer pool, then our
write-ahead-logging algorithm ensures that the log has been
flushed up to oldest_lsn. */
- ut_ad(oldest_lsn >= log_sys->last_checkpoint_lsn);
+ ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
if (!write_always
&& oldest_lsn
- <= log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
+ <= log_sys.last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
/* Do nothing, because nothing was logged (other than
a MLOG_CHECKPOINT marker) since the previous checkpoint. */
log_mutex_exit();
return(true);
}
/* Repeat the MLOG_FILE_NAME records after the checkpoint, in
- case some log records between the checkpoint and log_sys->lsn
+ case some log records between the checkpoint and log_sys.lsn
need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
apply expects to see a MLOG_CHECKPOINT after the checkpoint,
except on clean shutdown, where the log will be empty after
@@ -1699,14 +1493,14 @@ log_checkpoint(
threads will be blocked, and no pages can be added to the
flush lists. */
lsn_t flush_lsn = oldest_lsn;
- const lsn_t end_lsn = log_sys->lsn;
+ const lsn_t end_lsn = log_sys.lsn;
const bool do_write
= srv_shutdown_state == SRV_SHUTDOWN_NONE
|| flush_lsn != end_lsn;
if (fil_names_clear(flush_lsn, do_write)) {
- ut_ad(log_sys->lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
- flush_lsn = log_sys->lsn;
+ ut_ad(log_sys.lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
+ flush_lsn = log_sys.lsn;
}
log_mutex_exit();
@@ -1729,28 +1523,28 @@ log_checkpoint(
log_mutex_enter();
- ut_ad(log_sys->flushed_to_disk_lsn >= flush_lsn);
+ ut_ad(log_sys.flushed_to_disk_lsn >= flush_lsn);
ut_ad(flush_lsn >= oldest_lsn);
- if (log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ if (log_sys.last_checkpoint_lsn >= oldest_lsn) {
log_mutex_exit();
return(true);
}
- if (log_sys->n_pending_checkpoint_writes > 0) {
+ if (log_sys.n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
log_mutex_exit();
if (sync) {
/* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&log_sys->checkpoint_lock);
- rw_lock_s_unlock(&log_sys->checkpoint_lock);
+ rw_lock_s_lock(&log_sys.checkpoint_lock);
+ rw_lock_s_unlock(&log_sys.checkpoint_lock);
}
return(false);
}
- log_sys->next_checkpoint_lsn = oldest_lsn;
+ log_sys.next_checkpoint_lsn = oldest_lsn;
log_write_checkpoint_info(sync, end_lsn);
ut_ad(!log_mutex_own());
@@ -1788,7 +1582,6 @@ void
log_checkpoint_margin(void)
/*=======================*/
{
- log_t* log = log_sys;
lsn_t age;
lsn_t checkpoint_age;
ib_uint64_t advance;
@@ -1800,39 +1593,39 @@ loop:
log_mutex_enter();
ut_ad(!recv_no_log_write);
- if (!log->check_flush_or_checkpoint) {
+ if (!log_sys.check_flush_or_checkpoint) {
log_mutex_exit();
return;
}
oldest_lsn = log_buf_pool_get_oldest_modification();
- age = log->lsn - oldest_lsn;
+ age = log_sys.lsn - oldest_lsn;
- if (age > log->max_modified_age_sync) {
+ if (age > log_sys.max_modified_age_sync) {
/* A flush is urgent: we have to do a synchronous preflush */
- advance = age - log->max_modified_age_sync;
+ advance = age - log_sys.max_modified_age_sync;
}
- checkpoint_age = log->lsn - log->last_checkpoint_lsn;
+ checkpoint_age = log_sys.lsn - log_sys.last_checkpoint_lsn;
bool checkpoint_sync;
bool do_checkpoint;
- if (checkpoint_age > log->max_checkpoint_age) {
+ if (checkpoint_age > log_sys.max_checkpoint_age) {
/* A checkpoint is urgent: we do it synchronously */
checkpoint_sync = true;
do_checkpoint = true;
- } else if (checkpoint_age > log->max_checkpoint_age_async) {
+ } else if (checkpoint_age > log_sys.max_checkpoint_age_async) {
/* A checkpoint is not urgent: do it asynchronously */
do_checkpoint = true;
checkpoint_sync = false;
- log->check_flush_or_checkpoint = false;
+ log_sys.check_flush_or_checkpoint = false;
} else {
do_checkpoint = false;
checkpoint_sync = false;
- log->check_flush_or_checkpoint = false;
+ log_sys.check_flush_or_checkpoint = false;
}
log_mutex_exit();
@@ -1847,9 +1640,7 @@ loop:
thread doing a flush at the same time. */
if (!success) {
log_mutex_enter();
-
- log->check_flush_or_checkpoint = true;
-
+ log_sys.check_flush_or_checkpoint = true;
log_mutex_exit();
goto loop;
}
@@ -1880,7 +1671,7 @@ log_check_margins(void)
log_checkpoint_margin();
log_mutex_enter();
ut_ad(!recv_no_log_write);
- check = log_sys->check_flush_or_checkpoint;
+ check = log_sys.check_flush_or_checkpoint;
log_mutex_exit();
} while (check);
}
@@ -1904,17 +1695,17 @@ logs_empty_and_mark_files_at_shutdown(void)
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
loop:
- ut_ad(lock_sys || !srv_was_started);
- ut_ad(log_sys || !srv_was_started);
- ut_ad(fil_system || !srv_was_started);
+ ut_ad(lock_sys.is_initialised() || !srv_was_started);
+ ut_ad(log_sys.is_initialised() || !srv_was_started);
+ ut_ad(fil_system.is_initialised() || !srv_was_started);
os_event_set(srv_buf_resize_event);
if (!srv_read_only_mode) {
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
os_event_set(srv_buf_dump_event);
- if (lock_sys) {
- os_event_set(lock_sys->timeout_event);
+ if (lock_sys.timeout_thread_active) {
+ os_event_set(lock_sys.timeout_event);
}
if (dict_stats_event) {
os_event_set(dict_stats_event);
@@ -1941,7 +1732,7 @@ loop:
if (ulint total_trx = srv_was_started && !srv_read_only_mode
&& srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
- ? trx_sys_any_active_transactions() : 0) {
+ ? trx_sys.any_active_transactions() : 0) {
if (srv_print_verbose_log && count > COUNT_INTERVAL) {
service_manager_extend_timeout(
@@ -1969,14 +1760,14 @@ loop:
goto wait_suspend_loop;
} else if (srv_dict_stats_thread_active) {
thread_name = "dict_stats_thread";
- } else if (lock_sys && lock_sys->timeout_thread_active) {
+ } else if (lock_sys.timeout_thread_active) {
thread_name = "lock_wait_timeout_thread";
} else if (srv_buf_dump_thread_active) {
thread_name = "buf_dump_thread";
goto wait_suspend_loop;
} else if (btr_defragment_thread_active) {
thread_name = "btr_defragment_thread";
- } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) {
+ } else if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
thread_name = "rollback of recovered transactions";
} else {
thread_name = NULL;
@@ -2042,10 +1833,10 @@ wait_suspend_loop:
os_event_set(log_scrub_event);
}
- if (log_sys) {
+ if (log_sys.is_initialised()) {
log_mutex_enter();
- const ulint n_write = log_sys->n_pending_checkpoint_writes;
- const ulint n_flush = log_sys->n_pending_flushes;
+ const ulint n_write = log_sys.n_pending_checkpoint_writes;
+ const ulint n_flush = log_sys.n_pending_flushes;
log_mutex_exit();
if (log_scrub_thread_active || n_write || n_flush) {
@@ -2096,7 +1887,7 @@ wait_suspend_loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
- if (fil_system) {
+ if (fil_system.is_initialised()) {
fil_close_all_files();
}
return;
@@ -2109,10 +1900,10 @@ wait_suspend_loop:
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
- const bool lsn_changed = lsn != log_sys->last_checkpoint_lsn;
- ut_ad(lsn >= log_sys->last_checkpoint_lsn);
+ const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn;
+ ut_ad(lsn >= log_sys.last_checkpoint_lsn);
log_mutex_exit();
@@ -2136,7 +1927,7 @@ wait_suspend_loop:
"Free innodb buffer pool");
buf_all_freed();
- ut_a(lsn == log_sys->lsn
+ ut_a(lsn == log_sys.lsn
|| srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
if (lsn < srv_start_lsn) {
@@ -2160,7 +1951,7 @@ wait_suspend_loop:
/* Make some checks that the server really is quiet */
ut_a(srv_get_active_thread_type() == SRV_NONE);
- ut_a(lsn == log_sys->lsn
+ ut_a(lsn == log_sys.lsn
|| srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
}
@@ -2172,8 +1963,8 @@ log_peek_lsn(
/*=========*/
lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */
{
- if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
- *lsn = log_sys->lsn;
+ if (0 == mutex_enter_nowait(&(log_sys.mutex))) {
+ *lsn = log_sys.lsn;
log_mutex_exit();
@@ -2200,15 +1991,15 @@ log_print(
"Log flushed up to " LSN_PF "\n"
"Pages flushed up to " LSN_PF "\n"
"Last checkpoint at " LSN_PF "\n",
- log_sys->lsn,
- log_sys->flushed_to_disk_lsn,
+ log_sys.lsn,
+ log_sys.flushed_to_disk_lsn,
log_buf_pool_get_oldest_modification(),
- log_sys->last_checkpoint_lsn);
+ log_sys.last_checkpoint_lsn);
current_time = time(NULL);
time_elapsed = difftime(current_time,
- log_sys->last_printout_time);
+ log_sys.last_printout_time);
if (time_elapsed <= 0) {
time_elapsed = 1;
@@ -2218,15 +2009,15 @@ log_print(
ULINTPF " pending log flushes, "
ULINTPF " pending chkp writes\n"
ULINTPF " log i/o's done, %.2f log i/o's/second\n",
- log_sys->n_pending_flushes,
- log_sys->n_pending_checkpoint_writes,
- log_sys->n_log_ios,
+ log_sys.n_pending_flushes,
+ log_sys.n_pending_checkpoint_writes,
+ log_sys.n_log_ios,
static_cast<double>(
- log_sys->n_log_ios - log_sys->n_log_ios_old)
+ log_sys.n_log_ios - log_sys.n_log_ios_old)
/ time_elapsed);
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = current_time;
+ log_sys.n_log_ios_old = log_sys.n_log_ios;
+ log_sys.last_printout_time = current_time;
log_mutex_exit();
}
@@ -2237,68 +2028,39 @@ void
log_refresh_stats(void)
/*===================*/
{
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
-}
-
-/** Close a log group.
-@param[in,out] group log group to close */
-static
-void
-log_group_close(log_group_t* group)
-{
- ulint i;
-
- for (i = 0; i < group->n_files; i++) {
- ut_free(group->file_header_bufs_ptr[i]);
- }
-
- ut_free(group->file_header_bufs_ptr);
- ut_free(group->file_header_bufs);
- ut_free(group->checkpoint_buf_ptr);
- group->n_files = 0;
- group->file_header_bufs_ptr = NULL;
- group->file_header_bufs = NULL;
- group->checkpoint_buf_ptr = NULL;
-}
-
-/********************************************************//**
-Closes all log groups. */
-void
-log_group_close_all(void)
-/*=====================*/
-{
- log_group_close(&log_sys->log);
+ log_sys.n_log_ios_old = log_sys.n_log_ios;
+ log_sys.last_printout_time = time(NULL);
}
/** Shut down the redo log subsystem. */
-void
-log_shutdown()
+void log_t::close()
{
- log_group_close_all();
+ ut_ad(this == &log_sys);
+ if (!is_initialised()) return;
+ m_initialised = false;
+ log.close();
- ut_free(log_sys->buf_ptr);
- log_sys->buf_ptr = NULL;
- log_sys->buf = NULL;
- ut_free(log_sys->checkpoint_buf_ptr);
- log_sys->checkpoint_buf_ptr = NULL;
- log_sys->checkpoint_buf = NULL;
+ if (!first_in_use)
+ buf -= srv_log_buffer_size;
+ ut_free_dodump(buf, srv_log_buffer_size * 2);
+ buf = NULL;
- os_event_destroy(log_sys->flush_event);
+ os_event_destroy(flush_event);
- rw_lock_free(&log_sys->checkpoint_lock);
+ rw_lock_free(&checkpoint_lock);
+ /* rw_lock_free() already called checkpoint_lock.~rw_lock_t();
+ tame the debug assertions when the destructor will be called once more. */
+ ut_ad(checkpoint_lock.magic_n == 0);
+ ut_d(checkpoint_lock.magic_n = RW_LOCK_MAGIC_N);
- mutex_free(&log_sys->mutex);
- mutex_free(&log_sys->write_mutex);
- mutex_free(&log_sys->log_flush_order_mutex);
+ mutex_free(&mutex);
+ mutex_free(&write_mutex);
+ mutex_free(&log_flush_order_mutex);
- if (!srv_read_only_mode && srv_scrub_log) {
- os_event_destroy(log_scrub_event);
- }
+ if (!srv_read_only_mode && srv_scrub_log)
+ os_event_destroy(log_scrub_event);
- recv_sys_close();
- ut_free(log_sys);
- log_sys = NULL;
+ recv_sys_close();
}
/******************************************************//**
@@ -2319,7 +2081,7 @@ log_pad_current_log_block(void)
lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
pad_length = OS_FILE_LOG_BLOCK_SIZE
- - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
- LOG_BLOCK_TRL_SIZE;
if (pad_length
== (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
@@ -2336,7 +2098,7 @@ log_pad_current_log_block(void)
log_write_low(&b, 1);
}
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
log_close();
@@ -2352,14 +2114,14 @@ log_scrub()
/*=========*/
{
log_mutex_enter();
- ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn);
+ ulint cur_lbn = log_block_convert_lsn_to_no(log_sys.lsn);
if (next_lbn_to_pad == cur_lbn)
{
log_pad_current_log_block();
}
- next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn);
+ next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys.lsn);
log_mutex_exit();
}
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index f71067fddf2..afcd9079480 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -60,7 +60,7 @@ Created 9/20/1997 Heikki Tuuri
#include "row0merge.h"
/** Log records are stored in the hash table in chunks at most of this size;
-this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
+this must be less than srv_page_size as it is stored in the buffer pool */
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
/** Read-ahead area in applying log records to file pages */
@@ -77,7 +77,7 @@ volatile bool recv_recovery_on;
bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
+Protected by log_sys.mutex. */
bool recv_no_log_write = false;
#endif /* UNIV_DEBUG */
@@ -568,7 +568,9 @@ recv_sys_close()
os_event_destroy(recv_sys->flush_end);
}
- ut_free(recv_sys->buf);
+ if (recv_sys->buf != NULL) {
+ ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
+ }
ut_ad(!recv_writer_thread_active);
mutex_free(&recv_sys->writer_mutex);
@@ -627,7 +629,7 @@ DECLARE_THREAD(recv_writer_thread)(
/* Wait till we get a signal to clean the LRU list.
Bounded by max wait time of 100ms. */
- ib_uint64_t sig_count = os_event_reset(buf_flush_event);
+ int64_t sig_count = os_event_reset(buf_flush_event);
os_event_wait_time_low(buf_flush_event, 100000, sig_count);
mutex_enter(&recv_sys->writer_mutex);
@@ -683,7 +685,8 @@ recv_sys_init()
}
recv_sys->buf = static_cast<byte*>(
- ut_malloc_nokey(RECV_PARSING_BUF_SIZE));
+ ut_malloc_dontdump(RECV_PARSING_BUF_SIZE));
+ recv_sys->buf_size = RECV_PARSING_BUF_SIZE;
recv_sys->addr_hash = hash_create(size / 512);
recv_sys->progress_time = ut_time();
@@ -717,8 +720,9 @@ recv_sys_debug_free(void)
hash_table_free(recv_sys->addr_hash);
mem_heap_free(recv_sys->heap);
- ut_free(recv_sys->buf);
+ ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
+ recv_sys->buf_size = 0;
recv_sys->buf = NULL;
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
@@ -734,57 +738,46 @@ recv_sys_debug_free(void)
mutex_exit(&(recv_sys->mutex));
}
-/** Read a log segment to a buffer.
-@param[out] buf buffer
-@param[in] group redo log files
-@param[in, out] start_lsn in : read area start, out: the last read valid lsn
+/** Read a log segment to log_sys.buf.
+@param[in,out] start_lsn in: read area start,
+out: the last read valid lsn
@param[in] end_lsn read area end
-@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
-@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
-bool
-log_group_read_log_seg(
- byte* buf,
- const log_group_t* group,
- lsn_t *start_lsn,
- lsn_t end_lsn)
+@return whether no invalid blocks (e.g. checksum mismatch) were found
+bool log_t::files::read_log_seg(lsn_t* start_lsn, lsn_t end_lsn)
{
ulint len;
- lsn_t source_offset;
bool success = true;
- ut_ad(log_mutex_own());
+ ut_ad(log_sys.mutex.is_owned());
ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE));
ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE));
-
+ byte* buf = log_sys.buf;
loop:
- source_offset = log_group_calc_lsn_offset(*start_lsn, group);
+ lsn_t source_offset = calc_lsn_offset(*start_lsn);
ut_a(end_lsn - *start_lsn <= ULINT_MAX);
len = (ulint) (end_lsn - *start_lsn);
ut_ad(len != 0);
- const bool at_eof = (source_offset % group->file_size) + len
- > group->file_size;
+ const bool at_eof = (source_offset % file_size) + len > file_size;
if (at_eof) {
/* If the above condition is true then len (which is ulint)
is > the expression below, so the typecast is ok */
- len = (ulint) (group->file_size -
- (source_offset % group->file_size));
+ len = ulint(file_size - (source_offset % file_size));
}
- log_sys->n_log_ios++;
+ log_sys.n_log_ios++;
MONITOR_INC(MONITOR_LOG_IO);
- ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ ut_a((source_offset >> srv_page_size_shift) <= ULINT_MAX);
- const ulint page_no
- = (ulint) (source_offset / univ_page_size.physical());
+ const ulint page_no = ulint(source_offset >> srv_page_size_shift);
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) (source_offset % univ_page_size.physical()),
+ ulint(source_offset & (srv_page_size - 1)),
len, buf, NULL);
for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE,
@@ -802,7 +795,7 @@ loop:
break;
}
- if (innodb_log_checksums || group->is_encrypted()) {
+ if (innodb_log_checksums || is_encrypted()) {
ulint crc = log_block_calc_checksum_crc32(buf);
ulint cksum = log_block_get_checksum(buf);
@@ -825,7 +818,7 @@ loop:
break;
}
- if (group->is_encrypted()) {
+ if (is_encrypted()) {
log_crypt(buf, *start_lsn,
OS_FILE_LOG_BLOCK_SIZE, true);
}
@@ -872,14 +865,10 @@ recv_synchronize_groups()
the block is always incomplete */
lsn_t start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- log_group_read_log_seg(log_sys->buf, &log_sys->log,
- &start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE);
-
- /* Update the fields in the group struct to correspond to
- recovered_lsn */
-
- log_group_set_fields(&log_sys->log, recovered_lsn);
+ OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.read_log_seg(&start_lsn,
+ start_lsn + OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.set_fields(recovered_lsn);
/* Copy the checkpoint info to the log; remember that we have
incremented checkpoint_no by one, and the info will not be written
@@ -905,19 +894,17 @@ recv_check_log_header_checksum(
}
/** Find the latest checkpoint in the format-0 log header.
-@param[out] max_group log group, or NULL
@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
-recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
+recv_find_max_checkpoint_0(ulint* max_field)
{
- log_group_t* group = &log_sys->log;
ib_uint64_t max_no = 0;
ib_uint64_t checkpoint_no;
- byte* buf = log_sys->checkpoint_buf;
+ byte* buf = log_sys.checkpoint_buf;
- ut_ad(group->format == 0);
+ ut_ad(log_sys.log.format == 0);
/** Offset of the first checkpoint checksum */
static const uint CHECKSUM_1 = 288;
@@ -928,11 +915,11 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
/** Least significant bits of the checkpoint offset */
static const uint OFFSET_LOW32 = 16;
- *max_group = NULL;
+ bool found = false;
for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
- log_group_header_read(group, field);
+ log_header_read(field);
if (static_cast<uint32_t>(ut_fold_binary(buf, CHECKSUM_1))
!= mach_read_from_4(buf + CHECKSUM_1)
@@ -959,21 +946,19 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
mach_read_from_8(buf + LOG_CHECKPOINT_LSN)));
if (checkpoint_no >= max_no) {
- *max_group = group;
+ found = true;
*max_field = field;
max_no = checkpoint_no;
- group->state = LOG_GROUP_OK;
-
- group->lsn = mach_read_from_8(
+ log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = static_cast<ib_uint64_t>(
+ log_sys.log.lsn_offset = static_cast<ib_uint64_t>(
mach_read_from_4(buf + OFFSET_HIGH32)) << 32
| mach_read_from_4(buf + OFFSET_LOW32);
}
}
- if (*max_group != NULL) {
+ if (found) {
return(DB_SUCCESS);
}
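
In the format-0 header the 64-bit checkpoint offset is stored as two 32-bit halves, which the loop above rejoins with a shift and an OR into log_sys.log.lsn_offset. A trivial round-trip of that reconstruction, with placeholder values:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const uint64_t lsn_offset = (uint64_t(7) << 32) | 0xDEADBEEFu;
        const uint32_t high = uint32_t(lsn_offset >> 32);
        const uint32_t low = uint32_t(lsn_offset);
        assert(((uint64_t(high) << 32) | low) == lsn_offset);
        return 0;
    }
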
@@ -994,34 +979,27 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
{
log_mutex_enter();
- log_group_t* group = &log_sys->log;
- const lsn_t source_offset
- = log_group_calc_lsn_offset(lsn, group);
+ const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn);
log_mutex_exit();
- const ulint page_no
- = (ulint) (source_offset / univ_page_size.physical());
- byte* buf = log_sys->buf;
+ const ulint page_no = ulint(source_offset >> srv_page_size_shift);
+ byte* buf = log_sys.buf;
static const char* NO_UPGRADE_RECOVERY_MSG =
"Upgrade after a crash is not supported."
" This redo log was created before MariaDB 10.2.2";
- static const char* NO_UPGRADE_RTFM_MSG =
- ". Please follow the instructions at "
- "https://mariadb.com/kb/en/library/upgrading/";
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
- (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
- % univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
+ ulint((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
+ & (srv_page_size - 1)),
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum_format_0(buf)
!= log_block_get_checksum(buf)
&& !log_crypt_101_read_block(buf)) {
ib::error() << NO_UPGRADE_RECOVERY_MSG
- << ", and it appears corrupted"
- << NO_UPGRADE_RTFM_MSG;
+ << ", and it appears corrupted.";
return(DB_CORRUPTION);
}
@@ -1029,12 +1007,11 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
== (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
} else if (crypt) {
ib::error() << "Cannot decrypt log for upgrading."
- " The encrypted log was created before MariaDB 10.2.2"
- << NO_UPGRADE_RTFM_MSG;
+ " The encrypted log was created"
+ " before MariaDB 10.2.2.";
return DB_ERROR;
} else {
- ib::error() << NO_UPGRADE_RECOVERY_MSG
- << NO_UPGRADE_RTFM_MSG;
+ ib::error() << NO_UPGRADE_RECOVERY_MSG << ".";
return(DB_ERROR);
}
@@ -1043,29 +1020,29 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt)
recv_sys->parse_start_lsn = recv_sys->recovered_lsn
= recv_sys->scanned_lsn
= recv_sys->mlog_checkpoint_lsn = lsn;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
- = log_sys->lsn = log_sys->write_lsn
- = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn
+ = log_sys.lsn = log_sys.write_lsn
+ = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn
= lsn;
- log_sys->next_checkpoint_no = 0;
+ log_sys.next_checkpoint_no = 0;
return(DB_SUCCESS);
}
-/** Determine if a redo log from MariaDB 10.3 is clean.
+/** Determine if a redo log from MariaDB 10.4 is clean.
@return error code
@retval DB_SUCCESS if the redo log is clean
@retval DB_CORRUPTION if the redo log is corrupted
@retval DB_ERROR if the redo log is not empty */
-static
-dberr_t
-recv_log_recover_10_3()
+static dberr_t recv_log_recover_10_4()
{
- log_group_t* group = &log_sys->log;
- const lsn_t lsn = group->lsn;
- const lsn_t source_offset = log_group_calc_lsn_offset(lsn, group);
+ ut_ad(!log_sys.is_encrypted());
+ const lsn_t lsn = log_sys.log.lsn;
+ log_mutex_enter();
+ const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn);
+ log_mutex_exit();
const ulint page_no
= (ulint) (source_offset / univ_page_size.physical());
- byte* buf = log_sys->buf;
+ byte* buf = log_sys.buf;
fil_io(IORequestLogRead, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
@@ -1075,11 +1052,7 @@ recv_log_recover_10_3()
OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) {
- return(DB_CORRUPTION);
- }
-
- if (group->is_encrypted()) {
- log_crypt(buf, lsn, OS_FILE_LOG_BLOCK_SIZE, true);
+ return DB_CORRUPTION;
}
/* On a clean shutdown, the redo log will be logically empty
@@ -1087,7 +1060,7 @@ recv_log_recover_10_3()
if (log_block_get_data_len(buf)
!= (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) {
- return(DB_ERROR);
+ return DB_ERROR;
}
/* Mark the redo log for downgrading. */
@@ -1095,12 +1068,12 @@ recv_log_recover_10_3()
recv_sys->parse_start_lsn = recv_sys->recovered_lsn
= recv_sys->scanned_lsn
= recv_sys->mlog_checkpoint_lsn = lsn;
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn
- = log_sys->lsn = log_sys->write_lsn
- = log_sys->current_flush_lsn = log_sys->flushed_to_disk_lsn
+ log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn
+ = log_sys.lsn = log_sys.write_lsn
+ = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn
= lsn;
- log_sys->next_checkpoint_no = 0;
- return(DB_SUCCESS);
+ log_sys.next_checkpoint_no = 0;
+ return DB_SUCCESS;
}
/** Find the latest checkpoint in the log header.
@@ -1109,29 +1082,24 @@ recv_log_recover_10_3()
dberr_t
recv_find_max_checkpoint(ulint* max_field)
{
- log_group_t* group;
ib_uint64_t max_no;
ib_uint64_t checkpoint_no;
ulint field;
byte* buf;
- group = &log_sys->log;
-
max_no = 0;
*max_field = 0;
- buf = log_sys->checkpoint_buf;
-
- group->state = LOG_GROUP_CORRUPTED;
+ buf = log_sys.checkpoint_buf;
- log_group_header_read(group, 0);
+ log_header_read(0);
/* Check the header page checksum. There was no
checksum in the first redo log format (version 0). */
- group->format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
- group->subformat = group->format
+ log_sys.log.format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
+ log_sys.log.subformat = log_sys.log.format != LOG_HEADER_FORMAT_3_23
? mach_read_from_4(buf + LOG_HEADER_SUBFORMAT)
: 0;
- if (group->format != 0
+ if (log_sys.log.format != LOG_HEADER_FORMAT_3_23
&& !recv_check_log_header_checksum(buf)) {
ib::error() << "Invalid redo log header checksum.";
return(DB_CORRUPTION);
@@ -1143,35 +1111,27 @@ recv_find_max_checkpoint(ulint* max_field)
/* Ensure that the string is NUL-terminated. */
creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR] = 0;
- switch (group->format) {
- case 0:
- return(recv_find_max_checkpoint_0(&group, max_field));
+ switch (log_sys.log.format) {
+ case LOG_HEADER_FORMAT_3_23:
+ return(recv_find_max_checkpoint_0(max_field));
case LOG_HEADER_FORMAT_10_2:
case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
+ case LOG_HEADER_FORMAT_CURRENT:
+ case LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED:
case LOG_HEADER_FORMAT_10_4:
/* We can only parse the unencrypted LOG_HEADER_FORMAT_10_4.
The encrypted format uses a larger redo log block trailer. */
break;
default:
ib::error() << "Unsupported redo log format."
- " The redo log was created"
- " with " << creator <<
- ". Please follow the instructions at "
- REFMAN "upgrading-downgrading.html";
- /* Do not issue a message about a possibility
- to cleanly shut down the newer server version
- and to remove the redo logs, because the
- format of the system data structures may
- radically change after MySQL 5.7. */
+ " The redo log was created with " << creator << ".";
return(DB_ERROR);
}
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
- log_group_header_read(group, field);
+ log_header_read(field);
const ulint crc32 = log_block_calc_checksum_crc32(buf);
const ulint cksum = log_block_get_checksum(buf);
@@ -1186,7 +1146,7 @@ recv_find_max_checkpoint(ulint* max_field)
continue;
}
- if (group->is_encrypted()
+ if (log_sys.is_encrypted()
&& !log_crypt_read_checkpoint_buf(buf)) {
ib::error() << "Reading checkpoint"
" encryption info failed.";
@@ -1204,12 +1164,11 @@ recv_find_max_checkpoint(ulint* max_field)
if (checkpoint_no >= max_no) {
*max_field = field;
max_no = checkpoint_no;
- group->state = LOG_GROUP_OK;
- group->lsn = mach_read_from_8(
+ log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_8(
+ log_sys.log.lsn_offset = mach_read_from_8(
buf + LOG_CHECKPOINT_OFFSET);
- log_sys->next_checkpoint_no = checkpoint_no;
+ log_sys.next_checkpoint_no = checkpoint_no;
}
}
@@ -1226,22 +1185,8 @@ recv_find_max_checkpoint(ulint* max_field)
return(DB_ERROR);
}
- switch (group->format) {
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
- if (group->subformat == 1) {
- /* 10.2 with new crash-safe TRUNCATE */
- break;
- }
- /* fall through */
- case LOG_HEADER_FORMAT_10_4:
- if (srv_operation == SRV_OPERATION_BACKUP) {
- ib::error()
- << "Incompatible redo log format."
- " The redo log was created with " << creator;
- return DB_ERROR;
- }
- dberr_t err = recv_log_recover_10_3();
+ if (log_sys.log.format == LOG_HEADER_FORMAT_10_4) {
+ dberr_t err = recv_log_recover_10_4();
if (err != DB_SUCCESS) {
ib::error()
<< "Downgrade after a crash is not supported."
@@ -1249,10 +1194,10 @@ recv_find_max_checkpoint(ulint* max_field)
<< (err == DB_ERROR
? "." : ", and it appears corrupted.");
}
- return(err);
+ return err;
}
- return(DB_SUCCESS);
+ return DB_SUCCESS;
}
/** Try to parse a single log record body and also applies it if
@@ -1584,18 +1529,22 @@ parse_log:
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
break;
case MLOG_UNDO_ERASE_END:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
+ if (page) {
+ ut_ad(page_type == FIL_PAGE_UNDO_LOG);
+ trx_undo_erase_page_end(page);
+ }
break;
case MLOG_UNDO_INIT:
/* Allow anything in page_type when creating a page. */
- ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
+ ptr = trx_undo_parse_page_init(ptr, end_ptr, page);
break;
- case MLOG_UNDO_HDR_CREATE:
case MLOG_UNDO_HDR_REUSE:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
- page, mtr);
+ ptr = trx_undo_parse_page_header_reuse(ptr, end_ptr, page);
+ break;
+ case MLOG_UNDO_HDR_CREATE:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
+ ptr = trx_undo_parse_page_header(ptr, end_ptr, page, mtr);
break;
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
ut_ad(!page || fil_page_type_is_index(page_type));
@@ -1663,9 +1612,15 @@ parse_log:
ptr, end_ptr, page, page_zip, index);
}
break;
+ case MLOG_ZIP_WRITE_TRX_ID:
+ /* This must be a clustered index leaf page. */
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = page_zip_parse_write_trx_id(ptr, end_ptr,
+ page, page_zip);
+ break;
case MLOG_FILE_WRITE_CRYPT_DATA:
dberr_t err;
- ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block, &err));
+ ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, &err));
if (err != DB_SUCCESS) {
recv_sys->found_corrupt_log = TRUE;
@@ -1775,13 +1730,13 @@ recv_add_to_hash_table(
ut_ad(type != MLOG_INDEX_LOAD);
ut_ad(type != MLOG_TRUNCATE);
- len = rec_end - body;
+ len = ulint(rec_end - body);
recv = static_cast<recv_t*>(
mem_heap_alloc(recv_sys->heap, sizeof(recv_t)));
recv->type = type;
- recv->len = rec_end - body;
+ recv->len = ulint(rec_end - body);
recv->start_lsn = start_lsn;
recv->end_lsn = end_lsn;
@@ -1810,13 +1765,13 @@ recv_add_to_hash_table(
prev_field = &(recv->data);
- /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
+ /* Store the log record body in chunks of less than srv_page_size:
recv_sys->heap grows into the buffer pool, and bigger chunks could not
be allocated */
while (rec_end > body) {
- len = rec_end - body;
+ len = ulint(rec_end - body);
if (len > RECV_DATA_BLOCK_SIZE) {
len = RECV_DATA_BLOCK_SIZE;
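
The comment above explains why a record body is stored in chunks of at most RECV_DATA_BLOCK_SIZE bytes: the recovery heap grows into the buffer pool, so oversized allocations must be avoided. A minimal sketch of that chunking loop, with a hypothetical kChunkSize standing in for RECV_DATA_BLOCK_SIZE and std::vector replacing the heap-allocated chunk list:

#include <algorithm>
#include <cstddef>
#include <vector>

static const std::size_t kChunkSize = 2048;  // stand-in for RECV_DATA_BLOCK_SIZE

// Copy [body, rec_end) into pieces no larger than kChunkSize, mirroring
// the "while (rec_end > body)" loop above.
std::vector<std::vector<unsigned char>>
split_into_chunks(const unsigned char* body, const unsigned char* rec_end)
{
	std::vector<std::vector<unsigned char>> chunks;
	while (rec_end > body) {
		std::size_t len = static_cast<std::size_t>(rec_end - body);
		len = std::min(len, kChunkSize);
		chunks.emplace_back(body, body + len);  // one bounded chunk
		body += len;
	}
	return chunks;
}
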
@@ -1920,9 +1875,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv_addr->space, recv_addr->page_no);
}
- DBUG_PRINT("ib_log",
- ("Applying log to page %u:%u",
- recv_addr->space, recv_addr->page_no));
+ DBUG_LOG("ib_log", "Applying log to page " << block->page.id);
recv_addr->state = RECV_BEING_PROCESSED;
@@ -1967,11 +1920,12 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
start_lsn = end_lsn = 0;
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+ fil_space_t* space = fil_space_acquire(block->page.id.space());
while (recv) {
end_lsn = recv->end_lsn;
- ut_ad(end_lsn <= log_sys->log.scanned_lsn);
+ ut_ad(end_lsn <= log_sys.log.scanned_lsn);
if (recv->len > RECV_DATA_BLOCK_SIZE) {
/* We have to copy the record body to a separate
@@ -1993,13 +1947,6 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
with LSN less than recorded LSN is skipped.
Note: We can't skip complete recv_addr as same page may have
valid REDO records post truncate those needs to be applied. */
- bool skip_recv = false;
- if (srv_was_tablespace_truncated(fil_space_get(recv_addr->space))) {
- lsn_t init_lsn =
- truncate_t::get_truncated_tablespace_init_lsn(
- recv_addr->space);
- skip_recv = (recv->start_lsn < init_lsn);
- }
/* Ignore applying the redo logs for tablespace that is
truncated. Post recovery there is fixup action that will
@@ -2009,8 +1956,11 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
was re-inited and that would lead to an error while applying
such action. */
if (recv->start_lsn >= page_lsn
- && !srv_is_tablespace_truncated(recv_addr->space)
- && !skip_recv) {
+ && !srv_is_tablespace_truncated(space->id)
+ && !(srv_was_tablespace_truncated(space)
+ && recv->start_lsn
+ < truncate_t::get_truncated_tablespace_init_lsn(
+ space->id))) {
lsn_t end_lsn;
@@ -2027,22 +1977,20 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv_addr->space, recv_addr->page_no);
}
- DBUG_PRINT("ib_log",
- ("apply " LSN_PF ":"
- " %s len " ULINTPF " page %u:%u",
- recv->start_lsn,
- get_mlog_string(recv->type), recv->len,
- recv_addr->space,
- recv_addr->page_no));
+ DBUG_LOG("ib_log", "apply " << recv->start_lsn << ": "
+ << get_mlog_string(recv->type)
+ << " len " << recv->len
+ << " page " << block->page.id);
recv_parse_or_apply_log_rec_body(
recv->type, buf, buf + recv->len,
- recv_addr->space, recv_addr->page_no,
+ block->page.id.space(),
+ block->page.id.page_no(),
true, block, &mtr);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
- mach_write_to_8(UNIV_PAGE_SIZE
+ mach_write_to_8(srv_page_size
- FIL_PAGE_END_LSN_OLD_CHKSUM
+ page, end_lsn);
@@ -2059,6 +2007,8 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv = UT_LIST_GET_NEXT(rec_list, recv);
}
+ space->release();
+
#ifdef UNIV_ZIP_DEBUG
if (fil_page_index_page_check(page)) {
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
@@ -2401,7 +2351,7 @@ recv_parse_log_rec(
end_ptr));
}
- return(new_ptr - ptr);
+ return ulint(new_ptr - ptr);
}
/*******************************************************//**
@@ -2576,9 +2526,7 @@ loop:
/* Do nothing */
break;
case MLOG_CHECKPOINT:
-#if SIZE_OF_MLOG_CHECKPOINT != 1 + 8
-# error SIZE_OF_MLOG_CHECKPOINT != 1 + 8
-#endif
+ compile_time_assert(SIZE_OF_MLOG_CHECKPOINT == 1 + 8);
lsn = mach_read_from_8(ptr + 1);
if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) {
@@ -3110,7 +3058,6 @@ recv_scan_log_recs(
/** Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found.
-@param[in,out] group log group
@param[in] checkpoint_lsn latest checkpoint log sequence number
@param[in,out] contiguous_lsn log sequence number
until which all redo log has been scanned
@@ -3120,7 +3067,6 @@ can be applied to the tablespaces
static
bool
recv_group_scan_log_recs(
- log_group_t* group,
lsn_t checkpoint_lsn,
lsn_t* contiguous_lsn,
bool last_phase)
@@ -3149,12 +3095,12 @@ recv_group_scan_log_recs(
lsn_t end_lsn;
store_t store_to_hash = recv_sys->mlog_checkpoint_lsn == 0
? STORE_NO : (last_phase ? STORE_IF_EXISTS : STORE_YES);
- ulint available_mem = UNIV_PAGE_SIZE
+ ulint available_mem = srv_page_size
* (buf_pool_get_n_pages()
- (recv_n_pool_free_frames * srv_buf_pool_instances));
- group->scanned_lsn = end_lsn = *contiguous_lsn = ut_uint64_align_down(
- *contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.log.scanned_lsn = end_lsn = *contiguous_lsn =
+ ut_uint64_align_down(*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
do {
if (last_phase && store_to_hash == STORE_NO) {
@@ -3169,15 +3115,13 @@ recv_group_scan_log_recs(
start_lsn = ut_uint64_align_down(end_lsn,
OS_FILE_LOG_BLOCK_SIZE);
end_lsn = start_lsn;
- log_group_read_log_seg(
- log_sys->buf, group, &end_lsn,
- start_lsn + RECV_SCAN_SIZE);
+ log_sys.log.read_log_seg(&end_lsn, start_lsn + RECV_SCAN_SIZE);
} while (end_lsn != start_lsn
&& !recv_scan_log_recs(
- available_mem, &store_to_hash, log_sys->buf,
+ available_mem, &store_to_hash, log_sys.buf,
checkpoint_lsn,
start_lsn, end_lsn,
- contiguous_lsn, &group->scanned_lsn));
+ contiguous_lsn, &log_sys.log.scanned_lsn));
if (recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs) {
DBUG_RETURN(false);
@@ -3185,7 +3129,7 @@ recv_group_scan_log_recs(
DBUG_PRINT("ib_log", ("%s " LSN_PF " completed",
last_phase ? "rescan" : "scan",
- group->scanned_lsn));
+ log_sys.log.scanned_lsn));
DBUG_RETURN(store_to_hash == STORE_NO);
}
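
recv_group_scan_log_recs() above aligns the resume LSN down to OS_FILE_LOG_BLOCK_SIZE before each read of a RECV_SCAN_SIZE segment. A one-line sketch of that alignment, assuming (as holds for the 512-byte log block size) that the block size is a power of two; align_down is an invented name standing in for ut_uint64_align_down():

#include <cstdint>

// Round lsn down to a block boundary; block_size must be a power of two.
inline uint64_t align_down(uint64_t lsn, uint64_t block_size)
{
	return lsn & ~(block_size - 1);
}

For example, align_down(1000, 512) yields 512, so scanning always restarts at the beginning of a complete log block.
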
@@ -3393,55 +3337,35 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
log_mutex_enter();
- /* Look for the latest checkpoint from any of the log groups */
-
err = recv_find_max_checkpoint(&max_cp_field);
if (err != DB_SUCCESS) {
-skip_apply:
+
+ srv_start_lsn = recv_sys->recovered_lsn = log_sys.lsn;
log_mutex_exit();
return(err);
}
- switch (log_sys->log.format) {
- case 0:
- break;
- case LOG_HEADER_FORMAT_10_2:
- case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
- break;
- case LOG_HEADER_FORMAT_10_3:
- case LOG_HEADER_FORMAT_10_3 | LOG_HEADER_FORMAT_ENCRYPTED:
- if (log_sys->log.subformat == 1) {
- /* 10.2 with new crash-safe TRUNCATE */
- break;
- }
- /* fall through */
- default:
- /* This must be a clean log from a newer version. */
- goto skip_apply;
- }
-
- log_group_header_read(&log_sys->log, max_cp_field);
+ log_header_read(max_cp_field);
- buf = log_sys->checkpoint_buf;
+ buf = log_sys.checkpoint_buf;
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
-
+ /* Start reading the log from the checkpoint lsn. The variable
+ contiguous_lsn contains an lsn up to which the log is known to
+ be contiguously written. */
recv_sys->mlog_checkpoint_lsn = 0;
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
+ ut_ad(RECV_SCAN_SIZE <= srv_log_buffer_size);
const lsn_t end_lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_END_LSN);
ut_ad(recv_sys->n_addrs == 0);
contiguous_lsn = checkpoint_lsn;
- switch (log_sys->log.format) {
+ switch (log_sys.log.format) {
case 0:
log_mutex_exit();
return recv_log_format_0_recover(checkpoint_lsn,
@@ -3460,9 +3384,7 @@ skip_apply:
}
/* Look for MLOG_CHECKPOINT. */
- log_group_t* group = &log_sys->log;
- recv_group_scan_log_recs(group, checkpoint_lsn, &contiguous_lsn,
- false);
+ recv_group_scan_log_recs(checkpoint_lsn, &contiguous_lsn, false);
/* The first scan should not have stored or applied any records. */
ut_ad(recv_sys->n_addrs == 0);
ut_ad(!recv_sys->found_corrupt_fs);
@@ -3479,7 +3401,7 @@ skip_apply:
}
if (recv_sys->mlog_checkpoint_lsn == 0) {
- lsn_t scan_lsn = group->scanned_lsn;
+ lsn_t scan_lsn = log_sys.log.scanned_lsn;
if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
log_mutex_exit();
ib::error err;
@@ -3492,12 +3414,12 @@ skip_apply:
return(DB_ERROR);
}
- group->scanned_lsn = checkpoint_lsn;
+ log_sys.log.scanned_lsn = checkpoint_lsn;
rescan = false;
} else {
contiguous_lsn = checkpoint_lsn;
rescan = recv_group_scan_log_recs(
- group, checkpoint_lsn, &contiguous_lsn, false);
+ checkpoint_lsn, &contiguous_lsn, false);
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
|| recv_sys->found_corrupt_fs) {
@@ -3543,7 +3465,7 @@ skip_apply:
}
}
- log_sys->lsn = recv_sys->recovered_lsn;
+ log_sys.lsn = recv_sys->recovered_lsn;
if (recv_needed_recovery) {
bool missing_tablespace = false;
@@ -3570,8 +3492,7 @@ skip_apply:
lsn_t recent_stored_lsn = recv_sys->last_stored_lsn;
rescan = recv_group_scan_log_recs(
- group, checkpoint_lsn,
- &recent_stored_lsn, false);
+ checkpoint_lsn, &recent_stored_lsn, false);
ut_ad(!recv_sys->found_corrupt_fs);
@@ -3604,8 +3525,8 @@ skip_apply:
if (rescan) {
contiguous_lsn = checkpoint_lsn;
- recv_group_scan_log_recs(group, checkpoint_lsn,
- &contiguous_lsn, true);
+ recv_group_scan_log_recs(
+ checkpoint_lsn, &contiguous_lsn, true);
if ((recv_sys->found_corrupt_log
&& !srv_force_recovery)
@@ -3618,12 +3539,11 @@ skip_apply:
ut_ad(!rescan || recv_sys->n_addrs == 0);
}
- /* We currently have only one log group */
-
- if (group->scanned_lsn < checkpoint_lsn
- || group->scanned_lsn < recv_max_page_lsn) {
+ if (log_sys.log.scanned_lsn < checkpoint_lsn
+ || log_sys.log.scanned_lsn < recv_max_page_lsn) {
- ib::error() << "We scanned the log up to " << group->scanned_lsn
+ ib::error() << "We scanned the log up to "
+ << log_sys.log.scanned_lsn
<< ". A checkpoint was at " << checkpoint_lsn << " and"
" the maximum LSN on a database page was "
<< recv_max_page_lsn << ". It is possible that the"
@@ -3639,11 +3559,8 @@ skip_apply:
return(DB_ERROR);
}
- /* Synchronize the uncorrupted log groups to the most up-to-date log
- group; we also copy checkpoint info to groups */
-
- log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = checkpoint_no + 1;
+ log_sys.next_checkpoint_lsn = checkpoint_lsn;
+ log_sys.next_checkpoint_no = checkpoint_no + 1;
recv_synchronize_groups();
@@ -3653,24 +3570,24 @@ skip_apply:
srv_start_lsn = recv_sys->recovered_lsn;
}
- log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->write_lsn = log_sys->lsn;
+ log_sys.buf_free = ulong(log_sys.lsn % OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.buf_next_to_write = log_sys.buf_free;
+ log_sys.write_lsn = log_sys.lsn;
- log_sys->last_checkpoint_lsn = checkpoint_lsn;
+ log_sys.last_checkpoint_lsn = checkpoint_lsn;
if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) {
/* Write a MLOG_CHECKPOINT marker as the first thing,
before generating any other redo log. This ensures
that subsequent crash recovery will be possible even
if the server were killed soon after this. */
- fil_names_clear(log_sys->last_checkpoint_lsn, true);
+ fil_names_clear(log_sys.last_checkpoint_lsn, true);
}
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
- log_sys->next_checkpoint_no = ++checkpoint_no;
+ log_sys.next_checkpoint_no = ++checkpoint_no;
mutex_enter(&recv_sys->mutex);
@@ -3747,7 +3664,6 @@ recv_recovery_rollback_active(void)
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/* Drop garbage tables. */
- if (srv_safe_truncate)
row_mysql_drop_garbage_tables();
/* Drop any auxiliary tables that were not dropped when the
@@ -3759,8 +3675,8 @@ recv_recovery_rollback_active(void)
/* Rollback the uncommitted transactions which have no user
session */
- trx_rollback_or_clean_is_active = true;
- os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
+ trx_rollback_is_active = true;
+ os_thread_create(trx_rollback_all_recovered, 0, 0);
}
}
@@ -3777,26 +3693,26 @@ recv_reset_logs(
{
ut_ad(log_mutex_own());
- log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
- log_sys->log.lsn = log_sys->lsn;
- log_sys->log.lsn_offset = LOG_FILE_HDR_SIZE;
+ log_sys.log.lsn = log_sys.lsn;
+ log_sys.log.lsn_offset = LOG_FILE_HDR_SIZE;
- log_sys->buf_next_to_write = 0;
- log_sys->write_lsn = log_sys->lsn;
+ log_sys.buf_next_to_write = 0;
+ log_sys.write_lsn = log_sys.lsn;
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = 0;
+ log_sys.next_checkpoint_no = 0;
+ log_sys.last_checkpoint_lsn = 0;
- memset(log_sys->buf, 0, log_sys->buf_size);
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+ memset(log_sys.buf, 0, srv_log_buffer_size);
+ log_block_init(log_sys.buf, log_sys.lsn);
+ log_block_set_first_rec_group(log_sys.buf, LOG_BLOCK_HDR_SIZE);
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn += LOG_BLOCK_HDR_SIZE;
+ log_sys.buf_free = LOG_BLOCK_HDR_SIZE;
+ log_sys.lsn += LOG_BLOCK_HDR_SIZE;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- (log_sys->lsn - log_sys->last_checkpoint_lsn));
+ (log_sys.lsn - log_sys.last_checkpoint_lsn));
log_mutex_exit();
@@ -3993,6 +3909,9 @@ static const char* get_mlog_string(mlog_id_t type)
case MLOG_ZIP_PAGE_REORGANIZE:
return("MLOG_ZIP_PAGE_REORGANIZE");
+ case MLOG_ZIP_WRITE_TRX_ID:
+ return("MLOG_ZIP_WRITE_TRX_ID");
+
case MLOG_FILE_RENAME2:
return("MLOG_FILE_RENAME2");
diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc
index b4f1dd0602f..1d3519501da 100644
--- a/storage/innobase/mem/mem0mem.cc
+++ b/storage/innobase/mem/mem0mem.cc
@@ -29,31 +29,6 @@ Created 6/9/1994 Heikki Tuuri
#include "srv0srv.h"
#include <stdarg.h>
-/** Duplicates a NUL-terminated string, allocated from a memory heap.
-@param[in] heap, memory heap where string is allocated
-@param[in] str) string to be copied
-@return own: a copy of the string */
-char*
-mem_heap_strdup(
- mem_heap_t* heap,
- const char* str)
-{
- return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
-}
-
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len) /*!< in: length of data, in bytes */
-{
- return(memcpy(mem_heap_alloc(heap, len), data, len));
-}
-
/**********************************************************************//**
Concatenate two strings and return the result, using a memory heap.
@return own: the result */
@@ -149,7 +124,7 @@ mem_heap_printf_low(
val = va_arg(ap, unsigned long);
- plen = sprintf(tmp, "%lu", val);
+ plen = size_t(sprintf(tmp, "%lu", val));
len += plen;
if (buf) {
@@ -242,7 +217,7 @@ mem_heap_validate(
break;
case MEM_HEAP_BUFFER:
case MEM_HEAP_BUFFER | MEM_HEAP_BTR_SEARCH:
- ut_ad(block->len <= UNIV_PAGE_SIZE);
+ ut_ad(block->len <= srv_page_size);
break;
default:
ut_error;
@@ -287,13 +262,13 @@ mem_heap_create_block_func(
/* In dynamic allocation, calculate the size: block header + data. */
len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
+ if (type == MEM_HEAP_DYNAMIC || len < srv_page_size / 2) {
ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
block = static_cast<mem_block_t*>(ut_malloc_nokey(len));
} else {
- len = UNIV_PAGE_SIZE;
+ len = srv_page_size;
if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
/* We cannot allocate the block from the
@@ -435,7 +410,7 @@ mem_heap_block_free(
len = block->len;
block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
+ if (type == MEM_HEAP_DYNAMIC || len < srv_page_size / 2) {
ut_ad(!buf_block);
ut_free(block);
} else {
diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc
index 9cc9b77d6a8..6baf1f06bf9 100644
--- a/storage/innobase/mtr/mtr0log.cc
+++ b/storage/innobase/mtr/mtr0log.cc
@@ -148,7 +148,7 @@ mlog_parse_nbytes(
offset = mach_read_from_2(ptr);
ptr += 2;
- if (offset >= UNIV_PAGE_SIZE) {
+ if (offset >= srv_page_size) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
@@ -316,7 +316,7 @@ mlog_write_string(
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(ptr && mtr);
- ut_a(len < UNIV_PAGE_SIZE);
+ ut_a(len < srv_page_size);
memcpy(ptr, str, len);
@@ -336,7 +336,7 @@ mlog_log_string(
byte* log_ptr;
ut_ad(ptr && mtr);
- ut_ad(len <= UNIV_PAGE_SIZE);
+ ut_ad(len <= srv_page_size);
log_ptr = mlog_open(mtr, 30);
@@ -387,7 +387,7 @@ mlog_parse_string(
len = mach_read_from_2(ptr);
ptr += 2;
- if (offset >= UNIV_PAGE_SIZE || len + offset > UNIV_PAGE_SIZE) {
+ if (offset >= srv_page_size || len + offset > srv_page_size) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
@@ -430,23 +430,30 @@ mlog_open_and_write_index(
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ mtr->set_modified();
+ switch (mtr->get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
+ return NULL;
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(0);
+ /* fall through */
+ case MTR_LOG_ALL:
+ break;
+ }
+
if (!page_rec_is_comp(rec)) {
- log_start = log_ptr = mlog_open(mtr, 11 + size);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
+ log_start = log_ptr = mtr->get_log()->open(11 + size);
log_ptr = mlog_write_initial_log_record_fast(rec, type,
log_ptr, mtr);
log_end = log_ptr + 11 + size;
} else {
ulint i;
+ bool is_instant = index->is_instant();
ulint n = dict_index_get_n_fields(index);
- ulint total = 11 + size + (n + 2) * 2;
- ulint alloc = total;
-
- if (alloc > mtr_buf_t::MAX_DATA_SIZE) {
- alloc = mtr_buf_t::MAX_DATA_SIZE;
- }
+ ulint total = 11 + (is_instant ? 2 : 0) + size + (n + 2) * 2;
+ ulint alloc = std::min(total,
+ ulint(mtr_buf_t::MAX_DATA_SIZE));
const bool is_leaf = page_is_leaf(page_align(rec));
@@ -456,30 +463,30 @@ mlog_open_and_write_index(
n = DICT_INDEX_SPATIAL_NODEPTR_SIZE;
}
- log_start = log_ptr = mlog_open(mtr, alloc);
-
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
-
+ log_start = log_ptr = mtr->get_log()->open(alloc);
log_end = log_ptr + alloc;
log_ptr = mlog_write_initial_log_record_fast(
rec, type, log_ptr, mtr);
- mach_write_to_2(log_ptr, n);
- log_ptr += 2;
+ if (is_instant) {
+ // marked as instant index
+ mach_write_to_2(log_ptr, n | 0x8000);
+
+ log_ptr += 2;
- if (is_leaf) {
- mach_write_to_2(
- log_ptr, dict_index_get_n_unique_in_tree(index));
+ // record the n_core_fields
+ mach_write_to_2(log_ptr, index->n_core_fields);
} else {
- mach_write_to_2(
- log_ptr,
- dict_index_get_n_unique_in_tree_nonleaf(index));
+ mach_write_to_2(log_ptr, n);
}
log_ptr += 2;
+ mach_write_to_2(
+ log_ptr, is_leaf
+ ? dict_index_get_n_unique_in_tree(index)
+ : dict_index_get_n_unique_in_tree_nonleaf(index));
+ log_ptr += 2;
for (i = 0; i < n; i++) {
dict_field_t* field;
@@ -501,19 +508,14 @@ mlog_open_and_write_index(
}
if (log_ptr + 2 > log_end) {
mlog_close(mtr, log_ptr);
- ut_a(total > (ulint) (log_ptr - log_start));
- total -= log_ptr - log_start;
- alloc = total;
-
- if (alloc > mtr_buf_t::MAX_DATA_SIZE) {
- alloc = mtr_buf_t::MAX_DATA_SIZE;
- }
-
- log_start = log_ptr = mlog_open(mtr, alloc);
-
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
+ ut_a(total > ulint(log_ptr - log_start));
+ total -= ulint(log_ptr - log_start);
+ alloc = std::min(
+ total,
+ ulint(mtr_buf_t::MAX_DATA_SIZE));
+
+ log_start = log_ptr = mtr->get_log()->open(
+ alloc);
log_end = log_ptr + alloc;
}
mach_write_to_2(log_ptr, len);
@@ -544,6 +546,7 @@ mlog_parse_index(
ulint i, n, n_uniq;
dict_table_t* table;
dict_index_t* ind;
+ ulint n_core_fields = 0;
ut_ad(comp == FALSE || comp == TRUE);
@@ -553,6 +556,23 @@ mlog_parse_index(
}
n = mach_read_from_2(ptr);
ptr += 2;
+ if (n & 0x8000) { /* record after instant ADD COLUMN */
+ n &= 0x7FFF;
+
+ n_core_fields = mach_read_from_2(ptr);
+
+ if (!n_core_fields || n_core_fields > n) {
+ recv_sys->found_corrupt_log = TRUE;
+ return(NULL);
+ }
+
+ ptr += 2;
+
+ if (end_ptr < ptr + 2) {
+ return(NULL);
+ }
+ }
+
n_uniq = mach_read_from_2(ptr);
ptr += 2;
ut_ad(n_uniq <= n);
@@ -562,11 +582,9 @@ mlog_parse_index(
} else {
n = n_uniq = 1;
}
- table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, 0,
+ table = dict_mem_table_create("LOG_DUMMY", NULL, n, 0,
comp ? DICT_TF_COMPACT : 0, 0);
- ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
- DICT_HDR_SPACE, 0, n);
- ind->table = table;
+ ind = dict_mem_index_create(table, "LOG_DUMMY", 0, n);
ind->n_uniq = (unsigned int) n_uniq;
if (n_uniq != n) {
ut_a(n_uniq + DATA_ROLL_PTR <= n);
@@ -604,6 +622,22 @@ mlog_parse_index(
ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
= &table->cols[n + DATA_ROLL_PTR];
}
+
+ ut_ad(table->n_cols == table->n_def);
+
+ if (n_core_fields) {
+ for (i = n_core_fields; i < n; i++) {
+ ind->fields[i].col->def_val.len
+ = UNIV_SQL_NULL;
+ }
+ ind->n_core_fields = n_core_fields;
+ ind->n_core_null_bytes = UT_BITS_IN_BYTES(
+ ind->get_n_nullable(n_core_fields));
+ } else {
+ ind->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(ind->n_nullable));
+ ind->n_core_fields = ind->n_fields;
+ }
}
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
ind->cached = TRUE;
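
The two hunks above change how an index definition is written to and parsed from the redo log: when the index has instantly added columns, the field count is written with the 0x8000 flag set and is followed by the number of core fields, and the parser rejects a core-field count of zero or one greater than the total. A self-contained sketch of just that encoding, using toy 2-byte big-endian helpers in place of mach_write_to_2()/mach_read_from_2(); all names here are illustrative:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Toy big-endian 16-bit accessors standing in for mach_write_to_2() and
// mach_read_from_2().
static void put2(uint8_t* p, uint16_t v) { p[0] = uint8_t(v >> 8); p[1] = uint8_t(v); }
static uint16_t get2(const uint8_t* p) { return uint16_t(p[0] << 8 | p[1]); }

// Write the field count; for an instant index, set the 0x8000 flag and
// append the number of core fields. Returns the number of bytes written.
std::size_t encode_field_counts(uint8_t* out, uint16_t n_fields,
                                bool is_instant, uint16_t n_core_fields)
{
	assert(n_fields < 0x8000);
	put2(out, is_instant ? uint16_t(n_fields | 0x8000) : n_fields);
	std::size_t len = 2;
	if (is_instant) {
		put2(out + len, n_core_fields);
		len += 2;
	}
	return len;
}

// Read the counts back; returns false on the corruption condition checked
// by the parser above (n_core_fields == 0 or n_core_fields > n).
// The caller must guarantee that at least 4 readable bytes follow "in".
bool decode_field_counts(const uint8_t* in, uint16_t& n_fields,
                         uint16_t& n_core_fields)
{
	uint16_t n = get2(in);
	if (n & 0x8000) {
		n &= 0x7FFF;
		n_core_fields = get2(in + 2);
		if (!n_core_fields || n_core_fields > n) {
			return false;  // corrupt record
		}
	} else {
		n_core_fields = n;  // no instant columns: every field is core
	}
	n_fields = n;
	return true;
}
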
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index dafa41e7a9c..92b1aa38a81 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -467,7 +467,7 @@ mtr_write_log(
ut_ad(!recv_no_log_write);
DBUG_PRINT("ib_log",
(ULINTPF " extra bytes written at " LSN_PF,
- len, log_sys->lsn));
+ len, log_sys.lsn));
log_reserve_and_open(len);
log->for_each_block(write_log);
@@ -495,8 +495,6 @@ void mtr_t::start()
m_impl.m_state = MTR_STATE_ACTIVE;
ut_d(m_impl.m_user_space_id = TRX_SYS_SPACE);
m_impl.m_user_space = NULL;
- m_impl.m_undo_space = NULL;
- m_impl.m_sys_space = NULL;
m_impl.m_flush_observer = NULL;
ut_d(m_impl.m_magic_n = MTR_MAGIC_N);
@@ -593,9 +591,7 @@ mtr_t::commit_checkpoint(
if (write_mlog_checkpoint) {
byte* ptr = m_impl.m_log.push<byte*>(SIZE_OF_MLOG_CHECKPOINT);
-#if SIZE_OF_MLOG_CHECKPOINT != 9
-# error SIZE_OF_MLOG_CHECKPOINT != 9
-#endif
+ compile_time_assert(SIZE_OF_MLOG_CHECKPOINT == 1 + 8);
*ptr = MLOG_CHECKPOINT;
mach_write_to_8(ptr + 1, checkpoint_lsn);
}
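
The replacement of the #if/#error pair with compile_time_assert() keeps the 9-byte size assumption (one type byte plus an 8-byte LSN) checked at build time. As a hedged illustration of the same idea in plain C++, assuming nothing about how MariaDB's compile_time_assert() macro is actually implemented, a static_assert over a hypothetical record layout would look like this:

#include <cstddef>

// Hypothetical layout standing in for the MLOG_CHECKPOINT record body:
// a one-byte type tag followed by an 8-byte checkpoint LSN.
struct mlog_checkpoint_rec_sketch {
	unsigned char type;    // MLOG_CHECKPOINT
	unsigned char lsn[8];  // 64-bit LSN
};

static_assert(sizeof(mlog_checkpoint_rec_sketch) == 1 + 8,
              "MLOG_CHECKPOINT payload must be 9 bytes");
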
@@ -607,7 +603,7 @@ mtr_t::commit_checkpoint(
if (write_mlog_checkpoint) {
DBUG_PRINT("ib_log",
("MLOG_CHECKPOINT(" LSN_PF ") written at " LSN_PF,
- checkpoint_lsn, log_sys->lsn));
+ checkpoint_lsn, log_sys.lsn));
}
}
@@ -619,18 +615,8 @@ mtr_t::commit_checkpoint(
bool
mtr_t::is_named_space(ulint space) const
{
- ut_ad(!m_impl.m_sys_space
- || m_impl.m_sys_space->id == TRX_SYS_SPACE);
- ut_ad(!m_impl.m_undo_space
- || m_impl.m_undo_space->id != TRX_SYS_SPACE);
ut_ad(!m_impl.m_user_space
|| m_impl.m_user_space->id != TRX_SYS_SPACE);
- ut_ad(!m_impl.m_sys_space
- || m_impl.m_sys_space != m_impl.m_user_space);
- ut_ad(!m_impl.m_sys_space
- || m_impl.m_sys_space != m_impl.m_undo_space);
- ut_ad(!m_impl.m_user_space
- || m_impl.m_user_space != m_impl.m_undo_space);
switch (get_log_mode()) {
case MTR_LOG_NONE:
@@ -645,6 +631,28 @@ mtr_t::is_named_space(ulint space) const
ut_error;
return(false);
}
+/** Check if a tablespace is associated with the mini-transaction
+(needed for generating a MLOG_FILE_NAME record)
+@param[in] space tablespace
+@return whether the mini-transaction is associated with the space */
+bool mtr_t::is_named_space(const fil_space_t* space) const
+{
+ ut_ad(!m_impl.m_user_space
+ || m_impl.m_user_space->id != TRX_SYS_SPACE);
+
+ switch (get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
+ return true;
+ case MTR_LOG_ALL:
+ case MTR_LOG_SHORT_INSERTS:
+ return(m_impl.m_user_space == space
+ || is_predefined_tablespace(space->id));
+ }
+
+ ut_error;
+ return false;
+}
#endif /* UNIV_DEBUG */
/** Acquire a tablespace X-latch.
@@ -662,22 +670,15 @@ mtr_t::x_lock_space(ulint space_id, const char* file, unsigned line)
ut_ad(is_active());
if (space_id == TRX_SYS_SPACE) {
- space = m_impl.m_sys_space;
-
- if (!space) {
- space = m_impl.m_sys_space = fil_space_get(space_id);
- }
+ space = fil_system.sys_space;
} else if ((space = m_impl.m_user_space) && space_id == space->id) {
- } else if ((space = m_impl.m_undo_space) && space_id == space->id) {
- } else if (get_log_mode() == MTR_LOG_NO_REDO) {
+ } else {
space = fil_space_get(space_id);
- ut_ad(space->purpose == FIL_TYPE_TEMPORARY
+ ut_ad(get_log_mode() != MTR_LOG_NO_REDO
+ || space->purpose == FIL_TYPE_TEMPORARY
|| space->purpose == FIL_TYPE_IMPORT
- || space->redo_skipped_count > 0
+ || my_atomic_loadlint(&space->redo_skipped_count) > 0
|| srv_is_tablespace_truncated(space->id));
- } else {
- /* called from trx_rseg_create() */
- space = m_impl.m_undo_space = fil_space_get(space_id);
}
ut_ad(space);
@@ -689,44 +690,6 @@ mtr_t::x_lock_space(ulint space_id, const char* file, unsigned line)
return(space);
}
-/** Look up the system tablespace. */
-void
-mtr_t::lookup_sys_space()
-{
- ut_ad(!m_impl.m_sys_space);
- m_impl.m_sys_space = fil_space_get(TRX_SYS_SPACE);
- ut_ad(m_impl.m_sys_space);
-}
-
-/** Look up the user tablespace.
-@param[in] space_id tablespace ID */
-void
-mtr_t::lookup_user_space(ulint space_id)
-{
- ut_ad(space_id != TRX_SYS_SPACE);
- ut_ad(m_impl.m_user_space_id == space_id);
- ut_ad(!m_impl.m_user_space);
- m_impl.m_user_space = fil_space_get(space_id);
- ut_ad(m_impl.m_user_space);
-}
-
-/** Set the tablespace associated with the mini-transaction
-(needed for generating a MLOG_FILE_NAME record)
-@param[in] space user or system tablespace */
-void
-mtr_t::set_named_space(fil_space_t* space)
-{
- ut_ad(m_impl.m_user_space_id == TRX_SYS_SPACE);
- ut_d(m_impl.m_user_space_id = space->id);
- if (space->id == TRX_SYS_SPACE) {
- ut_ad(m_impl.m_sys_space == NULL
- || m_impl.m_sys_space == space);
- m_impl.m_sys_space = space;
- } else {
- m_impl.m_user_space = space;
- }
-}
-
/** Release an object in the memo stack.
@return true if released */
bool
@@ -790,7 +753,7 @@ mtr_t::Command::prepare_write()
case MTR_LOG_NONE:
ut_ad(m_impl->m_log.size() == 0);
log_mutex_enter();
- m_end_lsn = m_start_lsn = log_sys->lsn;
+ m_end_lsn = m_start_lsn = log_sys.lsn;
return(0);
case MTR_LOG_ALL:
break;
@@ -801,8 +764,8 @@ mtr_t::Command::prepare_write()
ut_ad(len > 0);
ut_ad(n_recs > 0);
- if (len > log_sys->buf_size / 2) {
- log_buffer_extend((len + 1) * 2);
+ if (len > srv_log_buffer_size / 2) {
+ log_buffer_extend(ulong((len + 1) * 2));
}
ut_ad(m_impl->m_n_log_recs == n_recs);
@@ -956,38 +919,6 @@ mtr_t::Command::execute()
release_resources();
}
-/** Release the free extents that was reserved using
-fsp_reserve_free_extents(). This is equivalent to calling
-fil_space_release_free_extents(). This is intended for use
-with index pages.
-@param[in] n_reserved number of reserved extents */
-void
-mtr_t::release_free_extents(ulint n_reserved)
-{
- fil_space_t* space;
-
- ut_ad(m_impl.m_undo_space == NULL);
-
- if (m_impl.m_user_space != NULL) {
-
- ut_ad(m_impl.m_user_space->id
- == m_impl.m_user_space_id);
- ut_ad(memo_contains(get_memo(), &m_impl.m_user_space->latch,
- MTR_MEMO_X_LOCK));
-
- space = m_impl.m_user_space;
- } else {
-
- ut_ad(m_impl.m_sys_space->id == TRX_SYS_SPACE);
- ut_ad(memo_contains(get_memo(), &m_impl.m_sys_space->latch,
- MTR_MEMO_X_LOCK));
-
- space = m_impl.m_sys_space;
- }
-
- space->release_free_extents(n_reserved);
-}
-
#ifdef UNIV_DEBUG
/** Check if memo contains the given item.
@return true if contains */
diff --git a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
index 717d437b2d1..e9c46b3a6c1 100644
--- a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
@@ -78,7 +78,7 @@
DROP TABLE t1, t2;
call mtr.add_suppression("Got an error from thread_id=.*");
call mtr.add_suppression("MySQL thread id .*, query id .* localhost.*root Checking table");
-@@ -62,45 +63,32 @@
+@@ -63,46 +64,33 @@
CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
REPAIR TABLE t1;
Table Op Msg_type Msg_text
@@ -94,9 +94,10 @@
Table Op Msg_type Msg_text
-test.t1 repair warning Number of rows changed from 0 to 3
-test.t1 repair status OK
++test.t1 repair note The storage engine for the table doesn't support repair
+ db.opt
-t1.MYD
-t1.MYI
-+test.t1 repair note The storage engine for the table doesn't support repair
t1.frm
+t1.ibd
INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
diff --git a/storage/innobase/os/os0event.cc b/storage/innobase/os/os0event.cc
index 71b86df24a4..4453faedb71 100644
--- a/storage/innobase/os/os0event.cc
+++ b/storage/innobase/os/os0event.cc
@@ -33,9 +33,6 @@ Created 2012-09-23 Sunny Bains
#include <list>
-/** The number of microsecnds in a second. */
-static const ulint MICROSECS_IN_A_SECOND = 1000000;
-
#ifdef _WIN32
/** Native condition variable. */
typedef CONDITION_VARIABLE os_cond_t;
@@ -49,7 +46,7 @@ typedef os_event_list_t::iterator event_iter_t;
/** InnoDB condition variable. */
struct os_event {
- os_event(const char* name) UNIV_NOTHROW;
+ os_event() UNIV_NOTHROW;
~os_event() UNIV_NOTHROW;
@@ -379,13 +376,8 @@ os_event::wait_time_low(
tv.tv_usec += time_in_usec;
- if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
- tv.tv_sec += tv.tv_usec / MICROSECS_IN_A_SECOND;
- tv.tv_usec %= MICROSECS_IN_A_SECOND;
- }
-
- abstime.tv_sec = tv.tv_sec;
- abstime.tv_nsec = tv.tv_usec * 1000;
+ abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000;
+ abstime.tv_nsec = tv.tv_usec % 1000000 * 1000;
} else {
abstime.tv_nsec = 999999999;
abstime.tv_sec = (time_t) ULINT_MAX;
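
The simplified branch above folds any microsecond overflow directly into the seconds field when building the absolute timeout, instead of normalizing tv_usec in a separate step. A tiny standalone sketch of that computation (make_abstime is an invented helper name):

#include <cstdint>
#include <time.h>

// Build an absolute timespec from whole seconds plus a microsecond part
// that may be one second or more, carrying the overflow into tv_sec.
timespec make_abstime(time_t sec, uint64_t usec)
{
	timespec abstime;
	abstime.tv_sec  = sec + time_t(usec / 1000000);  // carry whole seconds
	abstime.tv_nsec = long(usec % 1000000) * 1000;   // remainder, in ns
	return abstime;
}

For example, make_abstime(10, 2500000) yields tv_sec == 12 and tv_nsec == 500000000.
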
@@ -421,7 +413,7 @@ os_event::wait_time_low(
}
/** Constructor */
-os_event::os_event(const char* name) UNIV_NOTHROW
+os_event::os_event() UNIV_NOTHROW
{
init();
@@ -450,14 +442,9 @@ Creates an event semaphore, i.e., a semaphore which may just have two
states: signaled and nonsignaled. The created event is manual reset: it
must be reset explicitly by calling sync_os_reset_event.
@return the event handle */
-os_event_t
-os_event_create(
-/*============*/
- const char* name) /*!< in: the name of the
- event, if NULL the event
- is created without a name */
+os_event_t os_event_create(const char*)
{
- return(UT_NEW_NOKEY(os_event(name)));
+ return(UT_NEW_NOKEY(os_event()));
}
/**
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 26cc70e6e41..523475d56c8 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -692,10 +692,6 @@ bool os_has_said_disk_full;
/** Default Zip compression level */
extern uint page_zip_level;
-#if DATA_TRX_ID_LEN > 6
-#error "COMPRESSION_ALGORITHM will not fit"
-#endif /* DATA_TRX_ID_LEN */
-
/** Validates the consistency of the aio system.
@return true if ok */
static
@@ -852,7 +848,8 @@ os_file_get_block_size(
#ifdef _WIN32
fblock_size = 0;
-
+ BOOL result = false;
+ size_t len = 0;
// Open volume for this file, find out it "physical bytes per sector"
HANDLE volume_handle = INVALID_HANDLE_VALUE;
@@ -863,7 +860,7 @@ os_file_get_block_size(
goto end;
}
- size_t len = strlen(volume);
+ len = strlen(volume);
if (volume[len - 1] == '\\') {
// Trim trailing backslash from volume name.
volume[len - 1] = 0;
@@ -889,7 +886,7 @@ os_file_get_block_size(
storage_query.PropertyId = StorageAccessAlignmentProperty;
storage_query.QueryType = PropertyStandardQuery;
- BOOL result = os_win32_device_io_control(volume_handle,
+ result = os_win32_device_io_control(volume_handle,
IOCTL_STORAGE_QUERY_PROPERTY,
&storage_query,
sizeof(storage_query),
@@ -1039,7 +1036,7 @@ AIOHandler::post_io_processing(Slot* slot)
ut_ad(slot->is_reserved);
/* Total bytes read so far */
- ulint n_bytes = (slot->ptr - slot->buf) + slot->n_bytes;
+ ulint n_bytes = ulint(slot->ptr - slot->buf) + slot->n_bytes;
return(n_bytes == slot->original_len ? DB_SUCCESS : DB_FAIL);
}
@@ -1087,21 +1084,14 @@ os_aio_validate_skip()
/** Try os_aio_validate() every this many times */
# define OS_AIO_VALIDATE_SKIP 13
- /** The os_aio_validate() call skip counter.
- Use a signed type because of the race condition below. */
- static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
-
- /* There is a race condition below, but it does not matter,
- because this call is only for heuristic purposes. We want to
- reduce the call frequency of the costly os_aio_validate()
- check in debug builds. */
- --os_aio_validate_count;
+ static int os_aio_validate_count;
- if (os_aio_validate_count > 0) {
- return(true);
+ if (my_atomic_add32_explicit(&os_aio_validate_count, -1,
+ MY_MEMORY_ORDER_RELAXED)
+ % OS_AIO_VALIDATE_SKIP) {
+ return true;
}
- os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
return(os_aio_validate());
}
#endif /* UNIV_DEBUG */
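
os_aio_validate_skip() now decrements a lock-free counter with relaxed memory order and only runs the costly os_aio_validate() when the returned value is divisible by OS_AIO_VALIDATE_SKIP, removing the old deliberately racy static counter. A sketch of the same sampling pattern with std::atomic; the function and constant names are invented:

#include <atomic>

// Run an expensive check only on roughly every kSkip-th call. Relaxed
// ordering is sufficient because this is merely a sampling heuristic.
bool validate_sampled(bool (*expensive_validate)())
{
	static const int kSkip = 13;  // like OS_AIO_VALIDATE_SKIP
	static std::atomic<int> counter{0};

	// fetch_sub() returns the previous counter value; skip the check
	// unless that value is a multiple of kSkip, as in the modulo test above.
	if (counter.fetch_sub(1, std::memory_order_relaxed) % kSkip) {
		return true;  // skipped: report success without checking
	}
	return expensive_validate();
}
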
@@ -1245,22 +1235,32 @@ AIO::release_with_mutex(Slot* slot)
release();
}
-/** Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is NULL then it will create the file in the MySQL server configuration
+/** Create a temporary file. This function is like tmpfile(3), but
+the temporary file is created in the in the mysql server configuration
+the temporary file is created in the directory given by the MySQL server configuration
parameter (--tmpdir).
-@param[in] path location for creating temporary file
-@@return temporary file handle, or NULL on error */
+@return temporary file handle, or NULL on error */
FILE*
-os_file_create_tmpfile(
- const char* path)
+os_file_create_tmpfile()
{
FILE* file = NULL;
WAIT_ALLOW_WRITES();
- int fd = innobase_mysql_tmpfile(path);
+ os_file_t fd = innobase_mysql_tmpfile(NULL);
- if (fd >= 0) {
+ if (fd != OS_FILE_CLOSED) {
+#ifdef _WIN32
+ int crt_fd = _open_osfhandle((intptr_t)HANDLE(fd), 0);
+ if (crt_fd != -1) {
+ file = fdopen(crt_fd, "w+b");
+ if (!file) {
+ close(crt_fd);
+ }
+ }
+#else
file = fdopen(fd, "w+b");
+ if (!file) {
+ close(fd);
+ }
+#endif
}
if (file == NULL) {
@@ -1268,10 +1268,6 @@ os_file_create_tmpfile(
ib::error()
<< "Unable to create temporary file; errno: "
<< errno;
-
- if (fd >= 0) {
- close(fd);
- }
}
return(file);
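
The rewritten os_file_create_tmpfile() receives a native handle and wraps it in a stdio stream: on Windows the HANDLE is first converted to a CRT descriptor with _open_osfhandle() before fdopen(), and on both platforms the descriptor is closed if fdopen() fails so the handle cannot leak. A portable sketch of that wrapping step, with an invented wrap_handle() helper (the CRT spellings _fdopen/_close are used on Windows here):

#include <cstdint>
#include <cstdio>
#ifdef _WIN32
# include <io.h>
# include <windows.h>
#else
# include <unistd.h>
#endif

#ifdef _WIN32
// Wrap a native HANDLE in a FILE*, closing the CRT descriptor (and with it
// the HANDLE) if the stream cannot be created.
FILE* wrap_handle(HANDLE h)
{
	int crt_fd = _open_osfhandle(reinterpret_cast<intptr_t>(h), 0);
	if (crt_fd == -1) {
		return nullptr;
	}
	FILE* f = _fdopen(crt_fd, "w+b");
	if (!f) {
		_close(crt_fd);
	}
	return f;
}
#else
// Wrap a POSIX file descriptor in a FILE*, closing it on failure.
FILE* wrap_handle(int fd)
{
	FILE* f = fdopen(fd, "w+b");
	if (!f) {
		close(fd);
	}
	return f;
}
#endif
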
@@ -1329,7 +1325,7 @@ os_file_make_new_pathname(
/* Find the offset of the last slash. We will strip off the
old basename.ibd which starts after that slash. */
last_slash = strrchr((char*) old_path, OS_PATH_SEPARATOR);
- dir_len = last_slash ? last_slash - old_path : strlen(old_path);
+ dir_len = last_slash ? ulint(last_slash - old_path) : strlen(old_path);
/* allocate a new path and move the old directory path to it. */
new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd";
@@ -1476,7 +1472,7 @@ os_file_get_parent_dir(
/* Non-trivial directory component */
- return(mem_strdupl(path, last_slash - path));
+ return(mem_strdupl(path, ulint(last_slash - path)));
}
#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
@@ -2303,23 +2299,23 @@ AIO::is_linux_native_aio_supported()
memset(&io_event, 0x0, sizeof(io_event));
- byte* buf = static_cast<byte*>(ut_malloc_nokey(UNIV_PAGE_SIZE * 2));
- byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+ byte* buf = static_cast<byte*>(ut_malloc_nokey(srv_page_size * 2));
+ byte* ptr = static_cast<byte*>(ut_align(buf, srv_page_size));
struct iocb iocb;
/* Suppress valgrind warning. */
- memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
+ memset(buf, 0x00, srv_page_size * 2);
memset(&iocb, 0x0, sizeof(iocb));
struct iocb* p_iocb = &iocb;
if (!srv_read_only_mode) {
- io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
+ io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0);
} else {
- ut_a(UNIV_PAGE_SIZE >= 512);
+ ut_a(srv_page_size >= 512);
io_prep_pread(p_iocb, fd, ptr, 512, 0);
}
@@ -3275,7 +3271,7 @@ os_file_get_size(
/* st_blocks is in 512 byte sized blocks */
file_size.m_alloc_size = s.st_blocks * 512;
} else {
- file_size.m_total_size = ~0;
+ file_size.m_total_size = ~0U;
file_size.m_alloc_size = (os_offset_t) errno;
}
@@ -3417,16 +3413,6 @@ static void __stdcall win_free_syncio_event(void *data) {
/*
-Initialize tls index.for event handle used for synchronized IO on files that
-might be opened with FILE_FLAG_OVERLAPPED.
-*/
-static void win_init_syncio_event() {
- fls_sync_io = FlsAlloc(win_free_syncio_event);
- ut_a(fls_sync_io != FLS_OUT_OF_INDEXES);
-}
-
-
-/*
Retrieve per-thread event for doing synchronous io on asyncronously opened files
*/
static HANDLE win_get_syncio_event()
@@ -3527,46 +3513,6 @@ struct WinIoInit
/* Ensures proper initialization and shutdown */
static WinIoInit win_io_init;
-/** Check if the file system supports sparse files.
-@param[in] name File name
-@return true if the file system supports sparse files */
-static
-bool
-os_is_sparse_file_supported_win32(const char* filename)
-{
- char volname[MAX_PATH];
- BOOL result = GetVolumePathName(filename, volname, MAX_PATH);
-
- if (!result) {
-
- ib::error()
- << "os_is_sparse_file_supported: "
- << "Failed to get the volume path name for: "
- << filename
- << "- OS error number " << GetLastError();
-
- return(false);
- }
-
- DWORD flags;
-
- result = GetVolumeInformation(
- volname, NULL, MAX_PATH, NULL, NULL,
- &flags, NULL, MAX_PATH);
-
-
- if (!result) {
- ib::error()
- << "os_is_sparse_file_supported: "
- << "Failed to get the volume info for: "
- << volname
- << "- OS error number " << GetLastError();
-
- return(false);
- }
-
- return(flags & FILE_SUPPORTS_SPARSE_FILES) ? true : false;
-}
/** Free storage space associated with a section of the file.
@param[in] fh Open file handle
@@ -3863,7 +3809,7 @@ os_file_create_simple_func(
ib::info()
<< "Read only mode set. Unable to"
" open file '" << name << "' in RW mode, "
- << "trying RO mode", name;
+ << "trying RO mode";
access = GENERIC_READ;
@@ -4602,7 +4548,7 @@ bool
os_file_close_func(
os_file_t file)
{
- ut_a(file > 0);
+ ut_a(file);
if (CloseHandle(file)) {
return(true);
@@ -4924,7 +4870,7 @@ os_file_io(
os_offset_t offset,
dberr_t* err)
{
- ulint original_n = n;
+ ssize_t original_n = ssize_t(n);
IORequest type = in_type;
ssize_t bytes_returned = 0;
@@ -4939,7 +4885,7 @@ os_file_io(
break;
- } else if ((ulint) n_bytes + bytes_returned == n) {
+ } else if (n_bytes + bytes_returned == ssize_t(n)) {
bytes_returned += n_bytes;
@@ -4958,9 +4904,9 @@ os_file_io(
/* Handle partial read/write. */
- ut_ad((ulint) n_bytes + bytes_returned < n);
+ ut_ad(ulint(n_bytes + bytes_returned) < n);
- bytes_returned += (ulint) n_bytes;
+ bytes_returned += n_bytes;
if (!type.is_partial_io_warning_disabled()) {
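
The accumulation logic above keeps adding the bytes returned by each system call to bytes_returned until the full request has been transferred, treating a short transfer as a reason to retry rather than as an error. A POSIX-only sketch of the same pattern for reads; read_fully is an invented helper and not part of the os_file_io() API:

#include <cerrno>
#include <cstddef>
#include <sys/types.h>
#include <unistd.h>

// Read exactly n bytes at the given offset, tolerating partial reads.
// Returns the number of bytes read (short only at end of file), or -1 on
// a hard error.
ssize_t read_fully(int fd, void* buf, std::size_t n, off_t offset)
{
	ssize_t total = 0;
	while (static_cast<std::size_t>(total) < n) {
		ssize_t got = pread(fd, static_cast<char*>(buf) + total,
				    n - static_cast<std::size_t>(total),
				    offset + total);
		if (got > 0) {
			total += got;   // partial read: keep going
		} else if (got == 0) {
			break;          // end of file
		} else if (errno == EINTR) {
			continue;       // interrupted: retry
		} else {
			return -1;      // hard error
		}
	}
	return total;
}
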
@@ -5284,7 +5230,7 @@ os_file_set_nocache(
ib::error()
<< "Failed to set DIRECTIO_ON on file "
- << file_name << ": " << operation_name
+ << file_name << "; " << operation_name << ": "
<< strerror(errno_save) << ","
" continuing anyway.";
}
@@ -5298,9 +5244,9 @@ os_file_set_nocache(
# ifdef UNIV_LINUX
ib::warn()
<< "Failed to set O_DIRECT on file"
- << file_name << ";" << operation_name
+ << file_name << "; " << operation_name
<< ": " << strerror(errno_save) << ", "
- << "ccontinuing anyway. O_DIRECT is "
+ "continuing anyway. O_DIRECT is "
"known to result in 'Invalid argument' "
"on Linux on tmpfs, "
"see MySQL Bug#26662.";
@@ -5316,7 +5262,7 @@ short_warning:
<< "Failed to set O_DIRECT on file "
<< file_name << "; " << operation_name
<< " : " << strerror(errno_save)
- << " continuing anyway.";
+ << ", continuing anyway.";
}
}
#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
@@ -5404,18 +5350,16 @@ fallback:
#endif /* _WIN32*/
/* Write up to 1 megabyte at a time. */
- ulint buf_size = ut_min(
- static_cast<ulint>(64),
- static_cast<ulint>(size / UNIV_PAGE_SIZE));
-
- buf_size *= UNIV_PAGE_SIZE;
+ ulint buf_size = ut_min(ulint(64),
+ ulint(size >> srv_page_size_shift))
+ << srv_page_size_shift;
/* Align the buffer for possible raw i/o */
byte* buf2;
- buf2 = static_cast<byte*>(ut_malloc_nokey(buf_size + UNIV_PAGE_SIZE));
+ buf2 = static_cast<byte*>(ut_malloc_nokey(buf_size + srv_page_size));
- byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ byte* buf = static_cast<byte*>(ut_align(buf2, srv_page_size));
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
@@ -5589,7 +5533,7 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
/* Check does file system support punching holes for this
tablespace. */
- if (!should_punch_hole() || !srv_use_trim) {
+ if (!should_punch_hole()) {
return DB_IO_NO_PUNCH_HOLE;
}
@@ -5640,7 +5584,7 @@ os_is_sparse_file_supported(os_file_t fh)
/* We don't know the FS block size, use the sector size. The FS
will do the magic. */
- err = os_file_punch_hole_posix(fh, 0, UNIV_PAGE_SIZE);
+ err = os_file_punch_hole_posix(fh, 0, srv_page_size);
return(err == DB_SUCCESS);
#endif /* _WIN32 */
@@ -6251,7 +6195,7 @@ AIO::reserve_slot(
doing simulated AIO */
ulint local_seg;
- local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6)) % m_n_segments;
+ local_seg = (offset >> (srv_page_size_shift + 6)) % m_n_segments;
for (;;) {
@@ -6932,10 +6876,10 @@ public:
}
m_ptr = static_cast<byte*>(
- ut_malloc_nokey(len + UNIV_PAGE_SIZE));
+ ut_malloc_nokey(len + srv_page_size));
m_buf = static_cast<byte*>(
- ut_align(m_ptr, UNIV_PAGE_SIZE));
+ ut_align(m_ptr, srv_page_size));
} else {
len = first_slot()->len;
diff --git a/storage/innobase/os/os0thread.cc b/storage/innobase/os/os0thread.cc
index 19d91c43637..e0996e45880 100644
--- a/storage/innobase/os/os0thread.cc
+++ b/storage/innobase/os/os0thread.cc
@@ -137,7 +137,7 @@ os_thread_create_func(
#endif /* not _WIN32 */
- ut_a(os_thread_count <= OS_THREAD_MAX_N);
+ ut_a(os_thread_count <= srv_max_n_threads);
/* Return the thread_id if the caller requests it. */
if (thread_id != NULL) {
@@ -182,7 +182,7 @@ os_thread_exit(bool detach)
pfs_delete_thread();
#endif
- my_atomic_addlint(&os_thread_count, -1);
+ my_atomic_addlint(&os_thread_count, ulint(-1));
#ifdef _WIN32
ExitThread(0);
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index 2327d11f1fa..b4775d19e1d 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -359,19 +359,15 @@ page_cur_search_with_match(
#ifdef BTR_CUR_HASH_ADAPT
if (is_leaf
- && (mode == PAGE_CUR_LE)
+ && page_get_direction(page) == PAGE_RIGHT
+ && page_header_get_offs(page, PAGE_LAST_INSERT)
+ && mode == PAGE_CUR_LE
&& !dict_index_is_spatial(index)
- && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
- && (page_header_get_ptr(page, PAGE_LAST_INSERT))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
- if (page_cur_try_search_shortcut(
- block, index, tuple,
- iup_matched_fields,
- ilow_matched_fields,
- cursor)) {
- return;
- }
+ && page_header_get_field(page, PAGE_N_DIRECTION) > 3
+ && page_cur_try_search_shortcut(
+ block, index, tuple,
+ iup_matched_fields, ilow_matched_fields, cursor)) {
+ return;
}
# ifdef PAGE_CUR_DBG
if (mode == PAGE_CUR_DBG) {
@@ -415,7 +411,7 @@ page_cur_search_with_match(
owned by the upper limit directory slot. */
low = 0;
- up = page_dir_get_n_slots(page) - 1;
+ up = ulint(page_dir_get_n_slots(page)) - 1;
/* Perform binary search until the lower and upper limit directory
slots come to the distance 1 of each other */
@@ -523,7 +519,7 @@ up_rec_match:
ulint rec_info = rec_get_info_bits(mid_rec,
rec_offs_comp(offsets));
ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
- ut_ad(btr_page_get_prev(page, &mtr) == FIL_NULL);
+ ut_ad(!page_has_prev(page));
mtr_commit(&mtr);
#endif
@@ -601,6 +597,7 @@ page_cur_search_with_match_bytes(
rec_offs_init(offsets_);
ut_ad(dtuple_validate(tuple));
+ ut_ad(!(tuple->info_bits & REC_INFO_MIN_REC_FLAG));
#ifdef UNIV_DEBUG
# ifdef PAGE_CUR_DBG
if (mode != PAGE_CUR_DBG)
@@ -620,18 +617,16 @@ page_cur_search_with_match_bytes(
#ifdef BTR_CUR_HASH_ADAPT
if (page_is_leaf(page)
- && (mode == PAGE_CUR_LE)
- && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
- && (page_header_get_ptr(page, PAGE_LAST_INSERT))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
- if (page_cur_try_search_shortcut_bytes(
- block, index, tuple,
- iup_matched_fields, iup_matched_bytes,
- ilow_matched_fields, ilow_matched_bytes,
- cursor)) {
- return;
- }
+ && page_get_direction(page) == PAGE_RIGHT
+ && page_header_get_offs(page, PAGE_LAST_INSERT)
+ && mode == PAGE_CUR_LE
+ && page_header_get_field(page, PAGE_N_DIRECTION) > 3
+ && page_cur_try_search_shortcut_bytes(
+ block, index, tuple,
+ iup_matched_fields, iup_matched_bytes,
+ ilow_matched_fields, ilow_matched_bytes,
+ cursor)) {
+ return;
}
# ifdef PAGE_CUR_DBG
if (mode == PAGE_CUR_DBG) {
@@ -662,11 +657,11 @@ page_cur_search_with_match_bytes(
owned by the upper limit directory slot. */
low = 0;
- up = page_dir_get_n_slots(page) - 1;
+ up = ulint(page_dir_get_n_slots(page)) - 1;
/* Perform binary search until the lower and upper limit directory
slots come to the distance 1 of each other */
- ut_d(bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
while (up - low > 1) {
mid = (low + up) / 2;
@@ -734,6 +729,17 @@ up_slot_match:
low_matched_fields, low_matched_bytes,
up_matched_fields, up_matched_bytes);
+ if (UNIV_UNLIKELY(rec_get_info_bits(
+ mid_rec,
+ dict_table_is_comp(index->table))
+ & REC_INFO_MIN_REC_FLAG)) {
+ ut_ad(!page_has_prev(page_align(mid_rec)));
+ ut_ad(!page_rec_is_leaf(mid_rec)
+ || rec_is_metadata(mid_rec, index));
+ cmp = 1;
+ goto low_rec_match;
+ }
+
offsets = rec_get_offsets(
mid_rec, index, offsets_, is_leaf,
dtuple_get_n_fields_cmp(tuple), &heap);
@@ -767,23 +773,6 @@ up_rec_match:
|| mode == PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
) {
- if (!cmp && !cur_matched_fields) {
-#ifdef UNIV_DEBUG
- mtr_t mtr;
- mtr_start(&mtr);
-
- /* We got a match, but cur_matched_fields is
- 0, it must have REC_INFO_MIN_REC_FLAG */
- ulint rec_info = rec_get_info_bits(mid_rec,
- rec_offs_comp(offsets));
- ut_ad(rec_info & REC_INFO_MIN_REC_FLAG);
- ut_ad(btr_page_get_prev(page, &mtr) == FIL_NULL);
- mtr_commit(&mtr);
-#endif
-
- cur_matched_fields = dtuple_get_n_fields_cmp(tuple);
- }
-
goto low_rec_match;
} else {
@@ -854,19 +843,19 @@ page_cur_insert_rec_write_log(
const byte* log_end;
ulint i;
- if (dict_table_is_temporary(index->table)) {
+ if (index->table->is_temporary()) {
mtr->set_modified();
ut_ad(mtr->get_log_mode() == MTR_LOG_NO_REDO);
return;
}
- ut_a(rec_size < UNIV_PAGE_SIZE);
- ut_ad(mtr->is_named_space(index->space));
+ ut_a(rec_size < srv_page_size);
+ ut_ad(mtr->is_named_space(index->table->space));
ut_ad(page_align(insert_rec) == page_align(cursor_rec));
ut_ad(!page_rec_is_comp(insert_rec)
== !dict_table_is_comp(index->table));
- ut_d(const bool is_leaf = page_rec_is_leaf(cursor_rec));
+ const bool is_leaf = page_rec_is_leaf(cursor_rec);
{
mem_heap_t* heap = NULL;
@@ -1003,8 +992,8 @@ need_extra_info:
/* Write the mismatch index */
log_ptr += mach_write_compressed(log_ptr, i);
- ut_a(i < UNIV_PAGE_SIZE);
- ut_a(extra_size < UNIV_PAGE_SIZE);
+ ut_a(i < srv_page_size);
+ ut_a(extra_size < srv_page_size);
} else {
/* Write the record end segment length
and the extra info storage flag */
@@ -1021,7 +1010,7 @@ need_extra_info:
mlog_close(mtr, log_ptr + rec_size);
} else {
mlog_close(mtr, log_ptr);
- ut_a(rec_size < UNIV_PAGE_SIZE);
+ ut_a(rec_size < srv_page_size);
mlog_catenate_string(mtr, ins_ptr, rec_size);
}
}
@@ -1073,7 +1062,7 @@ page_cur_parse_insert_rec(
cursor_rec = page + offset;
- if (offset >= UNIV_PAGE_SIZE) {
+ if (offset >= srv_page_size) {
recv_sys->found_corrupt_log = TRUE;
@@ -1088,7 +1077,7 @@ page_cur_parse_insert_rec(
return(NULL);
}
- if (end_seg_len >= UNIV_PAGE_SIZE << 1) {
+ if (end_seg_len >= srv_page_size << 1) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
@@ -1112,7 +1101,7 @@ page_cur_parse_insert_rec(
return(NULL);
}
- ut_a(origin_offset < UNIV_PAGE_SIZE);
+ ut_a(origin_offset < srv_page_size);
mismatch_index = mach_parse_compressed(&ptr, end_ptr);
@@ -1121,7 +1110,7 @@ page_cur_parse_insert_rec(
return(NULL);
}
- ut_a(mismatch_index < UNIV_PAGE_SIZE);
+ ut_a(mismatch_index < srv_page_size);
}
if (end_ptr < ptr + (end_seg_len >> 1)) {
@@ -1140,7 +1129,7 @@ page_cur_parse_insert_rec(
/* Read from the log the inserted index record end segment which
differs from the cursor record */
- ut_d(bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
offsets = rec_get_offsets(cursor_rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
@@ -1163,7 +1152,7 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
- if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(mismatch_index >= srv_page_size)) {
ib::fatal() << "is_short " << is_short << ", "
<< "info_and_status_bits " << info_and_status_bits
@@ -1177,15 +1166,13 @@ page_cur_parse_insert_rec(
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
if (page_is_comp(page)) {
- /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
- ut_d(rec_set_heap_no_new(buf + origin_offset,
- PAGE_HEAP_NO_USER_LOW));
+ rec_set_heap_no_new(buf + origin_offset,
+ PAGE_HEAP_NO_USER_LOW);
rec_set_info_and_status_bits(buf + origin_offset,
info_and_status_bits);
} else {
- /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
- ut_d(rec_set_heap_no_old(buf + origin_offset,
- PAGE_HEAP_NO_USER_LOW));
+ rec_set_heap_no_old(buf + origin_offset,
+ PAGE_HEAP_NO_USER_LOW);
rec_set_info_bits_old(buf + origin_offset,
info_and_status_bits);
}
@@ -1214,6 +1201,50 @@ page_cur_parse_insert_rec(
return(const_cast<byte*>(ptr + end_seg_len));
}
+/** Reset PAGE_DIRECTION and PAGE_N_DIRECTION.
+@param[in,out] ptr the PAGE_DIRECTION_B field
+@param[in,out] page index tree page frame
+@param[in] page_zip compressed page descriptor, or NULL */
+static inline
+void
+page_direction_reset(byte* ptr, page_t* page, page_zip_des_t* page_zip)
+{
+ ut_ad(ptr == PAGE_HEADER + PAGE_DIRECTION_B + page);
+ page_ptr_set_direction(ptr, PAGE_NO_DIRECTION);
+ if (page_zip) {
+ page_zip_write_header(page_zip, ptr, 1, NULL);
+ }
+ ptr = PAGE_HEADER + PAGE_N_DIRECTION + page;
+ *reinterpret_cast<uint16_t*>(ptr) = 0;
+ if (page_zip) {
+ page_zip_write_header(page_zip, ptr, 2, NULL);
+ }
+}
+
+/** Increment PAGE_N_DIRECTION.
+@param[in,out] ptr the PAGE_DIRECTION_B field
+@param[in,out] page index tree page frame
+@param[in] page_zip compressed page descriptor, or NULL
+@param[in] dir PAGE_RIGHT or PAGE_LEFT */
+static inline
+void
+page_direction_increment(
+ byte* ptr,
+ page_t* page,
+ page_zip_des_t* page_zip,
+ uint dir)
+{
+ ut_ad(ptr == PAGE_HEADER + PAGE_DIRECTION_B + page);
+ ut_ad(dir == PAGE_RIGHT || dir == PAGE_LEFT);
+ page_ptr_set_direction(ptr, dir);
+ if (page_zip) {
+ page_zip_write_header(page_zip, ptr, 1, NULL);
+ }
+ page_header_set_field(
+ page, page_zip, PAGE_N_DIRECTION,
+ 1U + page_header_get_field(page, PAGE_N_DIRECTION));
+}
+
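
The two helpers above centralize the PAGE_DIRECTION/PAGE_N_DIRECTION bookkeeping that the insert routines later in this diff use to detect monotonic (rightward or leftward) insert patterns. A hypothetical, standalone stand-in for that bookkeeping, without any page header or compressed-page handling:

#include <cstdint>

enum Direction : uint8_t { NO_DIRECTION, RIGHT, LEFT };

// Track whether consecutive inserts keep moving in one direction, in the
// spirit of PAGE_DIRECTION and PAGE_N_DIRECTION.
struct DirectionTracker {
	Direction dir = NO_DIRECTION;
	uint16_t  n_direction = 0;

	void reset() { dir = NO_DIRECTION; n_direction = 0; }

	// have_last: a previous insert position is known;
	// after_last/before_last: the new record landed immediately after or
	// immediately before that previous insert.
	void on_insert(bool have_last, bool after_last, bool before_last)
	{
		if (!have_last) {
			reset();
		} else if (after_last && dir != LEFT) {
			dir = RIGHT;
			++n_direction;
		} else if (before_last && dir != RIGHT) {
			dir = LEFT;
			++n_direction;
		} else {
			reset();
		}
	}
};
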
/***********************************************************//**
Inserts a record next to page cursor on an uncompressed page.
Returns pointer to inserted record if succeed, i.e., enough
@@ -1247,7 +1278,7 @@ page_cur_insert_rec_low(
== (ibool) !!page_is_comp(page));
ut_ad(fil_page_index_page_check(page));
ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
- || recv_recovery_is_on()
+ || index->is_dummy
|| (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)));
ut_ad(!page_rec_is_supremum(current_rec));
@@ -1324,28 +1355,7 @@ use_heap:
/* 3. Create the record */
insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
-
- /* This is because assertion below is debug assertion */
-#ifdef UNIV_DEBUG
- if (UNIV_UNLIKELY(current_rec == insert_rec)) {
- ulint extra_len, data_len;
- extra_len = rec_offs_extra_size(offsets);
- data_len = rec_offs_data_size(offsets);
-
- fprintf(stderr, "InnoDB: Error: current_rec == insert_rec "
- " extra_len " ULINTPF
- " data_len " ULINTPF " insert_buf %p rec %p\n",
- extra_len, data_len, insert_buf, rec);
- fprintf(stderr, "InnoDB; Physical record: \n");
- rec_print(stderr, rec, index);
- fprintf(stderr, "InnoDB: Inserted record: \n");
- rec_print(stderr, insert_rec, index);
- fprintf(stderr, "InnoDB: Current record: \n");
- rec_print(stderr, current_rec, index);
- ut_a(current_rec != insert_rec);
- }
-#endif /* UNIV_DEBUG */
+ rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets);
/* 4. Insert the record in the linked list of records */
ut_ad(current_rec != insert_rec);
@@ -1355,9 +1365,24 @@ use_heap:
rec_t* next_rec = page_rec_get_next(current_rec);
#ifdef UNIV_DEBUG
if (page_is_comp(page)) {
- ut_ad(rec_get_status(current_rec)
- <= REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ switch (rec_get_status(current_rec)) {
+ case REC_STATUS_ORDINARY:
+ case REC_STATUS_NODE_PTR:
+ case REC_STATUS_COLUMNS_ADDED:
+ case REC_STATUS_INFIMUM:
+ break;
+ case REC_STATUS_SUPREMUM:
+ ut_ad(!"wrong status on current_rec");
+ }
+ switch (rec_get_status(insert_rec)) {
+ case REC_STATUS_ORDINARY:
+ case REC_STATUS_NODE_PTR:
+ case REC_STATUS_COLUMNS_ADDED:
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_ad(!"wrong status on insert_rec");
+ }
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
@@ -1366,7 +1391,7 @@ use_heap:
}
page_header_set_field(page, NULL, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
+ 1U + page_get_n_recs(page));
/* 5. Set the n_owned field in the inserted record to zero,
and set the heap_no field */
@@ -1388,34 +1413,18 @@ use_heap:
== rec_get_node_ptr_flag(insert_rec));
if (!dict_index_is_spatial(index)) {
+ byte* ptr = PAGE_HEADER + PAGE_DIRECTION_B + page;
if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
+no_direction:
+ page_direction_reset(ptr, page, NULL);
+ } else if (last_insert == current_rec
+ && page_ptr_get_direction(ptr) != PAGE_LEFT) {
+ page_direction_increment(ptr, page, NULL, PAGE_RIGHT);
+ } else if (page_ptr_get_direction(ptr) != PAGE_RIGHT
+ && page_rec_get_next(insert_rec) == last_insert) {
+ page_direction_increment(ptr, page, NULL, PAGE_LEFT);
} else {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ goto no_direction;
}
}
@@ -1496,9 +1505,9 @@ page_cur_insert_rec_zip(
ut_ad(page_is_comp(page));
ut_ad(fil_page_index_page_check(page));
ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
- || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index))
- || recv_recovery_is_on());
-
+ || index->is_dummy
+ || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)));
+ ut_ad(!page_get_instant(page));
ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
ut_a(page_zip_validate(page_zip, page, index));
@@ -1616,11 +1625,13 @@ page_cur_insert_rec_zip(
because the MLOG_COMP_REC_INSERT should only
be logged after a successful operation. */
ut_ad(!recv_recovery_is_on());
+ ut_ad(!index->is_dummy);
} else if (recv_recovery_is_on()) {
/* This should be followed by
MLOG_ZIP_PAGE_COMPRESS_NO_DATA,
which should succeed. */
- rec_offs_make_valid(insert_rec, index, offsets);
+ rec_offs_make_valid(insert_rec, index,
+ page_is_leaf(page), offsets);
} else {
ulint pos = page_rec_get_n_recs_before(insert_rec);
ut_ad(pos > 0);
@@ -1636,7 +1647,8 @@ page_cur_insert_rec_zip(
level, page, index, mtr);
rec_offs_make_valid(
- insert_rec, index, offsets);
+ insert_rec, index,
+ page_is_leaf(page), offsets);
return(insert_rec);
}
@@ -1679,7 +1691,8 @@ page_cur_insert_rec_zip(
insert_rec = page + rec_get_next_offs(
cursor->rec, TRUE);
rec_offs_make_valid(
- insert_rec, index, offsets);
+ insert_rec, index,
+ page_is_leaf(page), offsets);
return(insert_rec);
}
@@ -1730,14 +1743,13 @@ too_small:
/* On compressed pages, do not relocate records from
the free list. If extra_size would grow, use the heap. */
- extra_size_diff
- = rec_offs_extra_size(offsets)
- - rec_offs_extra_size(foffsets);
+ extra_size_diff = lint(rec_offs_extra_size(offsets)
+ - rec_offs_extra_size(foffsets));
if (UNIV_UNLIKELY(extra_size_diff < 0)) {
/* Add an offset to the extra_size. */
if (rec_offs_size(foffsets)
- < rec_size - extra_size_diff) {
+ < rec_size - ulint(extra_size_diff)) {
goto too_small;
}
@@ -1821,7 +1833,7 @@ use_heap:
/* 3. Create the record */
insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
+ rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets);
/* 4. Insert the record in the linked list of records */
ut_ad(cursor->rec != insert_rec);
@@ -1840,7 +1852,7 @@ use_heap:
}
page_header_set_field(page, page_zip, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
+ 1U + page_get_n_recs(page));
/* 5. Set the n_owned field in the inserted record to zero,
and set the heap_no field */
@@ -1860,36 +1872,20 @@ use_heap:
== rec_get_node_ptr_flag(insert_rec));
if (!dict_index_is_spatial(index)) {
+ byte* ptr = PAGE_HEADER + PAGE_DIRECTION_B + page;
if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip,
- PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == cursor->rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
+no_direction:
+ page_direction_reset(ptr, page, page_zip);
+ } else if (last_insert == cursor->rec
+ && page_ptr_get_direction(ptr) != PAGE_LEFT) {
+ page_direction_increment(ptr, page, page_zip,
+ PAGE_RIGHT);
+ } else if (page_ptr_get_direction(ptr) != PAGE_RIGHT
+ && page_rec_get_next(insert_rec) == last_insert) {
+ page_direction_increment(ptr, page, page_zip,
+ PAGE_LEFT);
} else {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip,
- PAGE_N_DIRECTION, 0);
+ goto no_direction;
}
}
@@ -1940,7 +1936,7 @@ page_copy_rec_list_to_created_page_write_log(
byte* log_ptr;
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
log_ptr = mlog_open_and_write_index(mtr, page, index,
page_is_comp(page)
@@ -1970,6 +1966,8 @@ page_parse_copy_rec_list_to_created_page(
page_t* page;
page_zip_des_t* page_zip;
+ ut_ad(index->is_dummy);
+
if (ptr + 4 > end_ptr) {
return(NULL);
@@ -1990,6 +1988,13 @@ page_parse_copy_rec_list_to_created_page(
return(rec_end);
}
+ /* This function is never invoked on the clustered index root page,
+ except in the redo log apply of
+ page_copy_rec_list_end_to_created_page(), which was logged by
+ page_copy_rec_list_to_created_page_write_log().
+ For other pages, this field must be zero-initialized. */
+ ut_ad(!page_get_instant(block->frame) || page_is_root(block->frame));
+
while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
block, index, mtr);
@@ -2003,9 +2008,8 @@ page_parse_copy_rec_list_to_created_page(
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
if (!dict_index_is_spatial(index)) {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+ page_direction_reset(PAGE_HEADER + PAGE_DIRECTION_B + page,
+ page, page_zip);
}
return(rec_end);
@@ -2045,6 +2049,9 @@ page_copy_rec_list_end_to_created_page(
ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
ut_ad(page_align(rec) != new_page);
ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
+ /* This function is never invoked on the clustered index root page,
+ except in btr_lift_page_up(). */
+ ut_ad(!page_get_instant(new_page) || page_is_root(new_page));
if (page_rec_is_infimum(rec)) {
@@ -2059,9 +2066,9 @@ page_copy_rec_list_end_to_created_page(
#ifdef UNIV_DEBUG
/* To pass the debug tests we have to set these dummy values
in the debug version */
- page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
+ page_dir_set_n_slots(new_page, NULL, srv_page_size / 2);
page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
- new_page + UNIV_PAGE_SIZE - 1);
+ new_page + srv_page_size - 1);
#endif
log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
index, mtr);
@@ -2070,7 +2077,7 @@ page_copy_rec_list_end_to_created_page(
/* Individual inserts are logged in a shorter form */
- const mtr_log_t log_mode = dict_table_is_temporary(index->table)
+ const mtr_log_t log_mode = index->table->is_temporary()
|| !index->is_readable() /* IMPORT TABLESPACE */
? mtr_get_log_mode(mtr)
: mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
@@ -2085,7 +2092,7 @@ page_copy_rec_list_end_to_created_page(
slot_index = 0;
n_recs = 0;
- ut_d(const bool is_leaf = page_is_leaf(new_page));
+ const bool is_leaf = page_is_leaf(new_page);
do {
offsets = rec_get_offsets(rec, index, offsets, is_leaf,
@@ -2126,11 +2133,11 @@ page_copy_rec_list_end_to_created_page(
rec_size = rec_offs_size(offsets);
- ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+ ut_ad(heap_top < new_page + srv_page_size);
heap_top += rec_size;
- rec_offs_make_valid(insert_rec, index, offsets);
+ rec_offs_make_valid(insert_rec, index, is_leaf, offsets);
page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
index, mtr);
prev_rec = insert_rec;
@@ -2158,9 +2165,13 @@ page_copy_rec_list_end_to_created_page(
mem_heap_free(heap);
}
+ /* Restore the log mode */
+
+ mtr_set_log_mode(mtr, log_mode);
+
log_data_len = mtr->get_log()->size() - log_data_len;
- ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
+ ut_a(log_data_len < 100U << srv_page_size_shift);
if (log_ptr != NULL) {
mach_write_to_4(log_ptr, log_data_len);
@@ -2182,15 +2193,10 @@ page_copy_rec_list_end_to_created_page(
page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
- page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
-
- page_header_set_field(new_page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
-
- /* Restore the log mode */
-
- mtr_set_log_mode(mtr, log_mode);
+ *reinterpret_cast<uint16_t*>(PAGE_HEADER + PAGE_LAST_INSERT + new_page)
+ = 0;
+ page_direction_reset(PAGE_HEADER + PAGE_DIRECTION_B + new_page,
+ new_page, NULL);
}
/***********************************************************//**
@@ -2206,7 +2212,7 @@ page_cur_delete_rec_write_log(
byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec)
@@ -2317,9 +2323,9 @@ page_cur_delete_rec(
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
ut_ad(fil_page_index_page_check(page));
ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id
- || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index))
- || recv_recovery_is_on());
- ut_ad(mtr == NULL || mtr->is_named_space(index->space));
+ || index->is_dummy
+ || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)));
+ ut_ad(!mtr || mtr->is_named_space(index->table->space));
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
@@ -2395,9 +2401,7 @@ page_cur_delete_rec(
prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
>= 2. */
-#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
-# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
-#endif
+ compile_time_assert(PAGE_DIR_SLOT_MIN_N_OWNED >= 2);
ut_ad(cur_n_owned > 1);
if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
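Note on the direction bookkeeping above: the open-coded PAGE_DIRECTION/PAGE_N_DIRECTION updates in page_cur_insert_rec_zip() and in the uncompressed insert path above it are replaced by calls to page_direction_reset() and page_direction_increment(), whose definitions are not part of this diff. The standalone sketch below only models the branch logic visible at the call sites; the enum values and the direction_state struct are illustrative stand-ins, not the real helpers or the PAGE_HEADER layout in include/page0page.h.

#include <cstdint>

/* Illustrative stand-ins; the real constants and header layout are in
   include/page0page.h and are not shown in this diff. */
enum page_direction : uint8_t { PAGE_LEFT, PAGE_RIGHT, PAGE_NO_DIRECTION };

struct direction_state {
	page_direction	dir   = PAGE_NO_DIRECTION;	/* models PAGE_DIRECTION_B */
	uint16_t	n_dir = 0;			/* models PAGE_N_DIRECTION */

	void reset() { dir = PAGE_NO_DIRECTION; n_dir = 0; }
	void increment(page_direction d) { dir = d; n_dir = uint16_t(n_dir + 1); }
};

/* Mirrors the branch structure at the two call sites above: inserting
   right after the previous insert extends a rightward run, inserting
   immediately before it extends a leftward run, anything else resets
   the direction and its counter. */
template<typename Rec>
void
update_direction(direction_state& s, const Rec* last_insert,
		 const Rec* current_rec, const Rec* next_of_insert_rec)
{
	if (last_insert == nullptr) {
		s.reset();
	} else if (last_insert == current_rec && s.dir != PAGE_LEFT) {
		s.increment(PAGE_RIGHT);
	} else if (s.dir != PAGE_RIGHT && next_of_insert_rec == last_insert) {
		s.increment(PAGE_LEFT);
	} else {
		s.reset();
	}
}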
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index 4b5b2cd999b..36a4cb46cf7 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -92,24 +92,24 @@ page_dir_find_owner_slot(
const page_t* page = page_align(rec);
const page_dir_slot_t* first_slot = page_dir_get_nth_slot(page, 0);
const page_dir_slot_t* slot = page_dir_get_nth_slot(
- page, page_dir_get_n_slots(page) - 1);
+ page, ulint(page_dir_get_n_slots(page)) - 1);
const rec_t* r = rec;
if (page_is_comp(page)) {
while (rec_get_n_owned_new(r) == 0) {
r = rec_get_next_ptr_const(r, TRUE);
ut_ad(r >= page + PAGE_NEW_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+ ut_ad(r < page + (srv_page_size - PAGE_DIR));
}
} else {
while (rec_get_n_owned_old(r) == 0) {
r = rec_get_next_ptr_const(r, FALSE);
ut_ad(r >= page + PAGE_OLD_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+ ut_ad(r < page + (srv_page_size - PAGE_DIR));
}
}
- uint16 rec_offs_bytes = mach_encode_2(r - page);
+ uint16 rec_offs_bytes = mach_encode_2(ulint(r - page));
while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
@@ -234,9 +234,9 @@ page_set_autoinc(
{
ut_ad(mtr_memo_contains_flagged(
mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(index->page == block->page.id.page_no());
- ut_ad(index->space == block->page.id.space());
+ ut_ad(index->table->space_id == block->page.id.space());
byte* field = PAGE_HEADER + PAGE_ROOT_AUTO_INC
+ buf_block_get_frame(block);
@@ -357,12 +357,10 @@ page_create_low(
{
page_t* page;
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
+ compile_time_assert(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE
+ <= PAGE_DATA);
+ compile_time_assert(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE
+ <= PAGE_DATA);
buf_block_modify_clock_inc(block);
@@ -376,7 +374,8 @@ page_create_low(
memset(page + PAGE_HEADER, 0, PAGE_HEADER_PRIV_END);
page[PAGE_HEADER + PAGE_N_DIR_SLOTS + 1] = 2;
- page[PAGE_HEADER + PAGE_DIRECTION + 1] = PAGE_NO_DIRECTION;
+ page[PAGE_HEADER + PAGE_INSTANT] = 0;
+ page[PAGE_HEADER + PAGE_DIRECTION_B] = PAGE_NO_DIRECTION;
if (comp) {
page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;/*page_is_comp()*/
@@ -386,10 +385,10 @@ page_create_low(
sizeof infimum_supremum_compact);
memset(page
+ PAGE_NEW_SUPREMUM_END, 0,
- UNIV_PAGE_SIZE - PAGE_DIR - PAGE_NEW_SUPREMUM_END);
- page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ srv_page_size - PAGE_DIR - PAGE_NEW_SUPREMUM_END);
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
= PAGE_NEW_SUPREMUM;
- page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
= PAGE_NEW_INFIMUM;
} else {
page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW;
@@ -398,10 +397,10 @@ page_create_low(
sizeof infimum_supremum_redundant);
memset(page
+ PAGE_OLD_SUPREMUM_END, 0,
- UNIV_PAGE_SIZE - PAGE_DIR - PAGE_OLD_SUPREMUM_END);
- page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
+ srv_page_size - PAGE_DIR - PAGE_OLD_SUPREMUM_END);
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1]
= PAGE_OLD_SUPREMUM;
- page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
+ page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1]
= PAGE_OLD_INFIMUM;
}
@@ -474,19 +473,19 @@ page_create_zip(
/* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for
temporary tables. */
- ut_ad(max_trx_id == 0 || !dict_table_is_temporary(index->table));
+ ut_ad(max_trx_id == 0 || !index->table->is_temporary());
/* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID
must be zero on non-leaf pages. max_trx_id can be 0 when the
index consists of an empty root (leaf) page. */
ut_ad(max_trx_id == 0
|| level == 0
|| !dict_index_is_sec_or_ibuf(index)
- || dict_table_is_temporary(index->table));
+ || index->table->is_temporary());
/* In the clustered index, PAGE_ROOT_AUTOINC or
PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
ut_ad(level == 0 || max_trx_id == 0
|| !dict_index_is_sec_or_ibuf(index)
- || dict_table_is_temporary(index->table));
+ || index->table->is_temporary());
page = page_create_low(block, TRUE, is_spatial);
mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level);
@@ -532,7 +531,7 @@ page_create_empty(
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
- && !dict_table_is_temporary(index->table)
+ && !index->table->is_temporary()
&& page_is_leaf(page)) {
max_trx_id = page_get_max_trx_id(page);
ut_ad(max_trx_id);
@@ -544,7 +543,7 @@ page_create_empty(
}
if (page_zip) {
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
page_create_zip(block, index,
page_header_get_field(page, PAGE_LEVEL),
max_trx_id, NULL, mtr);
@@ -593,9 +592,9 @@ page_copy_rec_list_end_no_locks(
btr_assert_not_corrupted(new_block, index);
ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
- ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
+ ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint)
(page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
- ut_d(const bool is_leaf = page_is_leaf(block->frame));
+ const bool is_leaf = page_is_leaf(block->frame);
cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
@@ -714,7 +713,7 @@ page_copy_rec_list_end(
for MVCC. */
if (dict_index_is_sec_or_ibuf(index)
&& page_is_leaf(page)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
page_update_max_trx_id(new_block, NULL,
page_get_max_trx_id(page), mtr);
}
@@ -765,9 +764,10 @@ page_copy_rec_list_end(
/* Update the lock table and possible hash index */
- if (dict_index_is_spatial(index) && rec_move) {
+ if (dict_table_is_locking_disabled(index->table)) {
+ } else if (rec_move && dict_index_is_spatial(index)) {
lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
- } else if (!dict_table_is_locking_disabled(index->table)) {
+ } else {
lock_move_rec_list_end(new_block, block, rec);
}
@@ -775,7 +775,7 @@ page_copy_rec_list_end(
mem_heap_free(heap);
}
- btr_search_move_or_delete_hash_entries(new_block, block, index);
+ btr_search_move_or_delete_hash_entries(new_block, block);
return(ret);
}
@@ -874,7 +874,7 @@ page_copy_rec_list_start(
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
if (is_leaf && dict_index_is_sec_or_ibuf(index)
- && !dict_table_is_temporary(index->table)) {
+ && !index->table->is_temporary()) {
page_update_max_trx_id(new_block, NULL,
page_get_max_trx_id(page_align(rec)),
mtr);
@@ -925,9 +925,10 @@ zip_reorganize:
/* Update the lock table and possible hash index */
- if (dict_index_is_spatial(index)) {
+ if (dict_table_is_locking_disabled(index->table)) {
+ } else if (dict_index_is_spatial(index)) {
lock_rtr_move_rec_list(new_block, block, rec_move, num_moved);
- } else if (!dict_table_is_locking_disabled(index->table)) {
+ } else {
lock_move_rec_list_start(new_block, block, rec, ret);
}
@@ -935,7 +936,7 @@ zip_reorganize:
mem_heap_free(heap);
}
- btr_search_move_or_delete_hash_entries(new_block, block, index);
+ btr_search_move_or_delete_hash_entries(new_block, block);
return(ret);
}
@@ -1049,7 +1050,7 @@ page_delete_rec_list_end(
ulint* offsets = offsets_;
rec_offs_init(offsets_);
- ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
+ ut_ad(size == ULINT_UNDEFINED || size < srv_page_size);
ut_ad(!page_zip || page_rec_is_comp(rec));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -1103,7 +1104,7 @@ delete_all:
? MLOG_COMP_LIST_END_DELETE
: MLOG_LIST_END_DELETE, mtr);
- ut_d(const bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
if (page_zip) {
mtr_log_t log_mode;
@@ -1154,9 +1155,10 @@ delete_all:
is_leaf,
ULINT_UNDEFINED, &heap);
s = rec_offs_size(offsets);
- ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
- < UNIV_PAGE_SIZE);
- ut_ad(size + s < UNIV_PAGE_SIZE);
+ ut_ad(ulint(rec2 - page) + s
+ - rec_offs_extra_size(offsets)
+ < srv_page_size);
+ ut_ad(size + s < srv_page_size);
size += s;
n_recs++;
@@ -1173,7 +1175,7 @@ delete_all:
}
}
- ut_ad(size < UNIV_PAGE_SIZE);
+ ut_ad(size < srv_page_size);
/* Update the page directory; there is no need to balance the number
of the records owned by the supremum record, as it is allowed to be
@@ -1294,7 +1296,7 @@ page_delete_rec_list_start(
/* Individual deletes are not logged */
mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- ut_d(const bool is_leaf = page_rec_is_leaf(rec));
+ const bool is_leaf = page_rec_is_leaf(rec);
while (page_cur_get_rec(&cur1) != rec) {
offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
@@ -1628,7 +1630,7 @@ page_rec_get_nth_const(
return(page_get_infimum_rec(page));
}
- ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
+ ut_ad(nth < srv_page_size / (REC_N_NEW_EXTRA_BYTES + 1));
for (i = 0;; i++) {
@@ -1690,7 +1692,7 @@ page_rec_get_n_recs_before(
slot = page_dir_get_nth_slot(page, i);
slot_rec = page_dir_slot_get_rec(slot);
- n += rec_get_n_owned_new(slot_rec);
+ n += lint(rec_get_n_owned_new(slot_rec));
if (rec == slot_rec) {
@@ -1708,7 +1710,7 @@ page_rec_get_n_recs_before(
slot = page_dir_get_nth_slot(page, i);
slot_rec = page_dir_slot_get_rec(slot);
- n += rec_get_n_owned_old(slot_rec);
+ n += lint(rec_get_n_owned_old(slot_rec));
if (rec == slot_rec) {
@@ -1720,7 +1722,7 @@ page_rec_get_n_recs_before(
n--;
ut_ad(n >= 0);
- ut_ad((ulong) n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
+ ut_ad((ulong) n < srv_page_size / (REC_N_NEW_EXTRA_BYTES + 1));
return((ulint) n);
}
@@ -1872,20 +1874,20 @@ page_header_print(
fprintf(stderr,
"--------------------------------\n"
"PAGE HEADER INFO\n"
- "Page address %p, n records %lu (%s)\n"
- "n dir slots %lu, heap top %lu\n"
- "Page n heap %lu, free %lu, garbage %lu\n"
- "Page last insert %lu, direction %lu, n direction %lu\n",
- page, (ulong) page_header_get_field(page, PAGE_N_RECS),
+ "Page address %p, n records %u (%s)\n"
+ "n dir slots %u, heap top %u\n"
+ "Page n heap %u, free %u, garbage %u\n"
+ "Page last insert %u, direction %u, n direction %u\n",
+ page, page_header_get_field(page, PAGE_N_RECS),
page_is_comp(page) ? "compact format" : "original format",
- (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong) page_dir_get_n_heap(page),
- (ulong) page_header_get_field(page, PAGE_FREE),
- (ulong) page_header_get_field(page, PAGE_GARBAGE),
- (ulong) page_header_get_field(page, PAGE_LAST_INSERT),
- (ulong) page_header_get_field(page, PAGE_DIRECTION),
- (ulong) page_header_get_field(page, PAGE_N_DIRECTION));
+ page_header_get_field(page, PAGE_N_DIR_SLOTS),
+ page_header_get_field(page, PAGE_HEAP_TOP),
+ page_dir_get_n_heap(page),
+ page_header_get_field(page, PAGE_FREE),
+ page_header_get_field(page, PAGE_GARBAGE),
+ page_header_get_field(page, PAGE_LAST_INSERT),
+ page_get_direction(page),
+ page_header_get_field(page, PAGE_N_DIRECTION));
}
/***************************************************************//**
@@ -2013,7 +2015,7 @@ page_simple_validate_old(
n_slots = page_dir_get_n_slots(page);
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+ if (UNIV_UNLIKELY(n_slots > srv_page_size / 4)) {
ib::error() << "Nonsensical number " << n_slots
<< " of page dir slots";
@@ -2053,7 +2055,7 @@ page_simple_validate_old(
goto func_exit;
}
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) != 0)) {
/* This is a record pointed to by a dir slot */
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
!= own_count)) {
@@ -2089,7 +2091,7 @@ page_simple_validate_old(
if (UNIV_UNLIKELY
(rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
+ || rec_get_next_offs(rec, FALSE) >= srv_page_size)) {
ib::error() << "Next record offset nonsensical "
<< rec_get_next_offs(rec, FALSE) << " for rec "
@@ -2100,7 +2102,7 @@ page_simple_validate_old(
count++;
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
ib::error() << "Page record list appears"
" to be circular " << count;
goto func_exit;
@@ -2137,7 +2139,7 @@ page_simple_validate_old(
while (rec != NULL) {
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
+ || rec >= page + srv_page_size)) {
ib::error() << "Free list record has"
" a nonsensical offset " << (rec - page);
@@ -2154,7 +2156,7 @@ page_simple_validate_old(
count++;
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
ib::error() << "Page free list appears"
" to be circular " << count;
goto func_exit;
@@ -2203,7 +2205,7 @@ page_simple_validate_new(
n_slots = page_dir_get_n_slots(page);
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+ if (UNIV_UNLIKELY(n_slots > srv_page_size / 4)) {
ib::error() << "Nonsensical number " << n_slots
<< " of page dir slots";
@@ -2244,7 +2246,7 @@ page_simple_validate_new(
goto func_exit;
}
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
/* This is a record pointed to by a dir slot */
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
!= own_count)) {
@@ -2280,7 +2282,7 @@ page_simple_validate_new(
if (UNIV_UNLIKELY
(rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
+ || rec_get_next_offs(rec, TRUE) >= srv_page_size)) {
ib::error() << "Next record offset nonsensical "
<< rec_get_next_offs(rec, TRUE)
@@ -2291,7 +2293,7 @@ page_simple_validate_new(
count++;
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
ib::error() << "Page record list appears to be"
" circular " << count;
goto func_exit;
@@ -2328,7 +2330,7 @@ page_simple_validate_new(
while (rec != NULL) {
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
+ || rec >= page + srv_page_size)) {
ib::error() << "Free list record has"
" a nonsensical offset " << page_offset(rec);
@@ -2346,7 +2348,7 @@ page_simple_validate_new(
count++;
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(count > srv_page_size)) {
ib::error() << "Page free list appears to be"
" circular " << count;
goto func_exit;
@@ -2421,26 +2423,27 @@ page_validate(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it is not required
for MVCC. */
- if (dict_index_is_sec_or_ibuf(index)
- && !dict_table_is_temporary(index->table)
- && page_is_leaf(page)
- && !page_is_empty(page)) {
+ if (!page_is_leaf(page) || page_is_empty(page)
+ || !dict_index_is_sec_or_ibuf(index)
+ || index->table->is_temporary()) {
+ } else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) {
trx_id_t max_trx_id = page_get_max_trx_id(page);
- trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
ib::error() << "PAGE_MAX_TRX_ID out of bounds: "
<< max_trx_id << ", " << sys_max_trx_id;
goto func_exit2;
}
+ } else {
+ ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);
}
- heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
+ heap = mem_heap_create(srv_page_size + 200);
/* The following buffer is used to check that the
records in the page record heap do not overlap */
- buf = static_cast<byte*>(mem_heap_zalloc(heap, UNIV_PAGE_SIZE));
+ buf = static_cast<byte*>(mem_heap_zalloc(heap, srv_page_size));
/* Check first that the record heap and the directory do not
overlap. */
@@ -2532,7 +2535,7 @@ page_validate(
data_size += rec_offs_size(offsets);
-#if UNIV_GIS_DEBUG
+#if defined(UNIV_GIS_DEBUG)
/* For spatial index, print the mbr info.*/
if (index->type & DICT_SPATIAL) {
rec_print_mbr_rec(stderr, rec, offsets);
@@ -2543,7 +2546,7 @@ page_validate(
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
ib::error() << "Record offset out of bounds";
goto func_exit;
}
@@ -2564,7 +2567,7 @@ page_validate(
rec_own_count = rec_get_n_owned_old(rec);
}
- if (UNIV_UNLIKELY(rec_own_count)) {
+ if (UNIV_UNLIKELY(rec_own_count != 0)) {
/* This is a record pointed to by a dir slot */
if (UNIV_UNLIKELY(rec_own_count != own_count)) {
ib::error() << "Wrong owned count "
@@ -2651,7 +2654,7 @@ n_owned_zero:
count++;
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
ib::error() << "Record offset out of bounds";
goto func_exit;
}
@@ -2749,7 +2752,11 @@ page_delete_rec(
belongs to */
page_cur_t* pcur, /*!< in/out: page cursor on record
to delete */
- page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ page_zip_des_t*
+#ifdef UNIV_ZIP_DEBUG
+ page_zip/*!< in: compressed page descriptor */
+#endif
+ ,
const ulint* offsets)/*!< in: offsets for record */
{
bool no_compress_needed;
@@ -2761,8 +2768,7 @@ page_delete_rec(
if (!rec_offs_any_extern(offsets)
&& ((page_get_data_size(page) - rec_offs_size(offsets)
< BTR_CUR_PAGE_COMPRESS_LIMIT(index))
- || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
- && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
+ || !page_has_siblings(page)
|| (page_get_n_recs(page) < 2))) {
ulint root_page_no = dict_index_get_page(index);
@@ -2803,19 +2809,26 @@ page_find_rec_max_not_deleted(
const rec_t* rec = page_get_infimum_rec(page);
const rec_t* prev_rec = NULL; // remove warning
- /* Because the page infimum is never delete-marked,
+ /* Because the page infimum is never delete-marked
+ and never the metadata pseudo-record (MIN_REC_FLAG),
prev_rec will always be assigned to it first. */
- ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
+ ut_ad(!rec_get_info_bits(rec, page_rec_is_comp(rec)));
+ ut_ad(page_is_leaf(page));
+
if (page_is_comp(page)) {
do {
- if (!rec_get_deleted_flag(rec, true)) {
+ if (!(rec[-REC_NEW_INFO_BITS]
+ & (REC_INFO_DELETED_FLAG
+ | REC_INFO_MIN_REC_FLAG))) {
prev_rec = rec;
}
rec = page_rec_get_next_low(rec, true);
} while (rec != page + PAGE_NEW_SUPREMUM);
} else {
do {
- if (!rec_get_deleted_flag(rec, false)) {
+ if (!(rec[-REC_OLD_INFO_BITS]
+ & (REC_INFO_DELETED_FLAG
+ | REC_INFO_MIN_REC_FLAG))) {
prev_rec = rec;
}
rec = page_rec_get_next_low(rec, false);
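Several hunks in this file and in page0zip.cc below replace explicit FIL_PAGE_PREV/FIL_PAGE_NEXT comparisons against FIL_NULL with page_has_prev(), page_has_next() and page_has_siblings(). Their definitions are not included in this diff; judging only from the conditions they replace, they should be equivalent to something like the sketch below (assuming the usual InnoDB declarations of page_t, mach_read_from_4, FIL_PAGE_PREV, FIL_PAGE_NEXT and FIL_NULL):

/* Inferred from the replaced conditions; the actual definitions in
   include/page0page.h may differ in detail. */
inline bool page_has_prev(const page_t* page)
{
	return mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL;
}

inline bool page_has_next(const page_t* page)
{
	return mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL;
}

inline bool page_has_siblings(const page_t* page)
{
	return page_has_prev(page) || page_has_next(page);
}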
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 6b68ee973af..4b611baefae 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -156,7 +156,7 @@ page_zip_empty_size(
ulint n_fields, /*!< in: number of columns in the index */
ulint zip_size) /*!< in: compressed page size in bytes */
{
- lint size = zip_size
+ ulint size = zip_size
/* subtract the page header and the longest
uncompressed data needed for one record */
- (PAGE_DATA
@@ -166,7 +166,7 @@ page_zip_empty_size(
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
/* subtract the space for page_zip_fields_encode() */
- compressBound(static_cast<uLong>(2 * (n_fields + 1)));
- return(size > 0 ? (ulint) size : 0);
+ return(lint(size) > 0 ? size : 0);
}
/** Check whether a tuple is too big for compressed table
@@ -230,7 +230,8 @@ page_zip_dir_elems(
const page_zip_des_t* page_zip) /*!< in: compressed page */
{
/* Exclude the page infimum and supremum from the record count. */
- return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
+ return ulint(page_dir_get_n_heap(page_zip->data))
+ - PAGE_HEAP_NO_USER_LOW;
}
/*************************************************************//**
@@ -289,7 +290,7 @@ page_zip_dir_user_size(
const page_zip_des_t* page_zip) /*!< in: compressed page */
{
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
- * page_get_n_recs(page_zip->data);
+ * ulint(page_get_n_recs(page_zip->data));
ut_ad(size <= page_zip_dir_size(page_zip));
return(size);
}
@@ -397,7 +398,7 @@ page_zip_compress_write_log(
}
/* Read the number of user records. */
- trailer_size = page_dir_get_n_heap(page_zip->data)
+ trailer_size = ulint(page_dir_get_n_heap(page_zip->data))
- PAGE_HEAP_NO_USER_LOW;
/* Multiply by uncompressed of size stored per record */
if (!page_is_leaf(page)) {
@@ -411,15 +412,13 @@ page_zip_compress_write_log(
/* Add the space occupied by BLOB pointers. */
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
ut_a(page_zip->m_end > PAGE_DATA);
-#if FIL_PAGE_DATA > PAGE_DATA
-# error "FIL_PAGE_DATA > PAGE_DATA"
-#endif
+ compile_time_assert(FIL_PAGE_DATA <= PAGE_DATA);
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
MLOG_ZIP_PAGE_COMPRESS,
log_ptr, mtr);
- mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
+ mach_write_to_2(log_ptr, ulint(page_zip->m_end - FIL_PAGE_TYPE));
log_ptr += 2;
mach_write_to_2(log_ptr, trailer_size);
log_ptr += 2;
@@ -431,7 +430,7 @@ page_zip_compress_write_log(
/* Write most of the page header, the compressed stream and
the modification log. */
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
- page_zip->m_end - FIL_PAGE_TYPE);
+ ulint(page_zip->m_end - FIL_PAGE_TYPE));
/* Write the uncompressed trailer of the compressed page. */
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
- trailer_size, trailer_size);
@@ -614,7 +613,7 @@ page_zip_fields_encode(
}
buf = page_zip_fixed_field_encode(
- buf, field->fixed_len << 1);
+ buf, ulint(field->fixed_len) << 1);
col++;
}
}
@@ -670,8 +669,7 @@ page_zip_dir_encode(
status = REC_STATUS_ORDINARY;
} else {
status = REC_STATUS_NODE_PTR;
- if (UNIV_UNLIKELY
- (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
+ if (UNIV_UNLIKELY(!page_has_prev(page))) {
min_mark = REC_INFO_MIN_REC_FLAG;
}
}
@@ -695,15 +693,14 @@ page_zip_dir_encode(
heap_no = rec_get_heap_no_new(rec);
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
ut_a(heap_no < n_heap);
- ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
+ ut_a(offs < srv_page_size - PAGE_DIR);
ut_a(offs >= PAGE_ZIP_START);
-#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
-# error PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2
-#endif
-#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1
-# error PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1
-#endif
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+ compile_time_assert(!(PAGE_ZIP_DIR_SLOT_MASK
+ & (PAGE_ZIP_DIR_SLOT_MASK + 1)));
+ compile_time_assert(PAGE_ZIP_DIR_SLOT_MASK
+ >= UNIV_ZIP_SIZE_MAX - 1);
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
}
@@ -726,7 +723,7 @@ page_zip_dir_encode(
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
}
- ut_a(rec_get_status(rec) == status);
+ ut_a(ulint(rec_get_status(rec)) == status);
}
offs = page_header_get_field(page, PAGE_FREE);
@@ -741,7 +738,7 @@ page_zip_dir_encode(
ut_a(heap_no < n_heap);
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
- ut_a(rec_get_status(rec) == status);
+ ut_a(ulint(rec_get_status(rec)) == status);
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
@@ -810,7 +807,7 @@ page_zip_set_alloc(
#ifdef PAGE_ZIP_COMPRESS_DBG
/** Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
-static ibool page_zip_compress_dbg;
+static bool page_zip_compress_dbg;
/** Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
@@ -1296,7 +1293,7 @@ page_zip_compress(
&& dict_table_is_comp(index->table)
&& !dict_index_is_ibuf(index)));
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page, srv_page_size);
/* Check the data that will be omitted. */
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
@@ -1328,7 +1325,7 @@ page_zip_compress(
}
/* The dense directory excludes the infimum and supremum records. */
- n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+ n_dense = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;
#ifdef PAGE_ZIP_COMPRESS_DBG
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
ib::info() << "compress "
@@ -1347,8 +1344,8 @@ page_zip_compress(
if (logfile) {
/* Write the uncompressed page to the log. */
- if (fwrite(page, 1, UNIV_PAGE_SIZE, logfile)
- != UNIV_PAGE_SIZE) {
+ if (fwrite(page, 1, srv_page_size, logfile)
+ != srv_page_size) {
perror("fwrite");
}
/* Record the compressed size as zero.
@@ -1383,7 +1380,7 @@ page_zip_compress(
&& !dict_index_is_ibuf(index)
&& page_get_n_recs(page) >= 2
&& ((ulint)(rand() % 100) < srv_simulate_comp_failures)
- && strcasecmp(index->table_name, "IBUF_DUMMY") != 0) {
+ && strcmp(index->table->name.m_name, "IBUF_DUMMY")) {
#ifdef UNIV_DEBUG
ib::error()
@@ -1407,7 +1404,7 @@ page_zip_compress(
+ REC_OFFS_HEADER_SIZE
+ n_dense * ((sizeof *recs)
- PAGE_ZIP_DIR_SLOT_SIZE)
- + UNIV_PAGE_SIZE * 4
+ + srv_page_size * 4
+ (512 << MAX_MEM_LEVEL));
recs = static_cast<const rec_t**>(
@@ -1424,7 +1421,7 @@ page_zip_compress(
page_zip_set_alloc(&c_stream, heap);
err = deflateInit2(&c_stream, static_cast<int>(level),
- Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
+ Z_DEFLATED, srv_page_size_shift,
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
@@ -1545,7 +1542,7 @@ page_zip_compress(
c_stream.avail_in = static_cast<uInt>(
page_header_get_field(page, PAGE_HEAP_TOP)
- (c_stream.next_in - page));
- ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
+ ut_a(c_stream.avail_in <= srv_page_size - PAGE_ZIP_START - PAGE_DIR);
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
err = deflate(&c_stream, Z_FINISH);
@@ -1623,7 +1620,7 @@ err_exit:
/* Record the compressed size of the block. */
byte sz[4];
mach_write_to_4(sz, c_stream.total_out);
- fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
+ fseek(logfile, srv_page_size, SEEK_SET);
if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) {
perror("fwrite");
}
@@ -1708,11 +1705,9 @@ page_zip_fields_decode(
return(NULL);
}
- table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, 0,
+ table = dict_mem_table_create("ZIP_DUMMY", NULL, n, 0,
DICT_TF_COMPACT, 0);
- index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
- DICT_HDR_SPACE, 0, n);
- index->table = table;
+ index = dict_mem_index_create(table, "ZIP_DUMMY", 0, n);
index->n_uniq = unsigned(n);
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
index->cached = TRUE;
@@ -1776,6 +1771,11 @@ page_zip_fields_decode(
}
}
+ /* ROW_FORMAT=COMPRESSED does not support instant ADD COLUMN */
+ index->n_core_fields = index->n_fields;
+ index->n_core_null_bytes
+ = UT_BITS_IN_BYTES(unsigned(index->n_nullable));
+
ut_ad(b == end);
if (is_spatial) {
@@ -1817,7 +1817,7 @@ page_zip_dir_decode(
/* Traverse the list of stored records in the sorting order,
starting from the first user record. */
- slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
+ slot = page + (srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
UNIV_PREFETCH_RW(slot);
/* Zero out the page trailer. */
@@ -1851,7 +1851,7 @@ page_zip_dir_decode(
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
{
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
- page, page_dir_get_n_slots(page) - 1);
+ page, page_dir_get_n_slots(page) - 1U);
if (UNIV_UNLIKELY(slot != last_slot)) {
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
@@ -1934,7 +1934,7 @@ page_zip_set_extra_bytes(
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
/* The dense directory excludes the infimum and supremum records. */
- n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+ n = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;
if (i >= n) {
if (UNIV_LIKELY(i == n)) {
@@ -2020,8 +2020,8 @@ page_zip_apply_log_ext(
return(NULL);
}
- memcpy(next_out, data, dst - next_out);
- data += dst - next_out;
+ memcpy(next_out, data, ulint(dst - next_out));
+ data += ulint(dst - next_out);
next_out = dst + (DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN);
} else if (rec_offs_nth_extern(offsets, i)) {
@@ -2030,7 +2030,7 @@ page_zip_apply_log_ext(
ut_ad(len
>= BTR_EXTERN_FIELD_REF_SIZE);
- len += dst - next_out
+ len += ulint(dst - next_out)
- BTR_EXTERN_FIELD_REF_SIZE;
if (UNIV_UNLIKELY(data + len >= end)) {
@@ -2050,7 +2050,7 @@ page_zip_apply_log_ext(
}
/* Copy the last bytes of the record. */
- len = rec_get_end(rec, offsets) - next_out;
+ len = ulint(rec_get_end(rec, offsets) - next_out);
if (UNIV_UNLIKELY(data + len >= end)) {
page_zip_fail(("page_zip_apply_log_ext:"
" last %p+%lu >= %p\n",
@@ -2165,13 +2165,11 @@ page_zip_apply_log(
continue;
}
-#if REC_STATUS_NODE_PTR != TRUE
-# error "REC_STATUS_NODE_PTR != TRUE"
-#endif
+ compile_time_assert(REC_STATUS_NODE_PTR == TRUE);
rec_get_offsets_reverse(data, index,
hs & REC_STATUS_NODE_PTR,
offsets);
- rec_offs_make_valid(rec, index, offsets);
+ rec_offs_make_valid(rec, index, is_leaf, offsets);
/* Copy the extra bytes (backwards). */
{
@@ -2252,7 +2250,7 @@ page_zip_apply_log(
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- len = rec_get_end(rec, offsets) - b;
+ len = ulint(rec_get_end(rec, offsets) - b);
if (UNIV_UNLIKELY(data + len >= end)) {
page_zip_fail(("page_zip_apply_log:"
" clust %p+%lu >= %p\n",
@@ -2325,7 +2323,7 @@ page_zip_decompress_node_ptrs(
d_stream->avail_out = static_cast<uInt>(
rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
- ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+ ut_ad(d_stream->avail_out < srv_page_size
- PAGE_ZIP_START - PAGE_DIR);
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
case Z_STREAM_END:
@@ -2392,7 +2390,7 @@ page_zip_decompress_node_ptrs(
d_stream->avail_out = static_cast<uInt>(
page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
- PAGE_ZIP_START - PAGE_DIR)) {
page_zip_fail(("page_zip_decompress_node_ptrs:"
@@ -2423,9 +2421,10 @@ zlib_done:
/* Clear the unused heap space on the uncompressed page. */
memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
+ ulint(page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page)
+ - 1U)
+ - d_stream->next_out));
}
#ifdef UNIV_DEBUG
@@ -2546,7 +2545,7 @@ page_zip_decompress_sec(
d_stream->avail_out = static_cast<uInt>(
page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
- PAGE_ZIP_START - PAGE_DIR)) {
page_zip_fail(("page_zip_decompress_sec:"
@@ -2577,9 +2576,10 @@ zlib_done:
/* Clear the unused heap space on the uncompressed page. */
memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
+ ulint(page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page)
+ - 1U)
+ - d_stream->next_out));
}
ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));
@@ -2765,7 +2765,7 @@ page_zip_decompress_clust(
d_stream->avail_out =static_cast<uInt>(
rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
- ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+ ut_ad(d_stream->avail_out < srv_page_size
- PAGE_ZIP_START - PAGE_DIR);
err = inflate(d_stream, Z_SYNC_FLUSH);
switch (err) {
@@ -2875,7 +2875,7 @@ page_zip_decompress_clust(
d_stream->avail_out = static_cast<uInt>(
page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
- PAGE_ZIP_START - PAGE_DIR)) {
page_zip_fail(("page_zip_decompress_clust:"
@@ -2906,9 +2906,10 @@ zlib_done:
/* Clear the unused heap space on the uncompressed page. */
memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
+ ulint(page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page)
+ - 1U)
+ - d_stream->next_out));
}
ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));
@@ -2952,7 +2953,7 @@ zlib_done:
ulint len;
byte* dst;
rec_t* rec = recs[slot];
- ibool exists = !page_zip_dir_find_free(
+ bool exists = !page_zip_dir_find_free(
page_zip, page_offset(rec));
offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
@@ -3048,7 +3049,7 @@ page_zip_decompress_low(
ulint* offsets;
ut_ad(page_zip_simple_validate(page_zip));
- UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_W(page, srv_page_size);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
/* The dense directory excludes the infimum and supremum records. */
@@ -3061,7 +3062,7 @@ page_zip_decompress_low(
return(FALSE);
}
- heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
+ heap = mem_heap_create(n_dense * (3 * sizeof *recs) + srv_page_size);
recs = static_cast<rec_t**>(
mem_heap_alloc(heap, n_dense * sizeof *recs));
@@ -3093,9 +3094,9 @@ page_zip_decompress_low(
#ifdef UNIV_ZIP_DEBUG
/* Clear the uncompressed page, except the header. */
- memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
+ memset(PAGE_DATA + page, 0x55, srv_page_size - PAGE_DATA);
#endif /* UNIV_ZIP_DEBUG */
- UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
+ UNIV_MEM_INVALID(PAGE_DATA + page, srv_page_size - PAGE_DATA);
/* Copy the page directory. */
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
@@ -3128,9 +3129,9 @@ zlib_error:
d_stream.avail_in = static_cast<uInt>(
page_zip_get_size(page_zip) - (PAGE_DATA + 1));
d_stream.next_out = page + PAGE_ZIP_START;
- d_stream.avail_out = uInt(UNIV_PAGE_SIZE - PAGE_ZIP_START);
+ d_stream.avail_out = uInt(srv_page_size - PAGE_ZIP_START);
- if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
+ if (UNIV_UNLIKELY(inflateInit2(&d_stream, srv_page_size_shift)
!= Z_OK)) {
ut_error;
}
@@ -3187,8 +3188,7 @@ zlib_error:
goto err_exit;
}
- info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
- ? REC_INFO_MIN_REC_FLAG : 0;
+ info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG;
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
info_bits))) {
@@ -3226,7 +3226,7 @@ err_exit:
}
ut_a(page_is_comp(page));
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page, srv_page_size);
page_zip_fields_free(index);
mem_heap_free(heap);
@@ -3315,7 +3315,7 @@ page_zip_hexdump_func(
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
/** Flag: make page_zip_validate() compare page headers only */
-ibool page_zip_validate_header_only = FALSE;
+bool page_zip_validate_header_only;
/**********************************************************************//**
Check that the compressed and decompressed pages match.
@@ -3342,7 +3342,7 @@ page_zip_validate_low(
page_zip_fail(("page_zip_validate: page header\n"));
page_zip_hexdump(page_zip, sizeof *page_zip);
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
- page_zip_hexdump(page, UNIV_PAGE_SIZE);
+ page_zip_hexdump(page, srv_page_size);
return(FALSE);
}
@@ -3353,11 +3353,12 @@ page_zip_validate_low(
}
/* page_zip_decompress() expects the uncompressed page to be
- UNIV_PAGE_SIZE aligned. */
- temp_page_buf = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
- temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE));
+ srv_page_size aligned. */
+ temp_page_buf = static_cast<byte*>(
+ ut_malloc_nokey(2 << srv_page_size_shift));
+ temp_page = static_cast<byte*>(ut_align(temp_page_buf, srv_page_size));
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page, srv_page_size);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
temp_page_zip = *page_zip;
@@ -3390,7 +3391,7 @@ page_zip_validate_low(
valid = FALSE;
}
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
- UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
+ srv_page_size - PAGE_HEADER - FIL_PAGE_DATA_END)) {
/* In crash recovery, the "minimum record" flag may be
set incorrectly until the mini-transaction is
@@ -3414,7 +3415,7 @@ page_zip_validate_low(
if (!memcmp(page + PAGE_HEADER,
temp_page + PAGE_HEADER,
- UNIV_PAGE_SIZE - PAGE_HEADER
+ srv_page_size - PAGE_HEADER
- FIL_PAGE_DATA_END)) {
/* Only the minimum record flag
@@ -3465,7 +3466,7 @@ page_zip_validate_low(
page + PAGE_NEW_INFIMUM, TRUE);
trec = page_rec_get_next_low(
temp_page + PAGE_NEW_INFIMUM, TRUE);
- ut_d(const bool is_leaf = page_is_leaf(page));
+ const bool is_leaf = page_is_leaf(page);
do {
if (page_offset(rec) != page_offset(trec)) {
@@ -3508,8 +3509,8 @@ func_exit:
if (!valid) {
page_zip_hexdump(page_zip, sizeof *page_zip);
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
- page_zip_hexdump(page, UNIV_PAGE_SIZE);
- page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
+ page_zip_hexdump(page, srv_page_size);
+ page_zip_hexdump(temp_page, srv_page_size);
}
ut_free(temp_page_buf);
return(valid);
@@ -3607,7 +3608,7 @@ page_zip_write_rec_ext(
memmove(ext_end - n_ext
* BTR_EXTERN_FIELD_REF_SIZE,
ext_end,
- externs - ext_end);
+ ulint(externs - ext_end));
}
ut_a(blob_no + n_ext <= page_zip->n_blobs);
@@ -3633,7 +3634,7 @@ page_zip_write_rec_ext(
/* Log the preceding fields. */
ASSERT_ZERO(data, src - start);
- memcpy(data, start, src - start);
+ memcpy(data, start, ulint(src - start));
data += src - start;
start = src + (DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN);
@@ -3653,7 +3654,7 @@ page_zip_write_rec_ext(
src += len - BTR_EXTERN_FIELD_REF_SIZE;
ASSERT_ZERO(data, src - start);
- memcpy(data, start, src - start);
+ memcpy(data, start, ulint(src - start));
data += src - start;
start = src + BTR_EXTERN_FIELD_REF_SIZE;
@@ -3665,7 +3666,7 @@ page_zip_write_rec_ext(
}
/* Log the last bytes of the record. */
- len = rec_offs_data_size(offsets) - (start - rec);
+ len = rec_offs_data_size(offsets) - ulint(start - rec);
ASSERT_ZERO(data, len);
memcpy(data, start, len);
@@ -3725,7 +3726,7 @@ page_zip_write_rec(
}
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
- ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
+ ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + srv_page_size
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
* page_dir_get_n_slots(page));
@@ -3796,7 +3797,7 @@ page_zip_write_rec(
/* Log the preceding fields. */
ASSERT_ZERO(data, src - rec);
- memcpy(data, rec, src - rec);
+ memcpy(data, rec, ulint(src - rec));
data += src - rec;
/* Store trx_id and roll_ptr. */
@@ -3810,7 +3811,7 @@ page_zip_write_rec(
/* Log the last bytes of the record. */
len = rec_offs_data_size(offsets)
- - (src - rec);
+ - ulint(src - rec);
ASSERT_ZERO(data, len);
memcpy(data, src, len);
@@ -3891,8 +3892,8 @@ page_zip_parse_write_blob_ptr(
z_offset = mach_read_from_2(ptr + 2);
if (offset < PAGE_ZIP_START
- || offset >= UNIV_PAGE_SIZE
- || z_offset >= UNIV_PAGE_SIZE) {
+ || offset >= srv_page_size
+ || z_offset >= srv_page_size) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
@@ -3998,7 +3999,7 @@ page_zip_write_blob_ptr(
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(field));
log_ptr += 2;
- mach_write_to_2(log_ptr, externs - page_zip->data);
+ mach_write_to_2(log_ptr, ulint(externs - page_zip->data));
log_ptr += 2;
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
@@ -4033,8 +4034,8 @@ page_zip_parse_write_node_ptr(
z_offset = mach_read_from_2(ptr + 2);
if (offset < PAGE_ZIP_START
- || offset >= UNIV_PAGE_SIZE
- || z_offset >= UNIV_PAGE_SIZE) {
+ || offset >= srv_page_size
+ || z_offset >= srv_page_size) {
corrupt:
recv_sys->found_corrupt_log = TRUE;
@@ -4061,7 +4062,7 @@ corrupt:
storage_end = page_zip_dir_start(page_zip);
- heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
+ heap_no = 1 + ulint(storage_end - storage) / REC_NODE_PTR_SIZE;
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
@@ -4119,9 +4120,7 @@ page_zip_write_node_ptr(
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#if REC_NODE_PTR_SIZE != 4
-# error "REC_NODE_PTR_SIZE != 4"
-#endif
+ compile_time_assert(REC_NODE_PTR_SIZE == 4);
mach_write_to_4(field, ptr);
memcpy(storage, field, REC_NODE_PTR_SIZE);
@@ -4136,7 +4135,7 @@ page_zip_write_node_ptr(
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(field));
log_ptr += 2;
- mach_write_to_2(log_ptr, storage - page_zip->data);
+ mach_write_to_2(log_ptr, ulint(storage - page_zip->data));
log_ptr += 2;
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
log_ptr += REC_NODE_PTR_SIZE;
@@ -4144,17 +4143,23 @@ page_zip_write_node_ptr(
}
}
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
+@param[in,out] page_zip compressed page
+@param[in,out] rec record
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in] trx_id_field field number of DB_TRX_ID (number of PK fields)
+@param[in] trx_id DB_TRX_ID value (transaction identifier)
+@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer)
+@param[in,out] mtr mini-transaction, or NULL to skip logging */
void
page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
- trx_id_t trx_id, /*!< in: transaction identifier */
- roll_ptr_t roll_ptr)/*!< in: roll_ptr */
+ page_zip_des_t* page_zip,
+ byte* rec,
+ const ulint* offsets,
+ ulint trx_id_col,
+ trx_id_t trx_id,
+ roll_ptr_t roll_ptr,
+ mtr_t* mtr)
{
byte* field;
byte* storage;
@@ -4181,9 +4186,7 @@ page_zip_write_trx_id_and_roll_ptr(
- (rec_get_heap_no_new(rec) - 1)
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
ut_ad(len == DATA_TRX_ID_LEN);
ut_ad(field + DATA_TRX_ID_LEN
@@ -4192,13 +4195,9 @@ page_zip_write_trx_id_and_roll_ptr(
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
+ compile_time_assert(DATA_TRX_ID_LEN == 6);
mach_write_to_6(field, trx_id);
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
@@ -4206,6 +4205,83 @@ page_zip_write_trx_id_and_roll_ptr(
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
rec_offs_extra_size(offsets));
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ if (mtr) {
+ byte* log_ptr = mlog_open(
+ mtr, 11 + 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ return;
+ }
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ (byte*) field, MLOG_ZIP_WRITE_TRX_ID, log_ptr, mtr);
+ mach_write_to_2(log_ptr, page_offset(field));
+ log_ptr += 2;
+ mach_write_to_2(log_ptr, ulint(storage - page_zip->data));
+ log_ptr += 2;
+ memcpy(log_ptr, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ log_ptr += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+ mlog_close(mtr, log_ptr);
+ }
+}
+
+/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
+@param[in] ptr redo log buffer
+@param[in] end_ptr end of redo log buffer
+@param[in,out] page uncompressed page
+@param[in,out] page_zip compressed page
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+page_zip_parse_write_trx_id(
+ byte* ptr,
+ byte* end_ptr,
+ page_t* page,
+ page_zip_des_t* page_zip)
+{
+ byte* const end = 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + ptr;
+
+ if (UNIV_UNLIKELY(end_ptr < end)) {
+ return(NULL);
+ }
+
+ uint offset = mach_read_from_2(ptr);
+ uint z_offset = mach_read_from_2(ptr + 2);
+
+ if (offset < PAGE_ZIP_START
+ || offset >= srv_page_size
+ || z_offset >= srv_page_size) {
+corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ if (page) {
+ if (!page_zip || !page_is_leaf(page)) {
+ goto corrupt;
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+
+ byte* field = page + offset;
+ byte* storage = page_zip->data + z_offset;
+
+ if (storage >= page_zip_dir_start(page_zip)) {
+ goto corrupt;
+ }
+
+ memcpy(field, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ memcpy(storage, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return end;
}
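The writer in page_zip_write_trx_id_and_roll_ptr() and the new parser page_zip_parse_write_trx_id() above together fix the body of the MLOG_ZIP_WRITE_TRX_ID record: a 2-byte page offset of the DB_TRX_ID field, a 2-byte offset into page_zip->data, and the 6+7 bytes of DB_TRX_ID and DB_ROLL_PTR. The helper below merely restates that layout for reference; the function name is made up, the 11-byte generic header from mlog_write_initial_log_record_fast() is omitted, and the InnoDB declarations of mach_write_to_2, DATA_TRX_ID_LEN and DATA_ROLL_PTR_LEN are assumed.

/* Hypothetical helper: encodes just the MLOG_ZIP_WRITE_TRX_ID body,
   matching the bytes written and consumed by the functions above. */
static byte*
zip_write_trx_id_body(byte* b, ulint page_off, ulint zip_off,
		      const byte* trx_id_and_roll_ptr /* 13 bytes */)
{
	mach_write_to_2(b, page_off);
	b += 2;
	mach_write_to_2(b, zip_off);
	b += 2;
	memcpy(b, trx_id_and_roll_ptr,
	       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
	return b + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
}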
/**********************************************************************//**
@@ -4385,7 +4461,7 @@ page_zip_dir_insert(
/* Read the old n_dense (n_heap may have been incremented). */
n_dense = page_dir_get_n_heap(page_zip->data)
- - (PAGE_HEAP_NO_USER_LOW + 1);
+ - (PAGE_HEAP_NO_USER_LOW + 1U);
if (UNIV_LIKELY_NULL(free_rec)) {
/* The record was allocated from the free list.
@@ -4412,7 +4488,7 @@ page_zip_dir_insert(
/* Shift the dense directory to allocate place for rec. */
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
- slot_rec - slot_free);
+ ulint(slot_rec - slot_free));
/* Write the entry for the inserted record.
The "owned" and "deleted" flags must be zero. */
@@ -4470,7 +4546,7 @@ page_zip_dir_delete(
if (UNIV_LIKELY(slot_rec > slot_free)) {
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
slot_free,
- slot_rec - slot_free);
+ ulint(slot_rec - slot_free));
}
/* Write the entry for the deleted record.
@@ -4483,7 +4559,7 @@ page_zip_dir_delete(
}
n_ext = rec_offs_n_extern(offsets);
- if (UNIV_UNLIKELY(n_ext)) {
+ if (UNIV_UNLIKELY(n_ext != 0)) {
/* Shift and zero fill the array of BLOB pointers. */
ulint blob_no;
byte* externs;
@@ -4503,7 +4579,7 @@ page_zip_dir_delete(
page_zip->n_blobs -= static_cast<unsigned>(n_ext);
/* Shift and zero fill the array. */
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
- (page_zip->n_blobs - blob_no)
+ ulint(page_zip->n_blobs - blob_no)
* BTR_EXTERN_FIELD_REF_SIZE);
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
}
@@ -4534,7 +4610,7 @@ page_zip_dir_add_slot(
/* Read the old n_dense (n_heap has already been incremented). */
n_dense = page_dir_get_n_heap(page_zip->data)
- - (PAGE_HEAP_NO_USER_LOW + 1);
+ - (PAGE_HEAP_NO_USER_LOW + 1U);
dir = page_zip->data + page_zip_get_size(page_zip)
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
@@ -4554,7 +4630,7 @@ page_zip_dir_add_slot(
ASSERT_ZERO(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);
memmove(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
- externs, stored - externs);
+ externs, ulint(stored - externs));
} else {
stored = dir
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
@@ -4564,7 +4640,7 @@ page_zip_dir_add_slot(
/* Move the uncompressed area backwards to make space
for one directory slot. */
- memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
+ memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, ulint(dir - stored));
}
/***********************************************************//**
@@ -4639,9 +4715,7 @@ page_zip_write_header_log(
ut_ad(offset < PAGE_DATA);
ut_ad(offset + length < PAGE_DATA);
-#if PAGE_DATA > 255
-# error "PAGE_DATA > 255"
-#endif
+ compile_time_assert(PAGE_DATA < 256U);
ut_ad(length > 0);
ut_ad(length < 256);
@@ -4690,9 +4764,9 @@ page_zip_reorganize(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_is_comp(page));
ut_ad(!dict_index_is_ibuf(index));
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
/* Note that page_zip_validate(page_zip, page, index) may fail here. */
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page, srv_page_size);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
/* Disable logging */
@@ -4766,7 +4840,7 @@ page_zip_copy_recs(
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
ut_ad(!dict_index_is_ibuf(index));
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
#ifdef UNIV_ZIP_DEBUG
/* The B-tree operations that call this function may set
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
@@ -4780,22 +4854,20 @@ page_zip_copy_recs(
ut_a(dict_index_is_clust(index));
}
- UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_W(page, srv_page_size);
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
- UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(src, srv_page_size);
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
/* Copy those B-tree page header fields that are related to
the records stored in the page. Also copy the field
PAGE_MAX_TRX_ID. Skip the rest of the page header and
trailer. On the compressed page, there is no trailer. */
-#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
-# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
-#endif
+ compile_time_assert(PAGE_MAX_TRX_ID + 8 == PAGE_HEADER_PRIV_END);
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
PAGE_HEADER_PRIV_END);
memcpy(PAGE_DATA + page, PAGE_DATA + src,
- UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
+ srv_page_size - PAGE_DATA - FIL_PAGE_DATA_END);
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
PAGE_HEADER_PRIV_END);
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
@@ -4824,9 +4896,8 @@ page_zip_copy_recs(
+ page_zip->m_end < page_zip_get_size(page_zip));
if (!page_is_leaf(src)
- && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
- && UNIV_LIKELY(mach_read_from_4(page
- + FIL_PAGE_PREV) != FIL_NULL)) {
+ && UNIV_UNLIKELY(!page_has_prev(src))
+ && UNIV_LIKELY(page_has_prev(page))) {
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
TRUE);
@@ -4998,9 +5069,7 @@ page_zip_verify_checksum(
(data) + FIL_PAGE_SPACE_ID);
const page_id_t page_id(space_id, page_no);
-#if FIL_PAGE_LSN % 8
-#error "FIL_PAGE_LSN must be 64 bit aligned"
-#endif
+ compile_time_assert(!(FIL_PAGE_LSN % 8));
/* Check if page is empty */
if (stored == 0
diff --git a/storage/innobase/pars/pars0opt.cc b/storage/innobase/pars/pars0opt.cc
index 934cd80fd14..28ab2ccd505 100644
--- a/storage/innobase/pars/pars0opt.cc
+++ b/storage/innobase/pars/pars0opt.cc
@@ -205,7 +205,7 @@ opt_look_for_col_in_comparison_before(
if (opt_check_exp_determined_before(exp, sel_node,
nth_table)) {
- *op = search_cond->func;
+ *op = ulint(search_cond->func);
return(exp);
}
@@ -224,7 +224,8 @@ opt_look_for_col_in_comparison_before(
if (opt_check_exp_determined_before(exp, sel_node,
nth_table)) {
- *op = opt_invert_cmp_op(search_cond->func);
+ *op = ulint(opt_invert_cmp_op(
+ search_cond->func));
return(exp);
}
@@ -1256,7 +1257,7 @@ opt_print_query_plan(
fprintf(stderr,
"Index %s of table %s"
"; exact m. %lu, match %lu, end conds %lu\n",
- plan->index->name(), plan->index->table_name,
+ plan->index->name(), plan->index->table->name.m_name,
(unsigned long) plan->n_exact_match,
(unsigned long) n_fields,
(unsigned long) UT_LIST_GET_LEN(plan->end_conds));
diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc
index d7447810912..33dc9ebf602 100644
--- a/storage/innobase/pars/pars0pars.cc
+++ b/storage/innobase/pars/pars0pars.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1082,7 +1083,7 @@ pars_update_statement_start(
node = upd_node_create(pars_sym_tab_global->heap);
- node->is_delete = is_delete;
+ node->is_delete = is_delete ? PLAIN_DELETE : NO_DELETE;
node->table_sym = table_sym;
node->col_assign_list = col_assign_list;
@@ -1247,9 +1248,9 @@ pars_update_statement(
node->select = sel_node;
ut_a(!node->is_delete || (node->col_assign_list == NULL));
- ut_a(node->is_delete || (node->col_assign_list != NULL));
+ ut_a(node->is_delete == PLAIN_DELETE || node->col_assign_list != NULL);
- if (node->is_delete) {
+ if (node->is_delete == PLAIN_DELETE) {
node->cmpl_info = 0;
} else {
pars_process_assign_list(node);
@@ -1828,7 +1829,7 @@ pars_column_def(
ulint len2;
if (len) {
- len2 = eval_node_get_int_val(len);
+ len2 = ulint(eval_node_get_int_val(len));
} else {
len2 = 0;
}
@@ -1911,14 +1912,15 @@ pars_create_table(
n_cols = que_node_list_get_len(column_defs);
table = dict_mem_table_create(
- table_sym->name, 0, n_cols, 0, flags, flags2);
+ table_sym->name, NULL, n_cols, 0, flags, flags2);
+ mem_heap_t* heap = pars_sym_tab_global->heap;
column = column_defs;
while (column) {
dtype = dfield_get_type(que_node_get_val(column));
- dict_mem_table_add_col(table, table->heap,
+ dict_mem_table_add_col(table, heap,
column->name, dtype->mtype,
dtype->prtype, dtype->len);
column->resolved = TRUE;
@@ -1927,8 +1929,10 @@ pars_create_table(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = tab_create_graph_create(table, pars_sym_tab_global->heap,
- FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+ dict_table_add_system_columns(table, heap);
+ node = tab_create_graph_create(table, heap,
+ FIL_ENCRYPTION_DEFAULT,
+ FIL_DEFAULT_ENCRYPTION_KEY);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -1968,7 +1972,7 @@ pars_create_index(
ind_type = ind_type | DICT_CLUSTERED;
}
- index = dict_mem_index_create(table_sym->name, index_sym->name, 0,
+ index = dict_mem_index_create(NULL, index_sym->name,
ind_type, n_fields);
column = column_list;
@@ -1981,7 +1985,8 @@ pars_create_index(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = ind_create_graph_create(index, pars_sym_tab_global->heap, NULL);
+ node = ind_create_graph_create(index, table_sym->name,
+ pars_sym_tab_global->heap);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2066,9 +2071,8 @@ pars_get_lex_chars(
{
int len;
- len = static_cast<int>(
- pars_sym_tab_global->string_len
- - pars_sym_tab_global->next_char_pos);
+ len = int(pars_sym_tab_global->string_len)
+ - pars_sym_tab_global->next_char_pos;
if (len == 0) {
return(0);
}
@@ -2077,8 +2081,8 @@ pars_get_lex_chars(
len = max_size;
}
- ut_memcpy(buf, pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos, len);
+ memcpy(buf, pars_sym_tab_global->sql_string
+ + pars_sym_tab_global->next_char_pos, ulint(len));
pars_sym_tab_global->next_char_pos += len;
@@ -2337,7 +2341,7 @@ pars_info_add_int4_literal(
/*=======================*/
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
- lint val) /*!< in: value */
+ ulint val) /*!< in: value */
{
byte* buf = static_cast<byte*>(mem_heap_alloc(info->heap, 4));
diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc
index 05964403543..ef40c3479a0 100644
--- a/storage/innobase/que/que0que.cc
+++ b/storage/innobase/que/que0que.cc
@@ -479,7 +479,7 @@ que_graph_free_recursive(
if (upd->in_mysql_interface) {
btr_pcur_free_for_mysql(upd->pcur);
- upd->in_mysql_interface = FALSE;
+ upd->in_mysql_interface = false;
}
que_graph_free_recursive(upd->cascade_node);
@@ -1001,11 +1001,6 @@ que_thr_step(
} else if (type == QUE_NODE_FOR) {
for_step(thr);
} else if (type == QUE_NODE_PROC) {
-
- /* We can access trx->undo_no without reserving
- trx->undo_mutex, because there cannot be active query
- threads doing updating or inserting at the moment! */
-
if (thr->prev_node == que_node_get_parent(node)) {
trx->last_sql_stat_start.least_undo_no
= trx->undo_no;
@@ -1022,8 +1017,10 @@ que_thr_step(
} else if (type == QUE_NODE_SELECT) {
thr = row_sel_step(thr);
} else if (type == QUE_NODE_INSERT) {
+ trx_start_if_not_started_xa(thr_get_trx(thr), true);
thr = row_ins_step(thr);
} else if (type == QUE_NODE_UPDATE) {
+ trx_start_if_not_started_xa(thr_get_trx(thr), true);
thr = row_upd_step(thr);
} else if (type == QUE_NODE_FETCH) {
thr = fetch_step(thr);
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index 2fb7083b0b2..470c8ec63f1 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,10 +24,11 @@ Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
-#include "read0read.h"
+#include "read0types.h"
#include "srv0srv.h"
#include "trx0sys.h"
+#include "trx0purge.h"
/*
-------------------------------------------------------------------------------
@@ -162,8 +164,8 @@ For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
Some additional issues:
-What if trx_sys->view_list == NULL and some transaction T1 and Purge both
-try to open read_view at same time. Only one can acquire trx_sys->mutex.
+What if trx_sys.view_list == NULL and some transaction T1 and Purge both
+try to open read_view at same time. Only one can acquire trx_sys.mutex.
In which order will the views be opened? Should it matter? If no, why?
The order does not matter. No new transactions can be created and no running
@@ -171,611 +173,124 @@ RW transaction can commit or rollback (or free views). AC-NL-RO transactions
will mark their views as closed but not actually free their views.
*/
-/** Minimum number of elements to reserve in ReadView::ids_t */
-static const ulint MIN_TRX_IDS = 32;
-
-#ifdef UNIV_DEBUG
-/** Functor to validate the view list. */
-struct ViewCheck {
-
- ViewCheck() : m_prev_view() { }
-
- void operator()(const ReadView* view)
- {
- ut_a(m_prev_view == NULL
- || view->is_closed()
- || view->le(m_prev_view));
-
- m_prev_view = view;
- }
-
- const ReadView* m_prev_view;
-};
-
-/**
-Validates a read view list. */
-
-bool
-MVCC::validate() const
-{
- ViewCheck check;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_list_map(m_views, check);
-
- return(true);
-}
-#endif /* UNIV_DEBUG */
-
-/**
-Try and increase the size of the array. Old elements are
-copied across.
-@param n Make space for n elements */
-
-void
-ReadView::ids_t::reserve(ulint n)
-{
- if (n <= capacity()) {
- return;
- }
-
- /** Keep a minimum threshold */
- if (n < MIN_TRX_IDS) {
- n = MIN_TRX_IDS;
- }
-
- value_type* p = m_ptr;
-
- m_ptr = UT_NEW_ARRAY_NOKEY(value_type, n);
-
- m_reserved = n;
-
- ut_ad(size() < capacity());
-
- if (p != NULL) {
-
- ::memmove(m_ptr, p, size() * sizeof(value_type));
-
- UT_DELETE_ARRAY(p);
- }
-}
-
-/**
-Copy and overwrite this array contents
-@param start Source array
-@param end Pointer to end of array */
-
-void
-ReadView::ids_t::assign(const value_type* start, const value_type* end)
-{
- ut_ad(end >= start);
-
- ulint n = end - start;
-
- /* No need to copy the old contents across during reserve(). */
- clear();
-
- /* Create extra space if required. */
- reserve(n);
-
- resize(n);
-
- ut_ad(size() == n);
-
- ::memmove(m_ptr, start, size() * sizeof(value_type));
-}
-
-/**
-Append a value to the array.
-@param value the value to append */
-
-void
-ReadView::ids_t::push_back(value_type value)
-{
- if (capacity() <= size()) {
- reserve(size() * 2);
- }
-
- m_ptr[m_size++] = value;
- ut_ad(size() <= capacity());
-}
-
-/**
-Insert the value in the correct slot, preserving the order. Doesn't
-check for duplicates. */
-
-void
-ReadView::ids_t::insert(value_type value)
-{
- ut_ad(value > 0);
-
- reserve(size() + 1);
-
- if (empty() || back() < value) {
- push_back(value);
- return;
- }
-
- value_type* end = data() + size();
- value_type* ub = std::upper_bound(data(), end, value);
-
- if (ub == end) {
- push_back(value);
- } else {
- ut_ad(ub < end);
-
- ulint n_elems = std::distance(ub, end);
- ulint n = n_elems * sizeof(value_type);
-
- /* Note: Copying overlapped memory locations. */
- ::memmove(ub + 1, ub, n);
-
- *ub = value;
-
- resize(size() + 1);
- }
-}
-
-/**
-ReadView constructor */
-ReadView::ReadView()
- :
- m_low_limit_id(),
- m_up_limit_id(),
- m_creator_trx_id(),
- m_ids(),
- m_low_limit_no()
-{
- ut_d(::memset(&m_view_list, 0x0, sizeof(m_view_list)));
-}
-
-/**
-ReadView destructor */
-ReadView::~ReadView()
-{
- // Do nothing
-}
-
-/** Constructor
-@param size Number of views to pre-allocate */
-MVCC::MVCC(ulint size)
-{
- UT_LIST_INIT(m_free, &ReadView::m_view_list);
- UT_LIST_INIT(m_views, &ReadView::m_view_list);
-
- for (ulint i = 0; i < size; ++i) {
- ReadView* view = UT_NEW_NOKEY(ReadView());
-
- UT_LIST_ADD_FIRST(m_free, view);
- }
-}
-
-MVCC::~MVCC()
-{
- for (ReadView* view = UT_LIST_GET_FIRST(m_free);
- view != NULL;
- view = UT_LIST_GET_FIRST(m_free)) {
-
- UT_LIST_REMOVE(m_free, view);
-
- UT_DELETE(view);
- }
-
- ut_a(UT_LIST_GET_LEN(m_views) == 0);
-}
-
-/**
-Copy the transaction ids from the source vector */
-
-void
-ReadView::copy_trx_ids(const trx_ids_t& trx_ids)
-{
- ulint size = trx_ids.size();
-
- if (m_creator_trx_id > 0) {
- ut_ad(size > 0);
- --size;
- }
-
- if (size == 0) {
- m_ids.clear();
- return;
- }
-
- m_ids.reserve(size);
- m_ids.resize(size);
-
- ids_t::value_type* p = m_ids.data();
-
- /* Copy all the trx_ids except the creator trx id */
-
- if (m_creator_trx_id > 0) {
-
- /* Note: We go through all this trouble because it is
- unclear whether std::vector::resize() will cause an
- overhead or not. We should test this extensively and
- if the vector to vector copy is fast enough then get
- rid of this code and replace it with more readable
- and obvious code. The code below does exactly one copy,
- and filters out the creator's trx id. */
-
- trx_ids_t::const_iterator it = std::lower_bound(
- trx_ids.begin(), trx_ids.end(), m_creator_trx_id);
-
- ut_ad(it != trx_ids.end() && *it == m_creator_trx_id);
-
- ulint i = std::distance(trx_ids.begin(), it);
- ulint n = i * sizeof(trx_ids_t::value_type);
-
- ::memmove(p, &trx_ids[0], n);
-
- n = (trx_ids.size() - i - 1) * sizeof(trx_ids_t::value_type);
-
- ut_ad(i + (n / sizeof(trx_ids_t::value_type)) == m_ids.size());
-
- if (n > 0) {
- ::memmove(p + i, &trx_ids[i + 1], n);
- }
- } else {
- ulint n = size * sizeof(trx_ids_t::value_type);
-
- ::memmove(p, &trx_ids[0], n);
- }
-
-#ifdef UNIV_DEBUG
- /* Assert that all transaction ids in list are active. */
- for (trx_ids_t::const_iterator it = trx_ids.begin();
- it != trx_ids.end(); ++it) {
-
- trx_t* trx = trx_get_rw_trx_by_id(*it);
- ut_ad(trx != NULL);
- ut_ad(trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@param id Creator transaction id */
-
-void
-ReadView::prepare(trx_id_t id)
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- m_creator_trx_id = id;
-
- m_low_limit_no = m_low_limit_id = trx_sys->max_trx_id;
-
- if (!trx_sys->rw_trx_ids.empty()) {
- copy_trx_ids(trx_sys->rw_trx_ids);
- } else {
- m_ids.clear();
- }
-
- if (UT_LIST_GET_LEN(trx_sys->serialisation_list) > 0) {
- const trx_t* trx;
-
- trx = UT_LIST_GET_FIRST(trx_sys->serialisation_list);
-
- if (trx->no < m_low_limit_no) {
- m_low_limit_no = trx->no;
- }
- }
-}
/**
-Complete the read view creation */
+ Creates a snapshot where exactly the transactions serialized before this
+ point in time are seen in the view.
-void
-ReadView::complete()
-{
- /* The first active transaction has the smallest id. */
- m_up_limit_id = !m_ids.empty() ? m_ids.front() : m_low_limit_id;
-
- ut_ad(m_up_limit_id <= m_low_limit_id);
-
- m_closed = false;
-}
-
-/**
-Find a free view from the active list, if none found then allocate
-a new view.
-@return a view to use */
-
-ReadView*
-MVCC::get_view()
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ReadView* view;
-
- if (UT_LIST_GET_LEN(m_free) > 0) {
- view = UT_LIST_GET_FIRST(m_free);
- UT_LIST_REMOVE(m_free, view);
- } else {
- view = UT_NEW_NOKEY(ReadView());
-
- if (view == NULL) {
- ib::error() << "Failed to allocate MVCC view";
- }
- }
-
- return(view);
-}
-
-/**
-Release a view that is inactive but not closed. Caller must own
-the trx_sys_t::mutex.
-@param view View to release */
-void
-MVCC::view_release(ReadView*& view)
+ @param[in,out] trx transaction
+*/
+inline void ReadView::snapshot(trx_t *trx)
{
- ut_ad(!srv_read_only_mode);
- ut_ad(trx_sys_mutex_own());
-
- uintptr_t p = reinterpret_cast<uintptr_t>(view);
-
- ut_a(p & 0x1);
-
- view = reinterpret_cast<ReadView*>(p & ~1);
-
- ut_ad(view->m_closed);
-
- /** RW transactions should not free their views here. Their views
- should freed using view_close_view() */
-
- ut_ad(view->m_creator_trx_id == 0);
-
- UT_LIST_REMOVE(m_views, view);
-
- UT_LIST_ADD_LAST(m_free, view);
-
- view = NULL;
+ trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
+ std::sort(m_ids.begin(), m_ids.end());
+ m_up_limit_id= m_ids.empty() ? m_low_limit_id : m_ids.front();
+ ut_ad(m_up_limit_id <= m_low_limit_id);
}
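
The three fields that snapshot() fills in above, the sorted m_ids array plus m_up_limit_id and m_low_limit_id, are what read-view visibility checks are built from. Below is a rough, self-contained sketch of the rule those fields imply; it is not InnoDB's actual member function, and every name in it (snapshot_view, changes_visible and so on) is a stand-in.

#include <algorithm>
#include <cstdint>
#include <vector>

using trx_id_t = std::uint64_t;

struct snapshot_view {
    trx_id_t              up_limit;   // smallest id in ids, or low_limit if ids is empty
    trx_id_t              low_limit;  // trx_sys "max trx id" at snapshot time
    std::vector<trx_id_t> ids;        // sorted ids of transactions active at snapshot time
    trx_id_t              creator;    // id of the transaction owning the view (0 if none)

    // A change made by transaction `id` is visible to this view iff that
    // transaction had already committed when the snapshot was taken
    // (or the change is the view owner's own).
    bool changes_visible(trx_id_t id) const
    {
        if (id == creator || id < up_limit) {
            return true;                    // own change, or older than every active trx
        }
        if (id >= low_limit) {
            return false;                   // started after the snapshot was taken
        }
        // Between the two limits: invisible only if it was still active.
        return !std::binary_search(ids.begin(), ids.end(), id);
    }
};
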
-/**
-Allocate and create a view.
-@param view view owned by this class created for the
- caller. Must be freed by calling view_close()
-@param trx transaction instance of caller */
-void
-MVCC::view_open(ReadView*& view, trx_t* trx)
-{
- ut_ad(!srv_read_only_mode);
-
- /** If no new RW transaction has been started since the last view
- was created then reuse the the existing view. */
- if (view != NULL) {
-
- uintptr_t p = reinterpret_cast<uintptr_t>(view);
-
- view = reinterpret_cast<ReadView*>(p & ~1);
-
- ut_ad(view->m_closed);
-
- /* NOTE: This can be optimised further, for now we only
- resuse the view iff there are no active RW transactions.
-
- There is an inherent race here between purge and this
- thread. Purge will skip views that are marked as closed.
- Therefore we must set the low limit id after we reset the
- closed status after the check. */
-
- if (trx_is_autocommit_non_locking(trx) && view->empty()) {
-
- view->m_closed = false;
-
- if (view->m_low_limit_id == trx_sys_get_max_trx_id()) {
- return;
- } else {
- view->m_closed = true;
- }
- }
-
- mutex_enter(&trx_sys->mutex);
-
- UT_LIST_REMOVE(m_views, view);
-
- } else {
- mutex_enter(&trx_sys->mutex);
-
- view = get_view();
- }
-
- if (view != NULL) {
-
- view->prepare(trx->id);
-
- view->complete();
-
- UT_LIST_ADD_FIRST(m_views, view);
-
- ut_ad(!view->is_closed());
-
- ut_ad(validate());
- }
-
- trx_sys_mutex_exit();
-}
/**
-Get the oldest (active) view in the system.
-@return oldest view if found or NULL */
+ Opens a read view where exactly the transactions serialized before this
+ point in time are seen in the view.
-ReadView*
-MVCC::get_oldest_view() const
-{
- ReadView* view;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- for (view = UT_LIST_GET_LAST(m_views);
- view != NULL;
- view = UT_LIST_GET_PREV(m_view_list, view)) {
-
- if (!view->is_closed()) {
- break;
- }
- }
-
- return(view);
-}
+ View becomes visible to purge thread.
-/**
-Copy state from another view. Must call copy_complete() to finish.
-@param other view to copy from */
-
-void
-ReadView::copy_prepare(const ReadView& other)
-{
- ut_ad(&other != this);
-
- if (!other.m_ids.empty()) {
- const ids_t::value_type* p = other.m_ids.data();
-
- m_ids.assign(p, p + other.m_ids.size());
- } else {
- m_ids.clear();
- }
-
- m_up_limit_id = other.m_up_limit_id;
-
- m_low_limit_no = other.m_low_limit_no;
-
- m_low_limit_id = other.m_low_limit_id;
-
- m_creator_trx_id = other.m_creator_trx_id;
-}
-
-/**
-Complete the copy, insert the creator transaction id into the
-m_ids too and adjust the m_up_limit_id, if required */
-
-void
-ReadView::copy_complete()
-{
- ut_ad(!trx_sys_mutex_own());
-
- if (m_creator_trx_id > 0) {
- m_ids.insert(m_creator_trx_id);
- }
-
- if (!m_ids.empty()) {
- /* The last active transaction has the smallest id. */
- m_up_limit_id = std::min(m_ids.front(), m_up_limit_id);
- }
-
- ut_ad(m_up_limit_id <= m_low_limit_id);
-
- /* We added the creator transaction ID to the m_ids. */
- m_creator_trx_id = 0;
-}
-
-/** Clones the oldest view and stores it in view. No need to
-call view_close(). The caller owns the view that is passed in.
-This function is called by Purge to determine whether it should
-purge the delete marked record or not.
-@param view Preallocated view, owned by the caller */
-
-void
-MVCC::clone_oldest_view(ReadView* view)
-{
- mutex_enter(&trx_sys->mutex);
-
- ReadView* oldest_view = get_oldest_view();
-
- if (oldest_view == NULL) {
-
- view->prepare(0);
-
- trx_sys_mutex_exit();
-
- view->complete();
-
- } else {
- view->copy_prepare(*oldest_view);
-
- trx_sys_mutex_exit();
-
- view->copy_complete();
- }
-}
-
-/**
-@return the number of active views */
-
-ulint
-MVCC::size() const
+ @param[in,out] trx transaction
+*/
+void ReadView::open(trx_t *trx)
{
- trx_sys_mutex_enter();
-
- ulint size = 0;
-
- for (const ReadView* view = UT_LIST_GET_FIRST(m_views);
- view != NULL;
- view = UT_LIST_GET_NEXT(m_view_list, view)) {
-
- if (!view->is_closed()) {
- ++size;
- }
- }
-
- trx_sys_mutex_exit();
-
- return(size);
+ ut_ad(this == &trx->read_view);
+ switch (m_state)
+ {
+ case READ_VIEW_STATE_OPEN:
+ ut_ad(!srv_read_only_mode);
+ return;
+ case READ_VIEW_STATE_CLOSED:
+ if (srv_read_only_mode)
+ return;
+ /*
+ Reuse closed view if there were no read-write transactions since (and at)
+ its creation time.
+
+ Original comment states: there is an inherent race here between purge
+ and this thread.
+
+      To avoid this race we should have checked trx_sys.get_max_trx_id() and
+      set the state to READ_VIEW_STATE_OPEN atomically under trx_sys.mutex
+      protection. But we are cutting corners to achieve great scalability.
+
+ There're at least two types of concurrent threads interested in this
+ value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
+ InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
+
+ What bad things can happen because we allow this race?
+
+      Speculative execution may reorder the state change before
+      get_max_trx_id(). In that case the purge thread has a short window in
+      which it may clone an outdated view. That is probably not that bad: it
+      just won't be able to purge things that it was actually allowed to
+      purge, for a short while.
+
+      This thread may also get suspended after trx_sys.get_max_trx_id() and
+      before the state is set to READ_VIEW_STATE_OPEN. A new read-write
+      transaction may get started, committed and purged meanwhile. That is
+      acceptable as well, since this view does not see it.
+ */
+ if (trx_is_autocommit_non_locking(trx) && m_ids.empty() &&
+ m_low_limit_id == trx_sys.get_max_trx_id())
+ goto reopen;
+
+ /*
+ Can't reuse view, take new snapshot.
+
+      Alas, this empty critical section is the simplest way to make sure a
+      concurrent purge thread has completed its snapshot copy. Of course the
+      purge thread may come back and try to copy once again after we release
+      this mutex, but in that case it is guaranteed to see
+      READ_VIEW_STATE_REGISTERED and thus it'll skip this view.
+
+      This critical section could be replaced with a new state, which the
+      purge thread would set to tell us to wait until it completes its
+      snapshot. However, that would complicate m_state even further.
+ */
+ mutex_enter(&trx_sys.mutex);
+ mutex_exit(&trx_sys.mutex);
+ my_atomic_store32_explicit(&m_state, READ_VIEW_STATE_SNAPSHOT,
+ MY_MEMORY_ORDER_RELAXED);
+ break;
+ default:
+ ut_ad(0);
+ }
+
+ snapshot(trx);
+reopen:
+ m_creator_trx_id= trx->id;
+ my_atomic_store32_explicit(&m_state, READ_VIEW_STATE_OPEN,
+ MY_MEMORY_ORDER_RELEASE);
}
-/**
-Close a view created by the above function.
-@para view view allocated by trx_open.
-@param own_mutex true if caller owns trx_sys_t::mutex */
-
-void
-MVCC::view_close(ReadView*& view, bool own_mutex)
-{
- uintptr_t p = reinterpret_cast<uintptr_t>(view);
-
- /* Note: The assumption here is that AC-NL-RO transactions will
- call this function with own_mutex == false. */
- if (!own_mutex) {
- /* Sanitise the pointer first. */
- ReadView* ptr = reinterpret_cast<ReadView*>(p & ~1);
-
- /* Note this can be called for a read view that
- was already closed. */
- ptr->m_closed = true;
-
- /* Set the view as closed. */
- view = reinterpret_cast<ReadView*>(p | 0x1);
- } else {
- view = reinterpret_cast<ReadView*>(p & ~1);
-
- view->close();
-
- UT_LIST_REMOVE(m_views, view);
- UT_LIST_ADD_LAST(m_free, view);
-
- ut_ad(validate());
-
- view = NULL;
- }
-}
/**
-Set the view creator transaction id. Note: This shouldbe set only
-for views created by RW transactions.
-@param view Set the creator trx id for this view
-@param id Transaction id to set */
+ Clones the oldest view and stores it in view.
-void
-MVCC::set_view_creator_trx_id(ReadView* view, trx_id_t id)
+ No need to call ReadView::close(). The caller owns the view that is passed
+ in. This function is called by purge thread to determine whether it should
+ purge the delete marked record or not.
+*/
+void trx_sys_t::clone_oldest_view()
{
- ut_ad(id > 0);
- ut_ad(mutex_own(&trx_sys->mutex));
-
- view->creator_trx_id(id);
+ purge_sys.view.snapshot(0);
+ mutex_enter(&mutex);
+ /* Find oldest view. */
+ for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ int32_t state;
+
+ while ((state= trx->read_view.get_state()) == READ_VIEW_STATE_SNAPSHOT)
+ ut_delay(1);
+
+ if (state == READ_VIEW_STATE_OPEN)
+ purge_sys.view.copy(trx->read_view);
+ }
+ mutex_exit(&mutex);
}
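
ReadView::open() and trx_sys_t::clone_oldest_view() above cooperate through a tiny per-view state machine (CLOSED, SNAPSHOT, OPEN): the owner publishes a finished snapshot with a release store, and the purge side spins past the transient SNAPSHOT state before copying. The sketch below models only that release/acquire handshake with standard C++ atomics; the types and names are toy stand-ins rather than the real ReadView members, and the trx_sys.mutex interplay is left out.

#include <atomic>
#include <thread>

enum view_state : int { CLOSED, SNAPSHOT, OPEN };

struct toy_view {
    std::atomic<int>   state{CLOSED};
    unsigned long long low_limit = 0;     // payload published together with OPEN

    // Owner side, as in ReadView::open(): mark the snapshot as in progress,
    // take it, then publish it with a release store.
    void open(unsigned long long max_trx_id)
    {
        state.store(SNAPSHOT, std::memory_order_relaxed);
        low_limit = max_trx_id;                          // "take the snapshot"
        state.store(OPEN, std::memory_order_release);    // publish it
    }

    // Purge side, as in trx_sys_t::clone_oldest_view(): wait out SNAPSHOT,
    // then copy the payload only if the view is OPEN.
    bool try_copy(unsigned long long& out) const
    {
        int s;
        while ((s = state.load(std::memory_order_acquire)) == SNAPSHOT) {
            std::this_thread::yield();                   // ut_delay(1) in the original
        }
        if (s != OPEN) {
            return false;                                // closed view: nothing to clone
        }
        out = low_limit;   // safe: the acquire load synchronizes with the release store
        return true;
    }
};
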
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index 62581bbac38..f8449e5443f 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -222,7 +222,6 @@ static
int
cmp_geometry_field(
/*===============*/
- ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
const byte* a, /*!< in: data field */
unsigned int a_length, /*!< in: data field length,
@@ -296,12 +295,10 @@ cmp_gis_field(
not UNIV_SQL_NULL */
{
if (mode == PAGE_CUR_MBR_EQUAL) {
- /* TODO: Since the DATA_GEOMETRY is not used in compare
- function, we could pass it instead of a specific type now */
- return(cmp_geometry_field(DATA_GEOMETRY, DATA_GIS_MBR,
- a, a_length, b, b_length));
+ return cmp_geometry_field(DATA_GIS_MBR,
+ a, a_length, b, b_length);
} else {
- return(rtree_key_cmp(mode, a, a_length, b, b_length));
+ return rtree_key_cmp(mode, a, int(a_length), b, int(b_length));
}
}
@@ -372,8 +369,7 @@ cmp_whole_field(
return(innobase_mysql_cmp(prtype,
a, a_length, b, b_length));
case DATA_GEOMETRY:
- return(cmp_geometry_field(mtype, prtype, a, a_length, b,
- b_length));
+ return cmp_geometry_field(prtype, a, a_length, b, b_length);
default:
ib::fatal() << "Unknown data type number " << mtype;
}
@@ -402,6 +398,9 @@ cmp_data(
const byte* data2,
ulint len2)
{
+ ut_ad(len1 != UNIV_SQL_DEFAULT);
+ ut_ad(len2 != UNIV_SQL_DEFAULT);
+
if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
if (len1 == len2) {
return(0);
@@ -701,6 +700,11 @@ cmp_dtuple_rec_with_match_low(
contain externally stored fields, and the first fields
(primary key fields) should already differ. */
ut_ad(!rec_offs_nth_extern(offsets, cur_field));
+ /* We should never compare against instantly added columns.
+ Columns can only be instantly added to clustered index
+ leaf page records, and the first fields (primary key fields)
+ should already differ. */
+ ut_ad(!rec_offs_nth_default(offsets, cur_field));
rec_b_ptr = rec_get_nth_field(rec, offsets, cur_field,
&rec_f_len);
@@ -816,6 +820,8 @@ cmp_dtuple_rec_with_match_bytes(
dtuple_b_ptr = static_cast<const byte*>(
dfield_get_data(dfield));
+
+ ut_ad(!rec_offs_nth_default(offsets, cur_field));
rec_b_ptr = rec_get_nth_field(rec, offsets,
cur_field, &rec_f_len);
ut_ad(!rec_offs_nth_extern(offsets, cur_field));
@@ -1137,10 +1143,9 @@ cmp_rec_rec_with_match(
/* Test if rec is the predefined minimum record */
if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
& REC_INFO_MIN_REC_FLAG)) {
- /* There should only be one such record. */
- ut_ad(!(rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG));
- ret = -1;
+ ret = UNIV_UNLIKELY(rec_get_info_bits(rec2, comp)
+ & REC_INFO_MIN_REC_FLAG)
+ ? 0 : -1;
goto order_resolved;
} else if (UNIV_UNLIKELY
(rec_get_info_bits(rec2, comp)
@@ -1190,6 +1195,8 @@ cmp_rec_rec_with_match(
DB_ROLL_PTR, and any externally stored columns. */
ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
+ ut_ad(!rec_offs_nth_default(offsets1, cur_field));
+ ut_ad(!rec_offs_nth_default(offsets2, cur_field));
rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
cur_field, &rec1_f_len);
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 35b0235b8ec..495c29e4805 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -166,7 +166,10 @@ rec_get_n_extern_new(
ulint i;
ut_ad(dict_table_is_comp(index->table));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+ ut_ad(!index->table->supports_instant() || index->is_dummy);
+ ut_ad(!index->is_instant());
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY
+ || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
if (n == ULINT_UNDEFINED) {
@@ -228,50 +231,118 @@ rec_get_n_extern_new(
return(n_extern);
}
-/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INLINE MY_ATTRIBUTE((nonnull))
+/** Get the added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in,out] header variable header of a REC_STATUS_COLUMNS_ADDED record
+@return number of added fields */
+static inline unsigned rec_get_n_add_field(const byte*& header)
+{
+ unsigned n_fields_add = *--header;
+ if (n_fields_add < 0x80) {
+ ut_ad(rec_get_n_add_field_len(n_fields_add) == 1);
+ return n_fields_add;
+ }
+
+ n_fields_add &= 0x7f;
+ n_fields_add |= unsigned(*--header) << 7;
+ ut_ad(n_fields_add < REC_MAX_N_FIELDS);
+ ut_ad(rec_get_n_add_field_len(n_fields_add) == 2);
+ return n_fields_add;
+}
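
rec_get_n_add_field() above reads the added-field count stored immediately before the null-flag bytes: one byte for counts up to 0x7f, otherwise two bytes, the low seven bits with the 0x80 flag first, then the high bits. The patch's real writer counterpart is rec_set_n_add_field(), used further down in rec_convert_dtuple_to_rec_comp(); the sketch below only demonstrates the same byte layout against a plain buffer and is not the InnoDB function.

#include <cassert>
#include <cstdint>

// `ptr` starts one past the slot where the count ends up, mirroring the
// *--header reads in rec_get_n_add_field() above, and is moved backwards.
static void toy_set_n_add_field(std::uint8_t*& ptr, unsigned n_add)
{
    assert(n_add < 0x4000);                            // fits in the 2-byte form
    if (n_add < 0x80) {
        *--ptr = std::uint8_t(n_add);                  // 1-byte form, flag bit clear
    } else {
        *--ptr = std::uint8_t((n_add & 0x7f) | 0x80);  // low 7 bits plus the 0x80 flag
        *--ptr = std::uint8_t(n_add >> 7);             // remaining high bits
    }
}
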
+
+/** Format of a leaf-page ROW_FORMAT!=REDUNDANT record */
+enum rec_leaf_format {
+ /** Temporary file record */
+ REC_LEAF_TEMP,
+ /** Temporary file record, with added columns
+ (REC_STATUS_COLUMNS_ADDED) */
+ REC_LEAF_TEMP_COLUMNS_ADDED,
+ /** Normal (REC_STATUS_ORDINARY) */
+ REC_LEAF_ORDINARY,
+ /** With added columns (REC_STATUS_COLUMNS_ADDED) */
+ REC_LEAF_COLUMNS_ADDED
+};
+
+/** Determine the offset to each field in a leaf-page record
+in ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED.
+This is a special case of rec_init_offsets() and rec_get_offsets_func().
+@param[in] rec leaf-page record
+@param[in] index the index that the record belongs in
+@param[in] n_core number of core fields (index->n_core_fields)
+@param[in] def_val default values for non-core fields, or
+ NULL to refer to index->fields[].col->def_val
+@param[in,out] offsets offsets, with valid rec_offs_n_fields(offsets)
+@param[in] format record format */
+static inline
void
rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- bool temp, /*!< in: whether to use the
- format for temporary files in
- index creation */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets,
+ ulint n_core,
+ const dict_col_t::def_t*def_val,
+ rec_leaf_format format)
{
- ulint i = 0;
ulint offs = 0;
- ulint any_ext = 0;
- ulint n_null = index->n_nullable;
- const byte* nulls = temp
- ? rec - 1
- : rec - (1 + REC_N_NEW_EXTRA_BYTES);
- const byte* lens = nulls - UT_BITS_IN_BYTES(n_null);
+ ulint any = 0;
+ const byte* nulls = rec;
+ const byte* lens = NULL;
+ ulint n_fields = n_core;
ulint null_mask = 1;
+ ut_ad(index->n_core_fields >= n_core);
+ ut_ad(n_core > 0);
+ ut_ad(index->n_fields >= n_core);
+ ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
+ ut_ad(format == REC_LEAF_TEMP || format == REC_LEAF_TEMP_COLUMNS_ADDED
+ || dict_table_is_comp(index->table));
+ ut_ad(format != REC_LEAF_TEMP_COLUMNS_ADDED
+ || index->n_fields == rec_offs_n_fields(offsets));
+ ut_d(ulint n_null= 0);
+
+ switch (format) {
+ case REC_LEAF_TEMP:
+ if (dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ format = REC_LEAF_ORDINARY;
+ }
+ goto ordinary;
+ case REC_LEAF_ORDINARY:
+ nulls -= REC_N_NEW_EXTRA_BYTES;
+ordinary:
+ lens = --nulls - index->n_core_null_bytes;
+
+ ut_d(n_null = std::min(index->n_core_null_bytes * 8U,
+ index->n_nullable));
+ break;
+ case REC_LEAF_COLUMNS_ADDED:
+ /* We would have !index->is_instant() when rolling back
+ an instant ADD COLUMN operation. */
+ nulls -= REC_N_NEW_EXTRA_BYTES;
+ ut_ad(index->is_instant());
+ /* fall through */
+ case REC_LEAF_TEMP_COLUMNS_ADDED:
+ n_fields = n_core + 1 + rec_get_n_add_field(nulls);
+ ut_ad(n_fields <= index->n_fields);
+ const ulint n_nullable = index->get_n_nullable(n_fields);
+ const ulint n_null_bytes = UT_BITS_IN_BYTES(n_nullable);
+ ut_d(n_null = n_nullable);
+ ut_ad(n_null <= index->n_nullable);
+ ut_ad(n_null_bytes >= index->n_core_null_bytes
+ || n_core < index->n_core_fields);
+ lens = --nulls - n_null_bytes;
+ }
+
#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here if temp=true.
+ /* We cannot invoke rec_offs_make_valid() if format==REC_LEAF_TEMP.
Similarly, rec_offs_validate() will fail in that case, because
it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
- ut_ad(temp || dict_table_is_comp(index->table));
-
- if (temp && dict_table_is_comp(index->table)) {
- /* No need to do adjust fixed_len=0. We only need to
- adjust it for ROW_FORMAT=REDUNDANT. */
- temp = false;
- }
-
- /* read the lengths of fields 0..n */
+ /* read the lengths of fields 0..n_fields */
+ ulint i = 0;
do {
const dict_field_t* field
= dict_index_get_nth_field(index, i);
@@ -279,6 +350,32 @@ rec_init_offsets_comp_ordinary(
= dict_field_get_col(field);
ulint len;
+ /* set default value flag */
+ if (i < n_fields) {
+ } else if (def_val) {
+ const dict_col_t::def_t& d = def_val[i - n_core];
+ if (!d.data) {
+ len = offs | REC_OFFS_SQL_NULL;
+ ut_ad(d.len == UNIV_SQL_NULL);
+ } else {
+ len = offs | REC_OFFS_DEFAULT;
+ any |= REC_OFFS_DEFAULT;
+ }
+
+ goto resolved;
+ } else {
+ ulint dlen;
+ if (!index->instant_field_value(i, &dlen)) {
+ len = offs | REC_OFFS_SQL_NULL;
+ ut_ad(dlen == UNIV_SQL_NULL);
+ } else {
+ len = offs | REC_OFFS_DEFAULT;
+ any |= REC_OFFS_DEFAULT;
+ }
+
+ goto resolved;
+ }
+
if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
ut_ad(n_null--);
@@ -301,7 +398,8 @@ rec_init_offsets_comp_ordinary(
}
if (!field->fixed_len
- || (temp && !dict_col_get_fixed_size(col, temp))) {
+ || (format == REC_LEAF_TEMP
+ && !dict_col_get_fixed_size(col, true))) {
/* Variable-length field: read the length */
len = *lens--;
/* If the maximum length of the field is up
@@ -311,26 +409,21 @@ rec_init_offsets_comp_ordinary(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
- if (DATA_BIG_COL(col)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- len <<= 8;
- len |= *lens--;
-
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len
- & 0x4000)) {
- ut_ad(dict_index_is_clust
- (index));
- any_ext = REC_OFFS_EXTERNAL;
- len = offs
- | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
+ if ((len & 0x80) && DATA_BIG_COL(col)) {
+ /* 1exxxxxxx xxxxxxxx */
+ len <<= 8;
+ len |= *lens--;
+
+ offs += len & 0x3fff;
+ if (UNIV_UNLIKELY(len & 0x4000)) {
+ ut_ad(dict_index_is_clust(index));
+ any |= REC_OFFS_EXTERNAL;
+ len = offs | REC_OFFS_EXTERNAL;
+ } else {
+ len = offs;
}
+
+ goto resolved;
}
len = offs += len;
@@ -342,12 +435,117 @@ resolved:
} while (++i < rec_offs_n_fields(offsets));
*rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
+ = ulint(rec - (lens + 1)) | REC_OFFS_COMPACT | any;
}
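
The loop above decodes the variable-length column lengths stored backwards in front of the record origin: one byte for short lengths, or two bytes introduced by the 0x80 marker when the column may need them, with bit 0x4000 flagging external (off-page) storage. Pulled out as a self-contained helper under assumed names (big_col stands in for the DATA_BIG_COL() test; the struct is not InnoDB API):

#include <cstdint>

struct field_len {
    unsigned len;       // stored length of the field, in bytes
    bool     external;  // stored externally (off-page), 2-byte form only
};

// `lens` walks backwards through the length bytes in front of the record origin.
static field_len toy_read_var_len(const std::uint8_t*& lens, bool big_col)
{
    unsigned len = *lens--;
    if ((len & 0x80) && big_col) {
        // 1exxxxxxx xxxxxxxx: two-byte length; bit 0x4000 flags external storage
        len <<= 8;
        len |= *lens--;
        return { len & 0x3fffU, (len & 0x4000U) != 0 };
    }
    // One-byte length; for columns that can never need the 2-byte form
    // the whole byte (0..255) is the length.
    return { len, false };
}
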
-/******************************************************//**
-The following function determines the offsets to each field in the
-record. The offsets are written to a previously allocated array of
+#ifdef UNIV_DEBUG
+/** Update debug data in offsets, in order to tame rec_offs_validate().
+@param[in] rec record
+@param[in] index the index that the record belongs in
+@param[in] leaf whether the record resides in a leaf page
+@param[in,out] offsets offsets from rec_get_offsets() to adjust */
+void
+rec_offs_make_valid(
+ const rec_t* rec,
+ const dict_index_t* index,
+ bool leaf,
+ ulint* offsets)
+{
+ ut_ad(rec_offs_n_fields(offsets)
+ <= (leaf
+ ? dict_index_get_n_fields(index)
+ : dict_index_get_n_unique_in_tree_nonleaf(index) + 1)
+ || index->is_dummy || dict_index_is_ibuf(index));
+ const bool is_user_rec = (dict_table_is_comp(index->table)
+ ? rec_get_heap_no_new(rec)
+ : rec_get_heap_no_old(rec))
+ >= PAGE_HEAP_NO_USER_LOW;
+ ulint n = rec_get_n_fields(rec, index);
+ /* The infimum and supremum records carry 1 field. */
+ ut_ad(is_user_rec || n == 1);
+ ut_ad(is_user_rec || rec_offs_n_fields(offsets) == 1);
+ ut_ad(!is_user_rec
+ || (n + (index->id == DICT_INDEXES_ID)) >= index->n_core_fields
+ || n >= rec_offs_n_fields(offsets));
+ for (; n < rec_offs_n_fields(offsets); n++) {
+ ut_ad(leaf);
+ ut_ad(rec_offs_base(offsets)[1 + n] & REC_OFFS_DEFAULT);
+ }
+ offsets[2] = ulint(rec);
+ offsets[3] = ulint(index);
+}
+
+/** Validate offsets returned by rec_get_offsets().
+@param[in] rec record, or NULL
+@param[in] index the index that the record belongs in, or NULL
+@param[in,out] offsets the offsets of the record
+@return true */
+bool
+rec_offs_validate(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets)
+{
+ ulint i = rec_offs_n_fields(offsets);
+ ulint last = ULINT_MAX;
+ ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
+
+ if (rec) {
+ ut_ad(ulint(rec) == offsets[2]);
+ if (!comp) {
+ const bool is_user_rec = rec_get_heap_no_old(rec)
+ >= PAGE_HEAP_NO_USER_LOW;
+ ulint n = rec_get_n_fields_old(rec);
+ /* The infimum and supremum records carry 1 field. */
+ ut_ad(is_user_rec || n == 1);
+ ut_ad(is_user_rec || i == 1);
+ ut_ad(!is_user_rec || n >= i || !index
+ || (n + (index->id == DICT_INDEXES_ID))
+ >= index->n_core_fields);
+ for (; n < i; n++) {
+ ut_ad(rec_offs_base(offsets)[1 + n]
+ & REC_OFFS_DEFAULT);
+ }
+ }
+ }
+ if (index) {
+ ulint max_n_fields;
+ ut_ad(ulint(index) == offsets[3]);
+ max_n_fields = ut_max(
+ dict_index_get_n_fields(index),
+ dict_index_get_n_unique_in_tree(index) + 1);
+ if (comp && rec) {
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_COLUMNS_ADDED:
+ case REC_STATUS_ORDINARY:
+ break;
+ case REC_STATUS_NODE_PTR:
+ max_n_fields = dict_index_get_n_unique_in_tree(
+ index) + 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ max_n_fields = 1;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ /* index->n_def == 0 for dummy indexes if !comp */
+ ut_a(!comp || index->n_def);
+ ut_a(!index->n_def || i <= max_n_fields);
+ }
+ while (i--) {
+ ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
+ ut_a(curr <= last);
+ last = curr;
+ }
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/** Determine the offsets to each field in the record.
+ The offsets are written to a previously allocated array of
ulint, where rec_offs_n_fields(offsets) has been initialized to the
number of fields in the record. The rest of the array will be
initialized by this function. rec_offs_base(offsets)[0] will be set
@@ -358,27 +556,32 @@ offsets past the end of fields 0..n_fields, or to the beginning of
fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the
-field i is being stored externally. */
+field i is being stored externally.
+@param[in] rec record
+@param[in] index the index that the record belongs in
+@param[in] leaf whether the record resides in a leaf page
+@param[in,out] offsets array of offsets, with valid rec_offs_n_fields() */
static
void
rec_init_offsets(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ bool leaf,
+ ulint* offsets)
{
ulint i = 0;
ulint offs;
- rec_offs_make_valid(rec, index, offsets);
+ ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
+ ut_d(offsets[2] = ulint(rec));
+ ut_d(offsets[3] = ulint(index));
if (dict_table_is_comp(index->table)) {
const byte* nulls;
const byte* lens;
dict_field_t* field;
ulint null_mask;
- ulint status = rec_get_status(rec);
+ rec_comp_status_t status = rec_get_status(rec);
ulint n_node_ptr_field = ULINT_UNDEFINED;
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
@@ -390,18 +593,39 @@ rec_init_offsets(
rec_offs_base(offsets)[1] = 8;
return;
case REC_STATUS_NODE_PTR:
+ ut_ad(!leaf);
n_node_ptr_field
= dict_index_get_n_unique_in_tree_nonleaf(
index);
break;
+ case REC_STATUS_COLUMNS_ADDED:
+ ut_ad(leaf);
+ rec_init_offsets_comp_ordinary(rec, index, offsets,
+ index->n_core_fields,
+ NULL,
+ REC_LEAF_COLUMNS_ADDED);
+ return;
case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(
- rec, false, index, offsets);
+ ut_ad(leaf);
+ rec_init_offsets_comp_ordinary(rec, index, offsets,
+ index->n_core_fields,
+ NULL,
+ REC_LEAF_ORDINARY);
return;
}
+ /* The n_nullable flags in the clustered index node pointer
+ records in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC must
+ reflect the number of 'core columns'. These flags are
+ useless garbage, and they are only reserved because of
+ file format compatibility.
+ (Clustered index node pointer records only contain the
+ PRIMARY KEY columns, which are always NOT NULL,
+ so we should have used n_nullable=0.) */
+ ut_ad(index->n_core_fields > 0);
+
nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+ lens = nulls - index->n_core_null_bytes;
offs = 0;
null_mask = 1;
@@ -477,13 +701,17 @@ resolved:
} while (++i < rec_offs_n_fields(offsets));
*rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT;
+ = ulint(rec - (lens + 1)) | REC_OFFS_COMPACT;
} else {
/* Old-style record: determine extra size and end offsets */
offs = REC_N_OLD_EXTRA_BYTES;
+ const ulint n_fields = rec_get_n_fields_old(rec);
+ const ulint n = std::min(n_fields, rec_offs_n_fields(offsets));
+ ulint any;
+
if (rec_get_1byte_offs_flag(rec)) {
- offs += rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
+ offs += n_fields;
+ any = offs;
/* Determine offsets to fields */
do {
offs = rec_1_get_field_end_info(rec, i);
@@ -492,10 +720,10 @@ resolved:
offs |= REC_OFFS_SQL_NULL;
}
rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
+ } while (++i < n);
} else {
- offs += 2 * rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
+ offs += 2 * n_fields;
+ any = offs;
/* Determine offsets to fields */
do {
offs = rec_2_get_field_end_info(rec, i);
@@ -506,11 +734,24 @@ resolved:
if (offs & REC_2BYTE_EXTERN_MASK) {
offs &= ~REC_2BYTE_EXTERN_MASK;
offs |= REC_OFFS_EXTERNAL;
- *rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
+ any |= REC_OFFS_EXTERNAL;
}
rec_offs_base(offsets)[1 + i] = offs;
+ } while (++i < n);
+ }
+
+ if (i < rec_offs_n_fields(offsets)) {
+ offs = (rec_offs_base(offsets)[i] & REC_OFFS_MASK)
+ | REC_OFFS_DEFAULT;
+
+ do {
+ rec_offs_base(offsets)[1 + i] = offs;
} while (++i < rec_offs_n_fields(offsets));
+
+ any |= REC_OFFS_DEFAULT;
}
+
+ *rec_offs_base(offsets) = any;
}
}
@@ -529,9 +770,7 @@ rec_get_offsets_func(
const rec_t* rec,
const dict_index_t* index,
ulint* offsets,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
#ifdef UNIV_DEBUG
const char* file, /*!< in: file name where called */
@@ -549,6 +788,7 @@ rec_get_offsets_func(
if (dict_table_is_comp(index->table)) {
switch (UNIV_EXPECT(rec_get_status(rec),
REC_STATUS_ORDINARY)) {
+ case REC_STATUS_COLUMNS_ADDED:
case REC_STATUS_ORDINARY:
ut_ad(leaf);
n = dict_index_get_n_fields(index);
@@ -582,8 +822,8 @@ rec_get_offsets_func(
page_rec_is_user_rec(rec) and similar predicates
cannot be evaluated. We can still distinguish the
infimum and supremum record based on the heap number. */
- ut_d(const bool is_user_rec = rec_get_heap_no_old(rec)
- >= PAGE_HEAP_NO_USER_LOW);
+ const bool is_user_rec = rec_get_heap_no_old(rec)
+ >= PAGE_HEAP_NO_USER_LOW;
/* The infimum and supremum records carry 1 field. */
ut_ad(is_user_rec || n == 1);
ut_ad(!is_user_rec || leaf || index->is_dummy
@@ -594,9 +834,14 @@ rec_get_offsets_func(
ut_ad(!is_user_rec || !leaf || index->is_dummy
|| dict_index_is_ibuf(index)
|| n == n_fields /* btr_pcur_restore_position() */
- || n == index->n_fields
- || (index->id == DICT_INDEXES_ID
- && (n == DICT_NUM_FIELDS__SYS_INDEXES - 1)));
+ || (n + (index->id == DICT_INDEXES_ID)
+ >= index->n_core_fields && n <= index->n_fields));
+
+ if (is_user_rec && leaf && n < index->n_fields) {
+ ut_ad(!index->is_dummy);
+ ut_ad(!dict_index_is_ibuf(index));
+ n = index->n_fields;
+ }
}
if (UNIV_UNLIKELY(n_fields < n)) {
@@ -620,7 +865,7 @@ rec_get_offsets_func(
}
rec_offs_set_n_fields(offsets, n);
- rec_init_offsets(rec, index, offsets);
+ rec_init_offsets(rec, index, leaf, offsets);
return(offsets);
}
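
With instant ADD COLUMN, a stored record may physically carry fewer fields than the index currently has; the code above pads the offsets array up to index->n_fields and flags the missing trailing fields with REC_OFFS_DEFAULT, so that readers substitute the column default (index->instant_field_value() in this patch). The following is a rough sketch of how such offsets can be consumed; it ignores the SQL NULL flag, and the constants and names are assumptions, not the real InnoDB values.

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::uint32_t OFFS_MASK    = 0x3fffffff;   // stand-in for REC_OFFS_MASK
constexpr std::uint32_t OFFS_DEFAULT = 0x40000000;   // stand-in for REC_OFFS_DEFAULT

struct toy_field_ref {
    const std::uint8_t* data;   // pointer into the record, or to the default value
    std::size_t         len;
};

// ends[i] is the cumulative end offset of field i, analogous to
// rec_offs_base(offsets)[1 + i].
static toy_field_ref toy_get_field(const std::uint8_t* rec,
                                   const std::vector<std::uint32_t>& ends,
                                   std::size_t i,
                                   const toy_field_ref& default_value)
{
    const std::uint32_t end = ends[i];
    if (end & OFFS_DEFAULT) {
        return default_value;    // instantly added column, not stored in this record
    }
    const std::uint32_t start = i ? (ends[i - 1] & OFFS_MASK) : 0;
    return { rec + start, (end & OFFS_MASK) - start };
}
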
@@ -654,8 +899,9 @@ rec_get_offsets_reverse(
ut_ad(index);
ut_ad(offsets);
ut_ad(dict_table_is_comp(index->table));
+ ut_ad(!index->is_instant());
- if (UNIV_UNLIKELY(node_ptr)) {
+ if (UNIV_UNLIKELY(node_ptr != 0)) {
n_node_ptr_field =
dict_index_get_n_unique_in_tree_nonleaf(index);
n = n_node_ptr_field + 1;
@@ -741,7 +987,7 @@ resolved:
} while (++i < rec_offs_n_fields(offsets));
ut_ad(lens >= extra);
- *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
+ *rec_offs_base(offsets) = (ulint(lens - extra) + REC_N_NEW_EXTRA_BYTES)
| REC_OFFS_COMPACT | any_ext;
}
@@ -793,7 +1039,7 @@ rec_get_nth_field_offs_old(
*len = next_os - os;
- ut_ad(*len < UNIV_PAGE_SIZE);
+ ut_ad(*len < srv_page_size);
return(os);
}
@@ -801,7 +1047,8 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)))
+MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)))
+static inline
ulint
rec_get_converted_size_comp_prefix_low(
/*===================================*/
@@ -812,21 +1059,31 @@ rec_get_converted_size_comp_prefix_low(
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra, /*!< out: extra size */
+ rec_comp_status_t status, /*!< in: status flags */
bool temp) /*!< in: whether this is a
temporary file record */
{
- ulint extra_size;
+ ulint extra_size = temp ? 0 : REC_N_NEW_EXTRA_BYTES;
ulint data_size;
ulint i;
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
- ut_ad(!temp || extra);
-
ut_d(ulint n_null = index->n_nullable);
+ ut_ad(status == REC_STATUS_ORDINARY || status == REC_STATUS_NODE_PTR
+ || status == REC_STATUS_COLUMNS_ADDED);
+
+ if (status == REC_STATUS_COLUMNS_ADDED
+ && (!temp || n_fields > index->n_core_fields)) {
+ ut_ad(index->is_instant());
+ ut_ad(UT_BITS_IN_BYTES(n_null) >= index->n_core_null_bytes);
+ extra_size += UT_BITS_IN_BYTES(index->get_n_nullable(n_fields))
+ + rec_get_n_add_field_len(n_fields - 1
+ - index->n_core_fields);
+ } else {
+ ut_ad(n_fields <= index->n_core_fields);
+ extra_size += index->n_core_null_bytes;
+ }
- extra_size = temp
- ? UT_BITS_IN_BYTES(index->n_nullable)
- : REC_N_NEW_EXTRA_BYTES + UT_BITS_IN_BYTES(index->n_nullable);
data_size = 0;
if (temp && dict_table_is_comp(index->table)) {
@@ -941,7 +1198,8 @@ rec_get_converted_size_comp_prefix(
{
ut_ad(dict_table_is_comp(index->table));
return(rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, false));
+ index, fields, n_fields, extra,
+ REC_STATUS_ORDINARY, false));
}
/**********************************************************//**
@@ -954,40 +1212,41 @@ rec_get_converted_size_comp(
dict_table_is_comp() is
assumed to hold, even if
it does not */
- ulint status, /*!< in: status bits of the record */
+ rec_comp_status_t status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
{
- ulint size;
ut_ad(n_fields > 0);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
- ut_ad(n_fields == dict_index_get_n_fields(index));
- size = 0;
- break;
+ if (n_fields > index->n_core_fields) {
+ ut_ad(index->is_instant());
+ status = REC_STATUS_COLUMNS_ADDED;
+ }
+ /* fall through */
+ case REC_STATUS_COLUMNS_ADDED:
+ ut_ad(n_fields >= index->n_core_fields);
+ ut_ad(n_fields <= index->n_fields);
+ return rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, status, false);
case REC_STATUS_NODE_PTR:
n_fields--;
ut_ad(n_fields == dict_index_get_n_unique_in_tree_nonleaf(
index));
ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
- size = REC_NODE_PTR_SIZE; /* child page number */
- break;
+ return REC_NODE_PTR_SIZE /* child page number */
+ + rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, status, false);
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
- /* infimum or supremum record, 8 data bytes */
- if (UNIV_LIKELY_NULL(extra)) {
- *extra = REC_N_NEW_EXTRA_BYTES;
- }
- return(REC_N_NEW_EXTRA_BYTES + 8);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
+ /* not supported */
+ break;
}
- return(size + rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, false));
+ ut_error;
+ return(ULINT_UNDEFINED);
}
/***********************************************************//**
@@ -1084,8 +1343,7 @@ rec_convert_dtuple_to_rec_old(
/* Set the info bits of the record */
rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
& REC_INFO_BITS_MASK);
- /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
- ut_d(rec_set_heap_no_old(rec, PAGE_HEAP_NO_USER_LOW));
+ rec_set_heap_no_old(rec, PAGE_HEAP_NO_USER_LOW);
/* Store the data and the offsets */
@@ -1157,78 +1415,89 @@ rec_convert_dtuple_to_rec_old(
return(rec);
}
-/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INLINE
+/** Convert a data tuple into a ROW_FORMAT=COMPACT record.
+@param[out] rec converted record
+@param[in] index index
+@param[in] fields data fields to convert
+@param[in] n_fields number of data fields
+@param[in] status rec_get_status(rec)
+@param[in] temp whether to use the format for temporary files
+ in index creation */
+static inline
void
rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint status, /*!< in: status bits of the record */
- bool temp) /*!< in: whether to use the
- format for temporary files in
- index creation */
+ rec_t* rec,
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ rec_comp_status_t status,
+ bool temp)
{
const dfield_t* field;
const dtype_t* type;
byte* end;
- byte* nulls;
- byte* lens;
+ byte* nulls = temp
+ ? rec - 1 : rec - (REC_N_NEW_EXTRA_BYTES + 1);
+ byte* UNINIT_VAR(lens);
ulint len;
ulint i;
- ulint n_node_ptr_field;
+ ulint UNINIT_VAR(n_node_ptr_field);
ulint fixed_len;
ulint null_mask = 1;
ut_ad(n_fields > 0);
ut_ad(temp || dict_table_is_comp(index->table));
- ulint n_null = index->n_nullable;
- const ulint n_null_bytes = UT_BITS_IN_BYTES(n_null);
+ ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
+
+ ut_d(ulint n_null = index->n_nullable);
- if (temp) {
- ut_ad(status == REC_STATUS_ORDINARY);
+ switch (status) {
+ case REC_STATUS_COLUMNS_ADDED:
+ ut_ad(index->is_instant());
+ ut_ad(n_fields > index->n_core_fields);
+ rec_set_n_add_field(nulls, n_fields - 1
+ - index->n_core_fields);
+ /* fall through */
+ case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- nulls = rec - 1;
- if (dict_table_is_comp(index->table)) {
+ if (!temp) {
+ rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW);
+ rec_set_status(rec, n_fields == index->n_core_fields
+ ? REC_STATUS_ORDINARY
+ : REC_STATUS_COLUMNS_ADDED);
+ } if (dict_table_is_comp(index->table)) {
/* No need to do adjust fixed_len=0. We only
need to adjust it for ROW_FORMAT=REDUNDANT. */
temp = false;
}
- } else {
- /* Make rec_get_offsets() and rec_offs_make_valid() happy. */
- ut_d(rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW));
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields
- == dict_index_get_n_unique_in_tree_nonleaf(index)
- + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
- }
+ n_node_ptr_field = ULINT_UNDEFINED;
+ lens = nulls - (index->is_instant()
+ ? UT_BITS_IN_BYTES(index->get_n_nullable(
+ n_fields))
+ : UT_BITS_IN_BYTES(
+ unsigned(index->n_nullable)));
+ break;
+ case REC_STATUS_NODE_PTR:
+ ut_ad(!temp);
+ rec_set_heap_no_new(rec, PAGE_HEAP_NO_USER_LOW);
+ rec_set_status(rec, status);
+ ut_ad(n_fields
+ == dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+ ut_d(n_null = std::min(index->n_core_null_bytes * 8U,
+ index->n_nullable));
+ n_node_ptr_field = n_fields - 1;
+ lens = nulls - index->n_core_null_bytes;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_error;
+ return;
}
end = rec;
/* clear the SQL-null flags */
- lens = nulls - n_null_bytes;
- memset(lens + 1, 0, nulls - lens);
+ memset(lens + 1, 0, ulint(nulls - lens));
/* Store the data and the offsets */
@@ -1334,21 +1603,26 @@ rec_convert_dtuple_to_rec_new(
const dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple) /*!< in: data tuple */
{
+ ut_ad(!(dtuple->info_bits
+ & ~(REC_NEW_STATUS_MASK | REC_INFO_DELETED_FLAG
+ | REC_INFO_MIN_REC_FLAG)));
+ rec_comp_status_t status = static_cast<rec_comp_status_t>(
+ dtuple->info_bits & REC_NEW_STATUS_MASK);
+ if (status == REC_STATUS_ORDINARY
+ && dtuple->n_fields > index->n_core_fields) {
+ ut_ad(index->is_instant());
+ status = REC_STATUS_COLUMNS_ADDED;
+ }
+
ulint extra_size;
- ulint status;
- rec_t* rec;
- status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
rec_get_converted_size_comp(
index, status, dtuple->fields, dtuple->n_fields, &extra_size);
- rec = buf + extra_size;
+ rec_t* rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
rec, index, dtuple->fields, dtuple->n_fields, status, false);
-
- /* Set the info bits of the record */
- rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
-
+ rec_set_info_bits_new(rec, dtuple->info_bits & ~REC_NEW_STATUS_MASK);
return(rec);
}
@@ -1388,45 +1662,82 @@ rec_convert_dtuple_to_rec(
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[out] extra record header size
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
@return total size, in bytes */
ulint
rec_get_converted_size_temp(
const dict_index_t* index,
const dfield_t* fields,
ulint n_fields,
- ulint* extra)
+ ulint* extra,
+ rec_comp_status_t status)
{
- return(rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, true));
+ return rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, status, true);
}
-/******************************************************//**
-Determine the offset to each field in temporary file.
-@see rec_convert_dtuple_to_temp() */
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in]	index	index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+@param[in] n_core number of core fields (index->n_core_fields)
+@param[in] def_val default values for non-core fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */
void
rec_init_offsets_temp(
-/*==================*/
- const rec_t* rec, /*!< in: temporary file record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets,
+ ulint n_core,
+ const dict_col_t::def_t*def_val,
+ rec_comp_status_t status)
{
- rec_init_offsets_comp_ordinary(rec, true, index, offsets);
+ ut_ad(status == REC_STATUS_ORDINARY
+ || status == REC_STATUS_COLUMNS_ADDED);
+ /* The table may have been converted to plain format
+ if it was emptied during an ALTER TABLE operation. */
+ ut_ad(index->n_core_fields == n_core || !index->is_instant());
+ ut_ad(index->n_core_fields >= n_core);
+ rec_init_offsets_comp_ordinary(rec, index, offsets, n_core, def_val,
+ status == REC_STATUS_COLUMNS_ADDED
+ ? REC_LEAF_TEMP_COLUMNS_ADDED
+ : REC_LEAF_TEMP);
}
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-@see rec_init_offsets_temp() */
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in]	index	index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+*/
+void
+rec_init_offsets_temp(
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets)
+{
+ ut_ad(!index->is_instant());
+ rec_init_offsets_comp_ordinary(rec, index, offsets,
+ index->n_core_fields, NULL,
+ REC_LEAF_TEMP);
+}
+
+/** Convert a data tuple prefix to the temporary file format.
+@param[out] rec record in temporary file format
+@param[in] index clustered or secondary index
+@param[in] fields data fields
+@param[in] n_fields number of data fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
+*/
void
rec_convert_dtuple_to_temp(
-/*=======================*/
- rec_t* rec, /*!< out: record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields) /*!< in: number of fields */
+ rec_t* rec,
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ rec_comp_status_t status)
{
rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
- REC_STATUS_ORDINARY, true);
+ status, true);
}
/** Copy the first n fields of a (copy of a) physical record to a data tuple.
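
[Usage sketch, assumptions noted] With the extra status argument, the writer and the reader of the temporary-file format must agree on it, and the reader additionally needs the core field count and the defaults of any non-core fields. The calls below are built only from the signatures shown above; the status selection mirrors the clustered-index path, and names such as non_core_defaults are illustrative:

	/* write side */
	rec_convert_dtuple_to_temp(rec, index, tuple->fields, tuple->n_fields,
	                           tuple->n_fields > index->n_core_fields
	                           ? REC_STATUS_COLUMNS_ADDED
	                           : REC_STATUS_ORDINARY);

	/* read side: n_core and def_val must describe the index as it was
	when the record was written */
	rec_init_offsets_temp(rec, index, offsets, index->n_core_fields,
	                      non_core_defaults, status);
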
@@ -1437,13 +1748,11 @@ The fields are copied into the memory heap.
@param[in] n_fields number of fields to copy
@param[in,out] heap memory heap */
void
-rec_copy_prefix_to_dtuple_func(
+rec_copy_prefix_to_dtuple(
dtuple_t* tuple,
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool is_leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
{
@@ -1458,10 +1767,10 @@ rec_copy_prefix_to_dtuple_func(
n_fields, &heap);
ut_ad(rec_validate(rec, offsets));
+ ut_ad(!rec_offs_any_default(offsets));
ut_ad(dtuple_check_typed(tuple));
- dtuple_set_info_bits(tuple, rec_get_info_bits(
- rec, dict_table_is_comp(index->table)));
+ tuple->info_bits = rec_get_info_bits(rec, rec_offs_comp(offsets));
for (ulint i = 0; i < n_fields; i++) {
dfield_t* field;
@@ -1539,14 +1848,8 @@ rec_copy_prefix_to_buf(
or NULL */
ulint* buf_size) /*!< in/out: buffer size */
{
- const byte* nulls;
- const byte* lens;
- ulint i;
- ulint prefix_len;
- ulint null_mask;
- ulint status;
- bool is_rtr_node_ptr = false;
-
+ ut_ad(n_fields <= index->n_fields || dict_index_is_ibuf(index));
+ ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
UNIV_PREFETCH_RW(*buf);
if (!dict_table_is_comp(index->table)) {
@@ -1557,40 +1860,62 @@ rec_copy_prefix_to_buf(
buf, buf_size));
}
- status = rec_get_status(rec);
+ ulint prefix_len = 0;
+ ulint instant_omit = 0;
+ const byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+ const byte* nullf = nulls;
+ const byte* lens = nulls - index->n_core_null_bytes;
- switch (status) {
+ switch (rec_get_status(rec)) {
+ default:
+ /* infimum or supremum record: no sense to copy anything */
+ ut_error;
+ return(NULL);
case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
+ ut_ad(n_fields <= index->n_core_fields);
break;
case REC_STATUS_NODE_PTR:
/* For R-tree, we need to copy the child page number field. */
+ compile_time_assert(DICT_INDEX_SPATIAL_NODEPTR_SIZE == 1);
if (dict_index_is_spatial(index)) {
+ ut_ad(index->n_core_null_bytes == 0);
ut_ad(n_fields == DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1);
- is_rtr_node_ptr = true;
- } else {
- /* it doesn't make sense to copy the child page number
- field */
- ut_ad(n_fields <=
- dict_index_get_n_unique_in_tree_nonleaf(index));
+ ut_ad(index->fields[0].col->prtype & DATA_NOT_NULL);
+ ut_ad(DATA_BIG_COL(index->fields[0].col));
+ /* This is a deficiency of the format introduced
+ in MySQL 5.7. The length in the R-tree index should
+ always be DATA_MBR_LEN. */
+ ut_ad(!index->fields[0].fixed_len);
+ ut_ad(*lens == DATA_MBR_LEN);
+ lens--;
+ prefix_len = DATA_MBR_LEN + REC_NODE_PTR_SIZE;
+ n_fields = 0; /* skip the "for" loop below */
+ break;
}
+ /* it doesn't make sense to copy the child page number field */
+ ut_ad(n_fields
+ <= dict_index_get_n_unique_in_tree_nonleaf(index));
break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record: no sense to copy anything */
- default:
- ut_error;
- return(NULL);
+ case REC_STATUS_COLUMNS_ADDED:
+ /* We would have !index->is_instant() when rolling back
+ an instant ADD COLUMN operation. */
+ ut_ad(index->is_instant() || page_rec_is_metadata(rec));
+ nulls++;
+ const ulint n_rec = ulint(index->n_core_fields) + 1
+ + rec_get_n_add_field(nulls);
+ instant_omit = ulint(&rec[-REC_N_NEW_EXTRA_BYTES] - nulls);
+ ut_ad(instant_omit == 1 || instant_omit == 2);
+ nullf = nulls;
+ const uint nb = UT_BITS_IN_BYTES(index->get_n_nullable(n_rec));
+ instant_omit += nb - index->n_core_null_bytes;
+ lens = --nulls - nb;
}
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+ const byte* const lenf = lens;
UNIV_PREFETCH_R(lens);
- prefix_len = 0;
- null_mask = 1;
/* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0, null_mask = 1; i < n_fields; i++) {
const dict_field_t* field;
const dict_col_t* col;
@@ -1612,11 +1937,7 @@ rec_copy_prefix_to_buf(
null_mask <<= 1;
}
- if (is_rtr_node_ptr && i == 1) {
- /* For rtree node ptr rec, we need to
- copy the page no field with 4 bytes len. */
- prefix_len += 4;
- } else if (field->fixed_len) {
+ if (field->fixed_len) {
prefix_len += field->fixed_len;
} else {
ulint len = *lens--;
@@ -1642,17 +1963,41 @@ rec_copy_prefix_to_buf(
UNIV_PREFETCH_R(rec + prefix_len);
- prefix_len += rec - (lens + 1);
+ ulint size = prefix_len + ulint(rec - (lens + 1)) - instant_omit;
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
+ if (*buf == NULL || *buf_size < size) {
ut_free(*buf);
- *buf_size = prefix_len;
- *buf = static_cast<byte*>(ut_malloc_nokey(prefix_len));
+ *buf_size = size;
+ *buf = static_cast<byte*>(ut_malloc_nokey(size));
}
- memcpy(*buf, lens + 1, prefix_len);
-
- return(*buf + (rec - (lens + 1)));
+ if (instant_omit) {
+ /* Copy and convert the record header to a format where
+ instant ADD COLUMN has not been used:
+ + lengths of variable-length fields in the prefix
+ - omit any null flag bytes for any instantly added columns
+ + index->n_core_null_bytes of null flags
+ - omit the n_add_fields header (1 or 2 bytes)
+ + REC_N_NEW_EXTRA_BYTES of fixed header */
+ byte* b = *buf;
+ /* copy the lengths of the variable-length fields */
+ memcpy(b, lens + 1, ulint(lenf - lens));
+ b += ulint(lenf - lens);
+ /* copy the null flags */
+ memcpy(b, nullf - index->n_core_null_bytes,
+ index->n_core_null_bytes);
+ b += index->n_core_null_bytes + REC_N_NEW_EXTRA_BYTES;
+ ut_ad(ulint(b - *buf) + prefix_len == size);
+ /* copy the fixed-size header and the record prefix */
+ memcpy(b - REC_N_NEW_EXTRA_BYTES, rec - REC_N_NEW_EXTRA_BYTES,
+ prefix_len + REC_N_NEW_EXTRA_BYTES);
+ ut_ad(rec_get_status(b) == REC_STATUS_COLUMNS_ADDED);
+ rec_set_status(b, REC_STATUS_ORDINARY);
+ return b;
+ } else {
+ memcpy(*buf, lens + 1, size);
+ return *buf + (rec - (lens + 1));
+ }
}
/***************************************************************//**
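
[Illustration, inferred from the instant_omit branch above; not copied from the tree] In address order (lowest address first, header stored before the record origin) the copy rewrites the header as follows, and the small helper merely restates the size arithmetic with size_t standing in for ulint:

	/* COLUMNS_ADDED record on the page:
	   [var-len lengths][null flags incl. added columns][n_add: 1-2 bytes]
	   [REC_N_NEW_EXTRA_BYTES][record data ...]
	   copied prefix:
	   [var-len lengths][core null flags][REC_N_NEW_EXTRA_BYTES]
	   [data prefix], with the status reset to REC_STATUS_ORDINARY.
	   The dropped header bytes are exactly "instant_omit". */
	static size_t copied_prefix_size(size_t prefix_len,  /* data prefix */
	                                 size_t header_len,  /* rec-(lens+1) */
	                                 size_t instant_omit)
	{
		return prefix_len + header_len - instant_omit;
	}
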
@@ -1680,7 +2025,7 @@ rec_validate_old(
for (i = 0; i < n_fields; i++) {
rec_get_nth_field_offs_old(rec, i, &len);
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
+ if (!((len < srv_page_size) || (len == UNIV_SQL_NULL))) {
ib::error() << "Record field " << i << " len " << len;
return(FALSE);
}
@@ -1723,20 +2068,27 @@ rec_validate(
return(FALSE);
}
- ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec));
+ ut_a(rec_offs_any_flag(offsets, REC_OFFS_COMPACT | REC_OFFS_DEFAULT)
+ || n_fields <= rec_get_n_fields_old(rec));
for (i = 0; i < n_fields; i++) {
rec_get_nth_field_offs(offsets, i, &len);
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- ib::error() << "Record field " << i << " len " << len;
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
+ switch (len) {
+ default:
+ if (len >= srv_page_size) {
+ ib::error() << "Record field " << i
+ << " len " << len;
+ return(FALSE);
+ }
len_sum += len;
- } else if (!rec_offs_comp(offsets)) {
- len_sum += rec_get_nth_field_size(rec, i);
+ break;
+ case UNIV_SQL_DEFAULT:
+ break;
+ case UNIV_SQL_NULL:
+ if (!rec_offs_comp(offsets)) {
+ len_sum += rec_get_nth_field_size(rec, i);
+ }
}
}
@@ -1818,14 +2170,22 @@ rec_print_comp(
ulint i;
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- const byte* data;
+ const byte* UNINIT_VAR(data);
ulint len;
- data = rec_get_nth_field(rec, offsets, i, &len);
+ if (rec_offs_nth_default(offsets, i)) {
+ len = UNIV_SQL_DEFAULT;
+ } else {
+ data = rec_get_nth_field(rec, offsets, i, &len);
+ }
fprintf(file, " " ULINTPF ":", i);
- if (len != UNIV_SQL_NULL) {
+ if (len == UNIV_SQL_NULL) {
+ fputs(" SQL NULL", file);
+ } else if (len == UNIV_SQL_DEFAULT) {
+ fputs(" SQL DEFAULT", file);
+ } else {
if (len <= 30) {
ut_print_buf(file, data, len);
@@ -1843,8 +2203,6 @@ rec_print_comp(
fprintf(file, " (total " ULINTPF " bytes)",
len);
}
- } else {
- fputs(" SQL NULL", file);
}
putc(';', file);
putc('\n', file);
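
[Condensed caller-side sketch] Taken together with the assertions added elsewhere in this file, the hunks suggest the contract for readers once UNIV_SQL_DEFAULT exists: test rec_offs_nth_default() before dereferencing the field, because an instantly added column stores no bytes in the record; only then can the length be an ordinary byte count or UNIV_SQL_NULL. The pointer is only valid in the else branch:

	if (rec_offs_nth_default(offsets, i)) {
		len = UNIV_SQL_DEFAULT;	/* value lives in the dictionary */
	} else {
		data = rec_get_nth_field(rec, offsets, i, &len);
		/* len is now UNIV_SQL_NULL or an ordinary byte count */
	}
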
@@ -1938,6 +2296,7 @@ rec_print_mbr_rec(
ut_ad(rec);
ut_ad(offsets);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_offs_any_default(offsets));
if (!rec_offs_comp(offsets)) {
rec_print_mbr_old(file, rec);
@@ -2089,6 +2448,11 @@ rec_print(
data = rec_get_nth_field(rec, offsets, i, &len);
+ if (len == UNIV_SQL_DEFAULT) {
+ o << "DEFAULT";
+ continue;
+ }
+
if (len == UNIV_SQL_NULL) {
o << "NULL";
continue;
@@ -2238,6 +2602,7 @@ wsrep_rec_get_foreign_key(
dict_index_get_nth_field(index_ref, i);
const dict_col_t* col_r = dict_field_get_col(field_r);
+ ut_ad(!rec_offs_nth_default(offsets, i));
data = rec_get_nth_field(rec, offsets, i, &len);
if (key_len + ((len != UNIV_SQL_NULL) ? len + 1 : 1) >
*buf_len) {
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index 72c68940d11..503f7d0d3e7 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -70,7 +70,7 @@ row_ext_cache_fill(
} else {
/* Fetch at most ext->max_len of the column.
The column should be non-empty. However,
- trx_rollback_or_clean_all_recovered() may try to
+ trx_rollback_all_recovered() may try to
access a half-deleted BLOB if the server previously
crashed during the execution of
btr_free_externally_stored_field(). */
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 810b5387aec..f46c381fbf7 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -65,15 +65,15 @@ integer value)
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
- dict_index_t* index, /*!< in: Original FTS index
- based on which this sort index
- is created */
- const dict_table_t* table, /*!< in: table that FTS index
- is being created on */
- ibool* opt_doc_id_size)
- /*!< out: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
+ dict_index_t* index, /*!< in: Original FTS index
+ based on which this sort index
+ is created */
+ dict_table_t* table, /*!< in,out: table that FTS index
+ is being created on */
+ ibool* opt_doc_id_size)
+ /*!< out: whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort */
{
dict_index_t* new_index;
dict_field_t* field;
@@ -81,11 +81,9 @@ row_merge_create_fts_sort_index(
CHARSET_INFO* charset;
// FIXME: This name shouldn't be hard coded here.
- new_index = dict_mem_index_create(
- index->table->name.m_name, "tmp_fts_idx", 0, DICT_FTS, 3);
+ new_index = dict_mem_index_create(table, "tmp_fts_idx", DICT_FTS, 3);
new_index->id = index->id;
- new_index->table = (dict_table_t*) table;
new_index->n_uniq = FTS_NUM_FIELDS_SORT;
new_index->n_def = FTS_NUM_FIELDS_SORT;
new_index->cached = TRUE;
@@ -105,7 +103,7 @@ row_merge_create_fts_sort_index(
? DATA_VARCHAR : DATA_VARMYSQL;
field->col->mbminlen = idx_field->col->mbminlen;
field->col->mbmaxlen = idx_field->col->mbmaxlen;
- field->col->len = HA_FT_MAXCHARLEN * field->col->mbmaxlen;
+ field->col->len = HA_FT_MAXCHARLEN * unsigned(field->col->mbmaxlen);
field->fixed_len = 0;
@@ -252,7 +250,7 @@ row_fts_psort_info_init(
dup->index);
if (row_merge_file_create(psort_info[j].merge_file[i],
- path) < 0) {
+ path) == OS_FILE_CLOSED) {
goto func_exit;
}
@@ -412,9 +410,9 @@ row_merge_fts_doc_add_word_for_parser(
ut_ad(t_ctx);
str.f_str = (byte*)(word);
- str.f_len = word_len;
+ str.f_len = ulint(word_len);
str.f_n_char = fts_get_token_size(
- (CHARSET_INFO*)param->cs, word, word_len);
+ (CHARSET_INFO*)param->cs, word, ulint(word_len));
/* JAN: TODO: MySQL 5.7 FTS
ut_ad(boolean_info->position >= 0);
@@ -668,7 +666,7 @@ row_merge_fts_doc_tokenize(
MySQL 5.7 changed the fulltext parser plugin interface
by adding MYSQL_FTPARSER_BOOLEAN_INFO::position.
Below we assume that the field is always 0. */
- unsigned pos = t_ctx->init_pos;
+ ulint pos = t_ctx->init_pos;
byte position[4];
if (parser == NULL) {
pos += t_ctx->processed_len + inc - str.f_len;
@@ -761,7 +759,7 @@ It also performs the initial in memory sort of the parsed records.
@return OS_THREAD_DUMMY_RETURN */
static
os_thread_ret_t
-fts_parallel_tokenization(
+DECLARE_THREAD(fts_parallel_tokenization)(
/*======================*/
void* arg) /*!< in: psort_info for the thread */
{
@@ -773,7 +771,7 @@ fts_parallel_tokenization(
merge_file_t** merge_file;
row_merge_block_t** block;
row_merge_block_t** crypt_block;
- int tmpfd[FTS_NUM_AUX_INDEX];
+ pfs_os_file_t tmpfd[FTS_NUM_AUX_INDEX];
ulint mycount[FTS_NUM_AUX_INDEX];
ib_uint64_t total_rec = 0;
ulint num_doc_processed = 0;
@@ -902,7 +900,7 @@ loop:
merge_file[t_ctx.buf_used]->offset++,
block[t_ctx.buf_used],
crypt_block[t_ctx.buf_used],
- table->space)) {
+ table->space_id)) {
error = DB_TEMP_FILE_WRITE_FAIL;
goto func_exit;
}
@@ -996,7 +994,7 @@ exit:
merge_file[i]->offset++,
block[i],
crypt_block[i],
- table->space)) {
+ table->space_id)) {
error = DB_TEMP_FILE_WRITE_FAIL;
goto func_exit;
}
@@ -1025,7 +1023,7 @@ exit:
}
tmpfd[i] = row_merge_file_create_low(path);
- if (tmpfd[i] < 0) {
+ if (tmpfd[i] == OS_FILE_CLOSED) {
error = DB_OUT_OF_MEMORY;
goto func_exit;
}
@@ -1034,15 +1032,15 @@ exit:
psort_info->psort_common->dup,
merge_file[i], block[i], &tmpfd[i],
false, 0.0/* pct_progress */, 0.0/* pct_cost */,
- crypt_block[i], table->space);
+ crypt_block[i], table->space_id);
if (error != DB_SUCCESS) {
- close(tmpfd[i]);
+ os_file_close(tmpfd[i]);
goto func_exit;
}
total_rec += merge_file[i]->n_rec;
- close(tmpfd[i]);
+ os_file_close(tmpfd[i]);
}
func_exit:
@@ -1100,7 +1098,7 @@ Function performs the merge and insertion of the sorted records.
@return OS_THREAD_DUMMY_RETURN */
static
os_thread_ret_t
-fts_parallel_merge(
+DECLARE_THREAD(fts_parallel_merge)(
/*===============*/
void* arg) /*!< in: parallel merge info */
{
@@ -1131,7 +1129,7 @@ row_fts_start_parallel_merge(
/*=========================*/
fts_psort_t* merge_info) /*!< in: parallel sort info */
{
- int i = 0;
+ ulint i = 0;
/* Kick off merge/insert threads */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
@@ -1371,10 +1369,10 @@ row_fts_insert_tuple(
Propagate a newly added record up one level in the selection tree
@return parent where this value propagated to */
static
-int
+ulint
row_fts_sel_tree_propagate(
/*=======================*/
- int propogated, /*<! in: tree node propagated */
+ ulint propogated, /*<! in: tree node propagated */
int* sel_tree, /*<! in: selection tree */
const mrec_t** mrec, /*<! in: sort record */
ulint** offsets, /*<! in: record offsets */
@@ -1413,7 +1411,7 @@ row_fts_sel_tree_propagate(
sel_tree[parent] = selected;
- return(static_cast<int>(parent));
+ return parent;
}
/*********************************************************************//**
@@ -1433,8 +1431,8 @@ row_fts_sel_tree_update(
ulint i;
for (i = 1; i <= height; i++) {
- propagated = static_cast<ulint>(row_fts_sel_tree_propagate(
- static_cast<int>(propagated), sel_tree, mrec, offsets, index));
+ propagated = row_fts_sel_tree_propagate(
+ propagated, sel_tree, mrec, offsets, index);
}
return(sel_tree[0]);
@@ -1514,7 +1512,7 @@ row_fts_build_sel_tree(
{
ulint treelevel = 1;
ulint num = 2;
- int i = 0;
+ ulint i = 0;
ulint start;
/* No need to build selection tree if we only have two merge threads */
@@ -1529,13 +1527,13 @@ row_fts_build_sel_tree(
start = (ulint(1) << treelevel) - 1;
- for (i = 0; i < (int) fts_sort_pll_degree; i++) {
- sel_tree[i + start] = i;
+ for (i = 0; i < fts_sort_pll_degree; i++) {
+ sel_tree[i + start] = int(i);
}
- for (i = static_cast<int>(treelevel) - 1; i >= 0; i--) {
+ for (i = treelevel; --i; ) {
row_fts_build_sel_tree_level(
- sel_tree, static_cast<ulint>(i), mrec, offsets, index);
+ sel_tree, i, mrec, offsets, index);
}
return(treelevel);
@@ -1566,7 +1564,7 @@ row_fts_merge_insert(
ib_alloc_t* heap_alloc;
ulint i;
mrec_buf_t** buf;
- int* fd;
+ pfs_os_file_t* fd;
byte** block;
byte** crypt_block;
const mrec_t** mrec;
@@ -1575,7 +1573,7 @@ row_fts_merge_insert(
ulint height;
ulint start;
fts_psort_insert_t ins_ctx;
- ulint count_diag = 0;
+ uint64_t count_diag = 0;
fts_table_t fts_table;
char aux_table_name[MAX_FULL_NAME_LEN];
dict_table_t* aux_table;
@@ -1591,7 +1589,7 @@ row_fts_merge_insert(
/* We use the insert query graph as the dummy graph
needed in the row module call */
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx_start_if_not_started(trx, true);
trx->op_info = "inserting index entries";
@@ -1608,7 +1606,7 @@ row_fts_merge_insert(
heap, sizeof(*offsets) * fts_sort_pll_degree);
buf = (mrec_buf_t**) mem_heap_alloc(
heap, sizeof(*buf) * fts_sort_pll_degree);
- fd = (int*) mem_heap_alloc(heap, sizeof(*fd) * fts_sort_pll_degree);
+ fd = (pfs_os_file_t*) mem_heap_alloc(heap, sizeof(*fd) * fts_sort_pll_degree);
block = (byte**) mem_heap_alloc(
heap, sizeof(*block) * fts_sort_pll_degree);
crypt_block = (byte**) mem_heap_alloc(
@@ -1641,7 +1639,7 @@ row_fts_merge_insert(
buf[i] = static_cast<mrec_buf_t*>(
mem_heap_alloc(heap, sizeof *buf[i]));
- count_diag += (int) psort_info[i].merge_file[id]->n_rec;
+ count_diag += psort_info[i].merge_file[id]->n_rec;
}
if (fts_enable_diag_print) {
@@ -1678,6 +1676,11 @@ row_fts_merge_insert(
dict_table_close(aux_table, FALSE, FALSE);
aux_index = dict_table_get_first_index(aux_table);
+ ut_ad(!aux_index->is_instant());
+ /* row_merge_write_fts_node() depends on the correct value */
+ ut_ad(aux_index->n_core_null_bytes
+ == UT_BITS_IN_BYTES(aux_index->n_nullable));
+
/* Create bulk load instance */
ins_ctx.btr_bulk = UT_NEW_NOKEY(
BtrBulk(aux_index, trx, psort_info[0].psort_common->trx
@@ -1699,7 +1702,7 @@ row_fts_merge_insert(
#ifdef UNIV_DEBUG
ins_ctx.aux_index_id = id;
#endif
- const ulint space = table->space;
+ const ulint space = table->space_id;
for (i = 0; i < fts_sort_pll_degree; i++) {
if (psort_info[i].merge_file[id]->n_rec == 0) {
@@ -1726,7 +1729,7 @@ row_fts_merge_insert(
height = row_fts_build_sel_tree(sel_tree, (const mrec_t **) mrec,
offsets, index);
- start = (1 << height) - 1;
+ start = (1U << height) - 1;
/* Fetch sorted records from sort buffer and insert them into
corresponding FTS index auxiliary tables */
@@ -1810,7 +1813,7 @@ exit:
error = ins_ctx.btr_bulk->finish(error);
UT_DELETE(ins_ctx.btr_bulk);
- trx_free_for_background(trx);
+ trx_free(trx);
mem_heap_free(heap);
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 4fdc6581005..1fdd2ac9b94 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -31,12 +31,12 @@ Created 2012-02-08 by Sunny Bains.
#include "dict0load.h"
#include "ibuf0ibuf.h"
#include "pars0pars.h"
-#include "row0upd.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
#include "fil0pagecompress.h"
+#include "trx0undo.h"
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
@@ -354,11 +354,11 @@ class AbstractCallback
public:
/** Constructor
@param trx covering transaction */
- AbstractCallback(trx_t* trx)
+ AbstractCallback(trx_t* trx, ulint space_id)
:
m_page_size(0, 0, false),
m_trx(trx),
- m_space(ULINT_UNDEFINED),
+ m_space(space_id),
m_xdes(),
m_xdes_page_no(ULINT_UNDEFINED),
m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
@@ -408,14 +408,13 @@ public:
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED. For
compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
+ block->frame + srv_page_size;
@param block block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
virtual dberr_t operator()(buf_block_t* block) UNIV_NOTHROW = 0;
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+ /** @return the tablespace identifier */
+ ulint get_space_id() const { return m_space; }
bool is_interrupted() const { return trx_is_interrupted(m_trx); }
@@ -575,7 +574,7 @@ AbstractCallback::init(
ib::error() << "Page size " << m_page_size.physical()
<< " of ibd file is not the same as the server page"
- " size " << univ_page_size.physical();
+ " size " << srv_page_size;
return(DB_CORRUPTION);
@@ -588,11 +587,12 @@ AbstractCallback::init(
return(DB_CORRUPTION);
}
- ut_a(m_space == ULINT_UNDEFINED);
-
m_size = mach_read_from_4(page + FSP_SIZE);
m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
- m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
+ if (m_space == ULINT_UNDEFINED) {
+ m_space = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID
+ + page);
+ }
return set_current_xdes(0, page);
}
@@ -621,19 +621,12 @@ struct FetchIndexRootPages : public AbstractCallback {
@param table table definition in server .*/
FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
:
- AbstractCallback(trx),
+ AbstractCallback(trx, ULINT_UNDEFINED),
m_table(table) UNIV_NOTHROW { }
/** Destructor */
virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
- /**
- @retval the space id of the tablespace being iterated over */
- virtual ulint get_space_id() const UNIV_NOTHROW
- {
- return(m_space);
- }
-
/** Called for each block as it is read from the file.
@param block block to convert, it is not from the buffer pool.
@retval DB_SUCCESS or error code. */
@@ -792,8 +785,23 @@ class PageConverter : public AbstractCallback {
public:
/** Constructor
@param cfg config of table being imported.
+ @param space_id tablespace identifier
@param trx transaction covering the import */
- PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW;
+ PageConverter(row_import* cfg, ulint space_id, trx_t* trx)
+ :
+ AbstractCallback(trx, space_id),
+ m_cfg(cfg),
+ m_index(cfg->m_indexes),
+ m_current_lsn(log_get_lsn()),
+ m_page_zip_ptr(0),
+ m_rec_iter(),
+ m_offsets_(), m_offsets(m_offsets_),
+ m_heap(0),
+ m_cluster_index(dict_table_get_first_index(cfg->m_table))
+ {
+ ut_ad(m_current_lsn);
+ rec_offs_init(m_offsets_);
+ }
virtual ~PageConverter() UNIV_NOTHROW
{
@@ -802,13 +810,6 @@ public:
}
}
- /**
- @retval the server space id of the tablespace being iterated over */
- virtual ulint get_space_id() const UNIV_NOTHROW
- {
- return(m_cfg->m_table->space);
- }
-
/** Called for each block as it is read from the file.
@param block block to convert, it is not from the buffer pool.
@retval DB_SUCCESS or error code. */
@@ -868,17 +869,14 @@ private:
/** Purge delete-marked records, only if it is possible to do
so without re-organising the B+tree.
- @param offsets current row offsets.
@retval true if purged */
- bool purge(const ulint* offsets) UNIV_NOTHROW;
+ bool purge() UNIV_NOTHROW;
/** Adjust the BLOB references and sys fields for the current record.
- @param index the index being converted
@param rec record to update
@param offsets column offsets for the record
@return DB_SUCCESS or error code. */
dberr_t adjust_cluster_record(
- const dict_index_t* index,
rec_t* rec,
const ulint* offsets) UNIV_NOTHROW;
@@ -1347,8 +1345,6 @@ row_import::set_root_by_name() UNIV_NOTHROW
/* We've already checked that it exists. */
ut_a(index != 0);
- /* Set the root page number and space id. */
- index->space = m_table->space;
index->page = cfg_index->m_page_no;
}
}
@@ -1408,7 +1404,6 @@ row_import::set_root_by_heuristic() UNIV_NOTHROW
cfg_index[i].m_srv_index = index;
- index->space = m_table->space;
index->page = cfg_index[i].m_page_no;
++i;
@@ -1463,6 +1458,13 @@ IndexPurge::open() UNIV_NOTHROW
btr_pcur_open_at_index_side(
true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
+ btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
+ if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), m_index)) {
+ ut_ad(btr_pcur_is_on_user_rec(&m_pcur));
+ /* Skip the metadata pseudo-record. */
+ } else {
+ btr_pcur_move_to_prev_on_page(&m_pcur);
+ }
}
/**
@@ -1552,28 +1554,6 @@ IndexPurge::purge() UNIV_NOTHROW
btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
}
-/** Constructor
-@param cfg config of table being imported.
-@param trx transaction covering the import */
-inline
-PageConverter::PageConverter(
- row_import* cfg,
- trx_t* trx)
- :
- AbstractCallback(trx),
- m_cfg(cfg),
- m_index(cfg->m_indexes),
- m_current_lsn(log_get_lsn()),
- m_page_zip_ptr(0),
- m_rec_iter(),
- m_offsets_(), m_offsets(m_offsets_),
- m_heap(0),
- m_cluster_index(dict_table_get_first_index(cfg->m_table)) UNIV_NOTHROW
-{
- ut_a(m_current_lsn > 0);
- rec_offs_init(m_offsets_);
-}
-
/** Adjust the BLOB reference for a single column that is externally stored
@param rec record to update
@param offsets column offsets for the record
@@ -1606,7 +1586,7 @@ PageConverter::adjust_cluster_index_blob_column(
return(DB_CORRUPTION);
}
- field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
+ field += len - (BTR_EXTERN_FIELD_REF_SIZE - BTR_EXTERN_SPACE_ID);
mach_write_to_4(field, get_space_id());
@@ -1677,11 +1657,8 @@ PageConverter::adjust_cluster_index_blob_ref(
/** Purge delete-marked records, only if it is possible to do so without
re-organising the B+tree.
-@param offsets current row offsets.
@return true if purge succeeded */
-inline
-bool
-PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
+inline bool PageConverter::purge() UNIV_NOTHROW
{
const dict_index_t* index = m_index->m_srv_index;
@@ -1705,7 +1682,6 @@ PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
inline
dberr_t
PageConverter::adjust_cluster_record(
- const dict_index_t* index,
rec_t* rec,
const ulint* offsets) UNIV_NOTHROW
{
@@ -1716,10 +1692,20 @@ PageConverter::adjust_cluster_record(
/* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields
are only written in conjunction with other changes to the
record. */
-
- row_upd_rec_sys_fields(
- rec, m_page_zip_ptr, m_cluster_index, m_offsets,
- m_trx, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS);
+ ulint trx_id_pos = m_cluster_index->n_uniq
+ ? m_cluster_index->n_uniq : 1;
+ if (m_page_zip_ptr) {
+ page_zip_write_trx_id_and_roll_ptr(
+ m_page_zip_ptr, rec, m_offsets, trx_id_pos,
+ 0, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS,
+ NULL);
+ } else {
+ ulint len;
+ byte* ptr = rec_get_nth_field(
+ rec, m_offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ memcpy(ptr, reset_trx_id, sizeof reset_trx_id);
+ }
}
return(err);
@@ -1758,8 +1744,7 @@ PageConverter::update_records(
if (clust_index) {
- dberr_t err = adjust_cluster_record(
- m_index->m_srv_index, rec, m_offsets);
+ dberr_t err = adjust_cluster_record(rec, m_offsets);
if (err != DB_SUCCESS) {
return(err);
@@ -1773,7 +1758,7 @@ PageConverter::update_records(
/* A successful purge will move the cursor to the
next record. */
- if (!purge(m_offsets)) {
+ if (!purge()) {
m_rec_iter.next();
}
@@ -1836,6 +1821,13 @@ PageConverter::update_index_page(
if (dict_index_is_clust(m_index->m_srv_index)) {
if (page_is_root(page)) {
/* Preserve the PAGE_ROOT_AUTO_INC. */
+ if (m_index->m_srv_index->table->supports_instant()
+ && btr_cur_instant_root_init(
+ const_cast<dict_index_t*>(
+ m_index->m_srv_index),
+ page)) {
+ return(DB_CORRUPTION);
+ }
} else {
/* Clear PAGE_MAX_TRX_ID so that it can be
used for other purposes in the future. IMPORT
@@ -1938,6 +1930,8 @@ PageConverter::update_page(
return(DB_CORRUPTION);
}
+ /* fall through */
+ case FIL_PAGE_TYPE_INSTANT:
/* This is on every page in the tablespace. */
mach_write_to_4(
get_frame(block)
@@ -2052,12 +2046,13 @@ row_import_discard_changes(
index = UT_LIST_GET_NEXT(indexes, index)) {
index->page = FIL_NULL;
- index->space = FIL_NULL;
}
table->file_unreadable = true;
-
- fil_close_tablespace(trx, table->space);
+ if (table->space) {
+ fil_close_tablespace(trx, table->space_id);
+ table->space = NULL;
+ }
}
/*****************************************************************//**
@@ -2084,7 +2079,7 @@ row_import_cleanup(
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
+ trx_free(trx);
prebuilt->trx->op_info = "";
@@ -2129,8 +2124,6 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_import_adjust_root_pages_of_secondary_indexes(
/*==============================================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
- handler */
trx_t* trx, /*!< in: transaction used for
the import */
dict_table_t* table, /*!< in: table the indexes
@@ -2154,7 +2147,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
ut_a(!dict_index_is_clust(index));
if (!(index->type & DICT_CORRUPT)
- && index->space != FIL_NULL
&& index->page != FIL_NULL) {
/* Update the Btree segment headers for index node and
@@ -2269,7 +2261,14 @@ row_import_set_sys_max_row_id(
rec = btr_pcur_get_rec(&pcur);
/* Check for empty table. */
- if (!page_rec_is_infimum(rec)) {
+ if (page_rec_is_infimum(rec)) {
+ /* The table is empty. */
+ err = DB_SUCCESS;
+ } else if (rec_is_metadata(rec, index)) {
+ /* The clustered index contains the metadata record only,
+ that is, the table is empty. */
+ err = DB_SUCCESS;
+ } else {
ulint len;
const byte* field;
mem_heap_t* heap = NULL;
@@ -2296,9 +2295,6 @@ row_import_set_sys_max_row_id(
if (heap != NULL) {
mem_heap_free(heap);
}
- } else {
- /* The table is empty. */
- err = DB_SUCCESS;
}
btr_pcur_close(&pcur);
@@ -2385,8 +2381,7 @@ row_import_cfg_read_index_fields(
/*=============================*/
FILE* file, /*!< in: file to write to */
THD* thd, /*!< in/out: session */
- row_index_t* index, /*!< Index being read in */
- row_import* cfg) /*!< in/out: meta-data read */
+ row_index_t* index) /*!< Index being read in */
{
byte row[sizeof(ib_uint32_t) * 3];
ulint n_fields = index->m_n_fields;
@@ -2606,8 +2601,7 @@ row_import_read_index_data(
return(err);
}
- err = row_import_cfg_read_index_fields(
- file, thd, cfg_index, cfg);
+ err = row_import_cfg_read_index_fields(file, thd, cfg_index);
if (err != DB_SUCCESS) {
return(err);
@@ -2937,14 +2931,14 @@ row_import_read_v1(
const ulint logical_page_size = mach_read_from_4(ptr);
ptr += sizeof(ib_uint32_t);
- if (logical_page_size != univ_page_size.logical()) {
+ if (logical_page_size != srv_page_size) {
ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
"Tablespace to be imported has a different"
" page size than this server. Server page size"
- " is " ULINTPF ", whereas tablespace page size"
+ " is %lu, whereas tablespace page size"
" is " ULINTPF,
- univ_page_size.logical(),
+ srv_page_size,
logical_page_size);
return(DB_ERROR);
@@ -2983,7 +2977,6 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_import_read_meta_data(
/*======================*/
- dict_table_t* table, /*!< in: table */
FILE* file, /*!< in: File to read from */
THD* thd, /*!< in: session */
row_import& cfg) /*!< out: contents of the .cfg file */
@@ -3057,7 +3050,7 @@ row_import_read_cfg(
cfg.m_missing = false;
- err = row_import_read_meta_data(table, file, thd, cfg);
+ err = row_import_read_meta_data(file, thd, cfg);
fclose(file);
}
@@ -3086,6 +3079,8 @@ row_import_update_index_root(
que_t* graph = 0;
dberr_t err = DB_SUCCESS;
+ ut_ad(reset || table->space->id == table->space_id);
+
static const char sql[] = {
"PROCEDURE UPDATE_INDEX_ROOT() IS\n"
"BEGIN\n"
@@ -3123,7 +3118,7 @@ row_import_update_index_root(
mach_write_to_4(
reinterpret_cast<byte*>(&space),
- reset ? FIL_NULL : index->space);
+ reset ? FIL_NULL : index->table->space_id);
mach_write_to_8(
reinterpret_cast<byte*>(&index_id),
@@ -3226,22 +3221,13 @@ row_import_set_discarded(
return(FALSE);
}
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool discarded, /*!< in: set MIX_LEN column bit
- to discarded, if true */
- bool dict_locked) /*!< in: set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
-
+/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
+@param[in,out] trx dictionary transaction
+@param[in] table_id table identifier
+@param[in] discarded whether to set or clear the flag
+@return DB_SUCCESS or error code */
+dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
+ bool discarded)
{
pars_info_t* info;
discard_t discard;
@@ -3280,7 +3266,7 @@ row_import_update_discarded_flag(
pars_info_bind_function(
info, "my_func", row_import_set_discarded, &discard);
- dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
+ dberr_t err = que_eval_sql(info, sql, false, trx);
ut_a(discard.n_recs == 1);
ut_a(discard.flags2 != ULINT32_UNDEFINED);
@@ -3659,8 +3645,8 @@ fil_tablespace_iterate(
We allocate an extra page in case it is a compressed table. One
 	page is to ensure alignment. */
- void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+ void* page_ptr = ut_malloc_nokey(3U << srv_page_size_shift);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, srv_page_size));
buf_block_t* block = reinterpret_cast<buf_block_t*>
(ut_zalloc_nokey(sizeof *block));
@@ -3676,7 +3662,7 @@ fil_tablespace_iterate(
request.disable_partial_io_warnings();
err = os_file_read_no_error_handling(request, file, page, 0,
- UNIV_PAGE_SIZE, 0);
+ srv_page_size, 0);
if (err == DB_SUCCESS) {
err = callback.init(file_size, block);
@@ -3715,23 +3701,24 @@ fil_tablespace_iterate(
/* Add an extra page for compressed page scratch area. */
void* io_buffer = ut_malloc_nokey(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ (2 + iter.n_io_buffers) << srv_page_size_shift);
iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
+ ut_align(io_buffer, srv_page_size));
void* crypt_io_buffer = NULL;
if (iter.crypt_data) {
crypt_io_buffer = ut_malloc_nokey(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+ (2 + iter.n_io_buffers)
+ << srv_page_size_shift);
iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
+ ut_align(crypt_io_buffer, srv_page_size));
}
if (block->page.zip.ssize) {
ut_ad(iter.n_io_buffers == 1);
block->frame = iter.io_buffer;
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ block->page.zip.data = block->frame + srv_page_size;
}
err = fil_iterate(iter, block, callback);
@@ -3783,17 +3770,18 @@ row_import_for_mysql(
/* The caller assured that this is not read_only_mode and that no
 	temporary tablespace is being imported. */
ut_ad(!srv_read_only_mode);
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
- ut_a(table->space);
+ ut_ad(table->space_id);
+ ut_ad(table->space_id < SRV_LOG_SPACE_FIRST_ID);
ut_ad(prebuilt->trx);
- ut_a(!table->is_readable());
+ ut_ad(!table->is_readable());
- ibuf_delete_for_discarded_space(table->space);
+ ibuf_delete_for_discarded_space(table->space_id);
trx_start_if_not_started(prebuilt->trx, true);
- trx = trx_allocate_for_mysql();
+ trx = trx_create();
/* So that the table is not DROPped during recovery. */
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
@@ -3811,14 +3799,13 @@ row_import_for_mysql(
/* Assign an undo segment for the transaction, so that the
transaction will be recovered after a crash. */
- mutex_enter(&trx->undo_mutex);
-
/* TODO: Do not write any undo log for the IMPORT cleanup. */
- trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo;
- err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, pundo,
- TRX_UNDO_UPDATE);
-
- mutex_exit(&trx->undo_mutex);
+ {
+ mtr_t mtr;
+ mtr.start();
+ trx_undo_assign(trx, &err, &mtr);
+ mtr.commit();
+ }
DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
@@ -3827,7 +3814,7 @@ row_import_for_mysql(
return(row_import_cleanup(prebuilt, trx, err));
- } else if (trx->rsegs.m_redo.update_undo == 0) {
+ } else if (trx->rsegs.m_redo.undo == 0) {
err = DB_TOO_MANY_CONCURRENT_TRXS;
return(row_import_cleanup(prebuilt, trx, err));
@@ -3915,7 +3902,7 @@ row_import_for_mysql(
/* Iterate over all the pages and do the sanity checking and
the conversion required to import the tablespace. */
- PageConverter converter(&cfg, trx);
+ PageConverter converter(&cfg, table->space_id, trx);
/* Set the IO buffer size in pages. */
@@ -3994,18 +3981,19 @@ row_import_for_mysql(
have an x-lock on dict_operation_lock and dict_sys->mutex.
The tablespace is initially opened as a temporary one, because
we will not be writing any redo log for it before we have invoked
- fil_space_set_imported() to declare it a persistent tablespace. */
+ fil_space_t::set_imported() to declare it a persistent tablespace. */
ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
- err = fil_ibd_open(
- true, true, FIL_TYPE_IMPORT, table->space,
- fsp_flags, table->name.m_name, filepath);
+ table->space = fil_ibd_open(
+ true, true, FIL_TYPE_IMPORT, table->space_id,
+ fsp_flags, table->name, filepath, &err);
+ ut_ad((table->space == NULL) == (err != DB_SUCCESS));
DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
- err = DB_TABLESPACE_NOT_FOUND;);
+ err = DB_TABLESPACE_NOT_FOUND; table->space = NULL;);
- if (err != DB_SUCCESS) {
+ if (!table->space) {
row_mysql_unlock_data_dictionary(trx);
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
@@ -4072,7 +4060,7 @@ row_import_for_mysql(
during the page conversion phase. */
err = row_import_adjust_root_pages_of_secondary_indexes(
- prebuilt, trx, table, cfg);
+ trx, table, cfg);
DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
err = DB_CORRUPTION;);
@@ -4101,7 +4089,7 @@ row_import_for_mysql(
{
FlushObserver observer(prebuilt->table->space, trx, NULL);
- buf_LRU_flush_or_remove_pages(prebuilt->table->space,
+ buf_LRU_flush_or_remove_pages(prebuilt->table->space_id,
&observer);
if (observer.is_interrupted()) {
@@ -4112,7 +4100,7 @@ row_import_for_mysql(
}
ib::info() << "Phase IV - Flush complete";
- fil_space_set_imported(prebuilt->table->space);
+ prebuilt->table->space->set_imported();
/* The dictionary latches will be released in in row_import_cleanup()
after the transaction commit, for both success and error. */
@@ -4126,8 +4114,7 @@ row_import_for_mysql(
return(row_import_error(prebuilt, trx, err));
}
- /* Update the table's discarded flag, unset it. */
- err = row_import_update_discarded_flag(trx, table->id, false, true);
+ err = row_import_update_discarded_flag(trx, table->id, false);
if (err != DB_SUCCESS) {
return(row_import_error(prebuilt, trx, err));
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 2db545973a3..bf454fbb505 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -147,6 +147,8 @@ row_ins_alloc_sys_fields(
memset(node->sys_buf, 0, sizeof node->sys_buf);
/* Assign DB_ROLL_PTR to 1 << ROLL_PTR_INSERT_FLAG_POS */
node->sys_buf[DATA_ROW_ID_LEN + DATA_TRX_ID_LEN] = 0x80;
+ ut_ad(!memcmp(node->sys_buf + DATA_ROW_ID_LEN, reset_trx_id,
+ sizeof reset_trx_id));
/* 1. Populate row-id */
col = dict_table_get_sys_col(table, DATA_ROW_ID);
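
[Reference sketch, inferred from the memset and the single 0x80 store above, not copied from the tree] The new assertion ties node->sys_buf to the shared reset_trx_id pattern; with DATA_TRX_ID_LEN = 6 and DATA_ROLL_PTR_LEN = 7 the expected bytes are:

	static const unsigned char expected_reset_trx_id[6 + 7] = {
		0, 0, 0, 0, 0, 0,	/* DB_TRX_ID  = 0 */
		0x80, 0, 0, 0, 0, 0, 0	/* DB_ROLL_PTR = 1 << 55,
					   only the insert flag set */
	};
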
@@ -351,7 +353,8 @@ row_ins_clust_index_entry_by_modify(
cursor->index, entry, rec, NULL, true,
thr_get_trx(thr), heap, mysql_table);
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
+ ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
+ == BTR_MODIFY_LEAF);
/* Try optimistic updating of the record, keeping changes
within the page */
@@ -417,7 +420,7 @@ row_ins_cascade_ancestor_updates_table(
upd_node = static_cast<upd_node_t*>(parent);
- if (upd_node->table == table && upd_node->is_delete == FALSE) {
+ if (upd_node->table == table && !upd_node->is_delete) {
return(TRUE);
}
@@ -545,6 +548,8 @@ row_ins_cascade_calc_update_vec(
ufield->exp = NULL;
ufield->new_val = parent_ufield->new_val;
+ dfield_get_type(&ufield->new_val)->prtype |=
+ col->prtype & DATA_VERSIONED;
ufield_len = dfield_get_len(&ufield->new_val);
/* Clear the "external storage" flag */
@@ -630,7 +635,7 @@ row_ins_cascade_calc_update_vec(
&& dict_table_is_fts_column(
table->fts->indexes,
dict_col_get_no(col),
- dict_col_is_virtual(col))
+ col->is_virtual())
!= ULINT_UNDEFINED) {
affects_fulltext = true;
}
@@ -762,8 +767,6 @@ row_ins_foreign_trx_print(
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
- trx_sys_mutex_enter();
-
mutex_enter(&dict_foreign_err_mutex);
rewind(dict_foreign_err_file);
ut_print_timestamp(dict_foreign_err_file);
@@ -772,8 +775,6 @@ row_ins_foreign_trx_print(
trx_print_low(dict_foreign_err_file, trx, 600,
n_rec_locks, n_trx_locks, heap_size);
- trx_sys_mutex_exit();
-
ut_ad(mutex_own(&dict_foreign_err_mutex));
}
@@ -908,8 +909,7 @@ row_ins_invalidate_query_cache(
const char* name) /*!< in: table name prefixed with
database name and a '/' character */
{
- ulint len = strlen(name) + 1;
- innobase_invalidate_query_cache(thr_get_trx(thr), name, len);
+ innobase_invalidate_query_cache(thr_get_trx(thr), name);
}
@@ -1133,8 +1133,13 @@ row_ins_foreign_check_on_constraint(
cascade = node->cascade_node;
cascade->table = table;
cascade->foreign = foreign;
- if (!(cascade->is_delete = node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE))) {
+
+ if (node->is_delete
+ && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
+ cascade->is_delete = PLAIN_DELETE;
+ } else {
+ cascade->is_delete = NO_DELETE;
+
if (foreign->n_fields > cascade->update_n_fields) {
/* We have to make the update vector longer */
@@ -1303,9 +1308,9 @@ row_ins_foreign_check_on_constraint(
if (!affects_fulltext
&& table->fts && dict_table_is_fts_column(
table->fts->indexes,
- dict_index_get_nth_col_no(index, i),
- dict_col_is_virtual(
- dict_index_get_nth_col(index, i)))
+ dict_index_get_nth_col(index, i)->ind,
+ dict_index_get_nth_col(index, i)
+ ->is_virtual())
!= ULINT_UNDEFINED) {
affects_fulltext = true;
}
@@ -1325,16 +1330,15 @@ row_ins_foreign_check_on_constraint(
goto nonstandard_exit_func;
}
}
- } else if (table->fts && cascade->is_delete) {
+ } else if (table->fts && cascade->is_delete == PLAIN_DELETE) {
/* DICT_FOREIGN_ON_DELETE_CASCADE case */
bool affects_fulltext = false;
for (ulint i = 0; i < foreign->n_fields; i++) {
if (dict_table_is_fts_column(
table->fts->indexes,
- dict_index_get_nth_col_no(index, i),
- dict_col_is_virtual(
- dict_index_get_nth_col(index, i)))
+ dict_index_get_nth_col(index, i)->ind,
+ dict_index_get_nth_col(index, i)->is_virtual())
!= ULINT_UNDEFINED) {
affects_fulltext = true;
break;
@@ -1396,6 +1400,15 @@ row_ins_foreign_check_on_constraint(
}
}
+ if (table->versioned() && cascade->is_delete != PLAIN_DELETE
+ && cascade->update->affects_versioned()) {
+ ut_ad(!cascade->historical_heap);
+ cascade->historical_heap = mem_heap_create(128);
+ cascade->historical_row = row_build(
+ ROW_COPY_POINTERS, clust_index, clust_rec, NULL, table,
+ NULL, NULL, NULL, cascade->historical_heap);
+ }
+
/* Store pcur position and initialize or store the cascade node
pcur stored position */
@@ -1426,9 +1439,7 @@ row_ins_foreign_check_on_constraint(
/* Release the data dictionary latch for a while, so that we do not
starve other threads from doing CREATE TABLE etc. if we have a huge
- cascaded operation running. The counter n_foreign_key_checks_running
- will prevent other users from dropping or ALTERing the table when we
- release the latch. */
+ cascaded operation running. */
row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
@@ -1583,8 +1594,14 @@ row_ins_check_foreign_constraint(
/* If any of the foreign key fields in entry is SQL NULL, we
suppress the foreign key check: this is compatible with Oracle,
for example */
- for (ulint i = 0; i < foreign->n_fields; i++) {
- if (dfield_is_null(dtuple_get_nth_field(entry, i))) {
+ for (ulint i = 0; i < entry->n_fields; i++) {
+ dfield_t* field = dtuple_get_nth_field(entry, i);
+ if (i < foreign->n_fields && dfield_is_null(field)) {
+ goto exit_func;
+ }
+ /* System Versioning: if row_end != Inf, we
+ suppress the foreign key check */
+ if (field->type.vers_sys_end() && field->vers_history_row()) {
goto exit_func;
}
}
@@ -1592,7 +1609,8 @@ row_ins_check_foreign_constraint(
if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
upd_node = static_cast<upd_node_t*>(thr->run_node);
- if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
+ if (upd_node->is_delete != PLAIN_DELETE
+ && upd_node->foreign == foreign) {
/* If a cascaded update is done as defined by a
foreign key constraint, do not check that
constraint for the child row. In ON UPDATE CASCADE
@@ -1613,6 +1631,19 @@ row_ins_check_foreign_constraint(
}
}
+ if (que_node_get_type(thr->run_node) == QUE_NODE_INSERT) {
+ ins_node_t* insert_node =
+ static_cast<ins_node_t*>(thr->run_node);
+ dict_table_t* table = insert_node->index->table;
+ if (table->versioned()) {
+ dfield_t* row_end = dtuple_get_nth_field(
+ insert_node->row, table->vers_end);
+ if (row_end->vers_history_row()) {
+ goto exit_func;
+ }
+ }
+ }
+
if (check_ref) {
check_table = foreign->referenced_table;
check_index = foreign->referenced_index;
@@ -1623,8 +1654,7 @@ row_ins_check_foreign_constraint(
if (check_table == NULL
|| !check_table->is_readable()
- || check_index == NULL
- || fil_space_get(check_table->space)->is_being_truncated) {
+ || check_index == NULL) {
if (!srv_read_only_mode && check_ref) {
FILE* ef = dict_foreign_err_file;
@@ -1716,6 +1746,23 @@ row_ins_check_foreign_constraint(
cmp = cmp_dtuple_rec(entry, rec, offsets);
if (cmp == 0) {
+ if (check_table->versioned()) {
+ bool history_row = false;
+
+ if (check_index->is_primary()) {
+ history_row = check_index->
+ vers_history_row(rec, offsets);
+ } else if (check_index->
+ vers_history_row(rec, history_row))
+ {
+ break;
+ }
+
+ if (history_row) {
+ continue;
+ }
+ }
+
if (rec_get_deleted_flag(rec,
rec_offs_comp(offsets))) {
/* In delete-marked records, DB_TRX_ID must
@@ -1857,9 +1904,7 @@ do_possible_lock_wait:
thr->lock_state = QUE_THR_LOCK_ROW;
- /* To avoid check_table being dropped, increment counter */
- my_atomic_addlint(
- &check_table->n_foreign_key_checks_running, 1);
+ check_table->inc_fk_checks();
lock_wait_suspend_thread(thr);
@@ -1873,8 +1918,7 @@ do_possible_lock_wait:
err = DB_LOCK_WAIT;
}
- my_atomic_addlint(&check_table->n_foreign_key_checks_running,
- -1);
+ check_table->dec_fk_checks();
}
exit_func:
@@ -1936,9 +1980,7 @@ row_ins_check_foreign_constraints(
}
if (referenced_table) {
- my_atomic_addlint(
- &foreign->foreign_table
- ->n_foreign_key_checks_running, 1);
+ foreign->foreign_table->inc_fk_checks();
}
/* NOTE that if the thread ends up waiting for a lock
@@ -1950,9 +1992,7 @@ row_ins_check_foreign_constraints(
TRUE, foreign, table, entry, thr);
if (referenced_table) {
- my_atomic_addlint(
- &foreign->foreign_table
- ->n_foreign_key_checks_running, -1);
+ foreign->foreign_table->dec_fk_checks();
}
if (got_s_lock) {
@@ -2222,6 +2262,8 @@ row_ins_duplicate_error_in_clust_online(
dberr_t err = DB_SUCCESS;
const rec_t* rec = btr_cur_get_rec(cursor);
+ ut_ad(!cursor->index->is_instant());
+
if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) {
*offsets = rec_get_offsets(rec, cursor->index, *offsets, true,
ULINT_UNDEFINED, heap);
@@ -2461,7 +2503,7 @@ row_ins_index_entry_big_rec(
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
@@ -2535,7 +2577,7 @@ row_ins_clust_index_entry_low(
mtr_start(&mtr);
- if (dict_table_is_temporary(index->table)) {
+ if (index->table->is_temporary()) {
/* Disable REDO logging as the lifetime of temp-tables is
limited to server or connection lifetime and so REDO
information is not needed on restart for recovery.
@@ -2544,9 +2586,10 @@ row_ins_clust_index_entry_low(
ut_ad(flags & BTR_NO_LOCKING_FLAG);
ut_ad(!dict_index_is_online_ddl(index));
ut_ad(!index->table->persistent_autoinc);
+ ut_ad(!index->is_instant());
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (mode == BTR_MODIFY_LEAF
&& dict_index_is_online_ddl(index)) {
@@ -2595,6 +2638,40 @@ row_ins_clust_index_entry_low(
}
#endif /* UNIV_DEBUG */
+ if (UNIV_UNLIKELY(entry->info_bits != 0)) {
+ ut_ad(entry->info_bits == REC_INFO_METADATA);
+ ut_ad(flags == BTR_NO_LOCKING_FLAG);
+ ut_ad(index->is_instant());
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(!dup_chk_only);
+
+ const rec_t* rec = btr_cur_get_rec(cursor);
+
+ switch (rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)) {
+ case REC_INFO_MIN_REC_FLAG:
+ thr_get_trx(thr)->error_info = index;
+ err = DB_DUPLICATE_KEY;
+ goto err_exit;
+ case REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG:
+ /* The metadata record never carries the delete-mark
+ in MariaDB Server 10.3.
+ If a table loses its 'instantness', it happens
+ by the rollback of this first-time insert, or
+ by a call to btr_page_empty() on the root page
+ when the table becomes empty. */
+ err = DB_CORRUPTION;
+ goto err_exit;
+ default:
+ ut_ad(!row_ins_must_modify_rec(cursor));
+ goto do_insert;
+ }
+ }
+
+ if (rec_is_metadata(btr_cur_get_rec(cursor), index)) {
+ goto do_insert;
+ }
+
if (n_uniq
&& (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
@@ -2657,10 +2734,12 @@ err_exit:
mtr_commit(&mtr);
mem_heap_free(entry_heap);
} else {
+ if (index->is_instant()) entry->trim(*index);
+do_insert:
rec_t* insert_rec;
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ~BTR_ALREADY_S_LATCHED)
+ ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
== BTR_MODIFY_LEAF);
err = btr_cur_optimistic_insert(
flags, cursor, &offsets, &offsets_heap,
@@ -2742,12 +2821,12 @@ row_ins_sec_mtr_start_and_check_if_aborted(
ulint search_mode)
{
ut_ad(!dict_index_is_clust(index));
- ut_ad(mtr->is_named_space(index->space));
+ ut_ad(mtr->is_named_space(index->table->space));
const mtr_log_t log_mode = mtr->get_log_mode();
- mtr_start(mtr);
- mtr->set_named_space(index->space);
+ mtr->start();
+ index->set_modified(*mtr);
mtr->set_log_mode(log_mode);
if (!check) {
@@ -2829,7 +2908,7 @@ row_ins_sec_index_entry_low(
ut_ad(flags & BTR_NO_LOCKING_FLAG);
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (!dict_index_is_spatial(index)) {
search_mode |= BTR_INSERT;
}
@@ -2882,7 +2961,7 @@ row_ins_sec_index_entry_low(
index, false);
rtr_info_update_btr(&cursor, &rtr_info);
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
search_mode &= ulint(~BTR_MODIFY_LEAF);
search_mode |= BTR_MODIFY_TREE;
err = btr_cur_search_to_nth_level(
@@ -3055,7 +3134,7 @@ row_ins_sec_index_entry_low(
if (err == DB_SUCCESS && dict_index_is_spatial(index)
&& rtr_info.mbr_adj) {
- err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ err = rtr_ins_enlarge_mbr(&cursor, &mtr);
}
} else {
rec_t* insert_rec;
@@ -3069,7 +3148,7 @@ row_ins_sec_index_entry_low(
if (err == DB_SUCCESS
&& dict_index_is_spatial(index)
&& rtr_info.mbr_adj) {
- err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ err = rtr_ins_enlarge_mbr(&cursor, &mtr);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
@@ -3094,7 +3173,7 @@ row_ins_sec_index_entry_low(
if (err == DB_SUCCESS
&& dict_index_is_spatial(index)
&& rtr_info.mbr_adj) {
- err = rtr_ins_enlarge_mbr(&cursor, thr, &mtr);
+ err = rtr_ins_enlarge_mbr(&cursor, &mtr);
}
}
@@ -3150,9 +3229,13 @@ row_ins_clust_index_entry(
n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;
- ulint flags = dict_table_is_temporary(index->table)
- ? BTR_NO_LOCKING_FLAG
- : 0;
+ ulint flags = index->table->no_rollback() ? BTR_NO_ROLLBACK
+ : index->table->is_temporary()
+ ? BTR_NO_LOCKING_FLAG : 0;
+ const ulint orig_n_fields = entry->n_fields;
+
+ /* Try first optimistic descent to the B-tree */
+ log_free_check();
/* For intermediate table during copy alter table,
skip the undo log and record lock checking for
@@ -3169,6 +3252,7 @@ row_ins_clust_index_entry(
flags, BTR_MODIFY_LEAF, index, n_uniq, entry,
n_ext, thr, dup_chk_only);
+ entry->n_fields = orig_n_fields;
DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
"after_row_ins_clust_index_entry_leaf");
@@ -3185,6 +3269,8 @@ row_ins_clust_index_entry(
flags, BTR_MODIFY_TREE, index, n_uniq, entry,
n_ext, thr, dup_chk_only);
+ entry->n_fields = orig_n_fields;
+
DBUG_RETURN(err);
}
@@ -3230,7 +3316,7 @@ row_ins_sec_index_entry(
/* Try first optimistic descent to the B-tree */
log_free_check();
- ulint flags = dict_table_is_temporary(index->table)
+ ulint flags = index->table->is_temporary()
? BTR_NO_LOCKING_FLAG
: 0;
@@ -3249,7 +3335,7 @@ row_ins_sec_index_entry(
if (err == DB_FAIL) {
mem_heap_empty(heap);
- if (index->space == IBUF_SPACE_ID
+ if (index->table->space == fil_system.sys_space
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
ibuf_free_excess_pages();
}
@@ -3282,13 +3368,13 @@ row_ins_index_entry(
dtuple_t* entry, /*!< in/out: index entry to insert */
que_thr_t* thr) /*!< in: query thread */
{
- ut_ad(thr_get_trx(thr)->id != 0);
+ ut_ad(thr_get_trx(thr)->id || index->table->no_rollback());
DBUG_EXECUTE_IF("row_ins_index_entry_timeout", {
DBUG_SET("-d,row_ins_index_entry_timeout");
return(DB_LOCK_WAIT);});
- if (dict_index_is_clust(index)) {
+ if (index->is_primary()) {
return(row_ins_clust_index_entry(index, entry, thr, 0, false));
} else {
return(row_ins_sec_index_entry(index, entry, thr, false));
@@ -3362,7 +3448,7 @@ row_ins_index_entry_set_vals(
col = ind_field->col;
}
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
const dict_v_col_t* v_col
= reinterpret_cast<const dict_v_col_t*>(col);
ut_ad(dtuple_get_n_fields(row)
@@ -3733,8 +3819,6 @@ row_ins_step(
trx = thr_get_trx(thr);
- trx_start_if_not_started_xa(trx, true);
-
node = static_cast<ins_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
@@ -3756,7 +3840,28 @@ row_ins_step(
table during the search operation, and there is no need to set
it again here. But we must write trx->id to node->sys_buf. */
- trx_write_trx_id(&node->sys_buf[DATA_ROW_ID_LEN], trx->id);
+ if (node->table->no_rollback()) {
+ /* No-rollback tables should only be written to by a
+ single thread at a time, but there can be multiple
+ concurrent readers. We must hold an open table handle. */
+ DBUG_ASSERT(node->table->get_ref_count() > 0);
+ DBUG_ASSERT(node->ins_type == INS_DIRECT);
+ /* No-rollback tables can consist only of a single index. */
+ DBUG_ASSERT(UT_LIST_GET_LEN(node->entry_list) == 1);
+ DBUG_ASSERT(UT_LIST_GET_LEN(node->table->indexes) == 1);
+ /* There should be no possibility for interruption and
+ restarting here. In theory, we could allow resumption
+ from the INS_NODE_INSERT_ENTRIES state here. */
+ DBUG_ASSERT(node->state == INS_NODE_SET_IX_LOCK);
+ node->index = dict_table_get_first_index(node->table);
+ node->entry = UT_LIST_GET_FIRST(node->entry_list);
+ node->state = INS_NODE_INSERT_ENTRIES;
+ goto do_insert;
+ }
+
+ if (UNIV_LIKELY(!node->table->skip_alter_undo)) {
+ trx_write_trx_id(&node->sys_buf[DATA_ROW_ID_LEN], trx->id);
+ }
if (node->state == INS_NODE_SET_IX_LOCK) {
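
The trx_write_trx_id() call retained in the hunk above stores trx->id into node->sys_buf right after the DB_ROW_ID slot. Below is a rough, self-contained sketch of that store, assuming the usual layout of the system-column buffer (6-byte DB_ROW_ID, then 6-byte DB_TRX_ID, then 7-byte DB_ROLL_PTR) and big-endian encoding; the names are stand-ins, not the real declarations.

    // Illustrative only: write a 48-bit transaction ID into the DB_TRX_ID
    // slot of a DB_ROW_ID|DB_TRX_ID|DB_ROLL_PTR buffer, big-endian.
    #include <cstdint>

    enum { ROW_ID_LEN = 6, TRX_ID_LEN = 6, ROLL_PTR_LEN = 7 };

    static void write_trx_id(unsigned char* sys_buf, uint64_t trx_id)
    {
            unsigned char* p = sys_buf + ROW_ID_LEN;      /* skip DB_ROW_ID */
            for (int i = TRX_ID_LEN - 1; i >= 0; i--) {   /* low byte last */
                    p[i] = static_cast<unsigned char>(trx_id);
                    trx_id >>= 8;
            }
    }
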
@@ -3805,7 +3910,7 @@ same_trx:
return(thr);
}
-
+do_insert:
/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
err = row_ins(node, thr);
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 16a71fbe896..3ad4aff8b24 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -38,6 +38,7 @@ Created 2011-05-26 Marko Makela
#include "ut0stage.h"
#include "trx0rec.h"
+#include <sql_class.h>
#include <algorithm>
#include <map>
@@ -70,6 +71,7 @@ enum row_op {
/** Log block for modifications during online ALTER TABLE */
struct row_log_buf_t {
byte* block; /*!< file block buffer */
+ size_t size; /*!< length of block in bytes */
ut_new_pfx_t block_pfx; /*!< opaque descriptor of "block". Set
by ut_allocator::allocate_large() and fed to
ut_allocator::deallocate_large(). */
@@ -169,7 +171,7 @@ When head.blocks == tail.blocks, the reader will access tail.block
directly. When also head.bytes == tail.bytes, both counts will be
reset to 0 and the file will be truncated. */
struct row_log_t {
- int fd; /*!< file descriptor */
+ pfs_os_file_t fd; /*!< file descriptor */
ib_mutex_t mutex; /*!< mutex protecting error,
max_trx and tail */
page_no_map* blobs; /*!< map of page numbers of off-page columns
@@ -181,12 +183,27 @@ struct row_log_t {
index that is being created online */
bool same_pk;/*!< whether the definition of the PRIMARY KEY
has remained the same */
- const dtuple_t* add_cols;
- /*!< default values of added columns, or NULL */
+ const dtuple_t* defaults;
+			/*!< default values of added or changed columns,
+ or NULL */
const ulint* col_map;/*!< mapping of old column numbers to
new ones, or NULL if !table */
dberr_t error; /*!< error that occurred during online
table rebuild */
+ /** The transaction ID of the ALTER TABLE transaction. Any
+ concurrent DML would necessarily be logged with a larger
+ transaction ID, because ha_innobase::prepare_inplace_alter_table()
+ acts as a barrier that ensures that any concurrent transaction
+ that operates on the table would have been started after
+ ha_innobase::prepare_inplace_alter_table() returns and before
+ ha_innobase::commit_inplace_alter_table(commit=true) is invoked.
+
+ Due to the nondeterministic nature of purge and due to the
+ possibility of upgrading from an earlier version of MariaDB
+ or MySQL, it is possible that row_log_table_low() would be
+	fed a DB_TRX_ID that precedes min_trx. We must normalize
+ such references to reset_trx_id[]. */
+ trx_id_t min_trx;
trx_id_t max_trx;/*!< biggest observed trx_id in
row_log_online_op();
protected by mutex and index->lock S-latch,
@@ -204,24 +221,55 @@ struct row_log_t {
decryption or NULL */
const char* path; /*!< where to create temporary file during
log operation */
+ /** the number of core fields in the clustered index of the
+ source table; before row_log_table_apply() completes, the
+ table could be emptied, so that table->is_instant() no longer holds,
+ but all log records must be in the "instant" format. */
+ unsigned n_core_fields;
+ /** the default values of non-core fields when the operation started */
+ dict_col_t::def_t* non_core_fields;
+	bool		allow_not_null; /*!< whether ALTER IGNORE is being used
+					or the SQL mode is non-strict; if not,
+					NULL values will not be converted to defaults */
+	const TABLE*	old_table;	/*!< old table definition, used when
+					reporting errors */
+
+	uint64_t	n_rows;		/*!< number of rows read from the table */
+ /** Determine whether the log should be in the 'instant ADD' format
+ @param[in] index the clustered index of the source table
+ @return whether to use the 'instant ADD COLUMN' format */
+ bool is_instant(const dict_index_t* index) const
+ {
+ ut_ad(table);
+ ut_ad(n_core_fields <= index->n_fields);
+ return n_core_fields != index->n_fields;
+ }
+
+ const byte* instant_field_value(ulint n, ulint* len) const
+ {
+ ut_ad(n >= n_core_fields);
+ const dict_col_t::def_t& d= non_core_fields[n - n_core_fields];
+ *len = d.len;
+ return static_cast<const byte*>(d.data);
+ }
};
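
The min_trx rule documented in this struct (any DB_TRX_ID observed by the log that precedes the ALTER TABLE transaction must be normalized to reset_trx_id[]) is applied repeatedly in the functions below. A minimal standalone sketch of that normalization, with read_trx_id() standing in for the 6-byte big-endian DB_TRX_ID decoding and RESET[] as a placeholder for reset_trx_id[] (its actual contents are not spelled out here):

    #include <cstdint>
    #include <cstring>

    static const unsigned char RESET[6 + 7] = {0};       /* placeholder contents */

    static uint64_t read_trx_id(const unsigned char* p)
    {
            uint64_t id = 0;
            for (int i = 0; i < 6; i++) {                 /* DATA_TRX_ID_LEN == 6 */
                    id = (id << 8) | p[i];
            }
            return id;
    }

    /* Normalize a DB_TRX_ID,DB_ROLL_PTR pair (6 + 7 bytes) in place. */
    static void normalize_sys(unsigned char* sys, uint64_t min_trx)
    {
            if (read_trx_id(sys) < min_trx) {
                    memcpy(sys, RESET, sizeof RESET);
            }
    }
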
/** Create the file for the online log if it has not been created already.
@param[in,out]	log	online rebuild log
@return the temporary file handle, or OS_FILE_CLOSED on failure */
static MY_ATTRIBUTE((warn_unused_result))
-int
+pfs_os_file_t
row_log_tmpfile(
row_log_t* log)
{
DBUG_ENTER("row_log_tmpfile");
- if (log->fd < 0) {
+ if (log->fd == OS_FILE_CLOSED) {
log->fd = row_merge_file_create_low(log->path);
DBUG_EXECUTE_IF("row_log_tmpfile_fail",
- if (log->fd > 0)
+ if (log->fd != OS_FILE_CLOSED)
row_merge_file_destroy_low(log->fd);
- log->fd = -1;);
- if (log->fd >= 0) {
+ log->fd = OS_FILE_CLOSED;);
+ if (log->fd != OS_FILE_CLOSED) {
MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_LOG_FILES);
}
}
@@ -250,6 +298,7 @@ row_log_block_allocate(
if (log_buf.block == NULL) {
DBUG_RETURN(false);
}
+ log_buf.size = srv_sort_buf_size;
}
DBUG_RETURN(true);
}
@@ -264,7 +313,7 @@ row_log_block_free(
DBUG_ENTER("row_log_block_free");
if (log_buf.block != NULL) {
ut_allocator<byte>(mem_key_row_log_buf).deallocate_large(
- log_buf.block, &log_buf.block_pfx);
+ log_buf.block, &log_buf.block_pfx, log_buf.size);
log_buf.block = NULL;
}
DBUG_VOID_RETURN;
@@ -375,7 +424,7 @@ row_log_online_op(
UNIV_MEM_ASSERT_RW(buf, srv_sort_buf_size);
- if (row_log_tmpfile(log) < 0) {
+ if (row_log_tmpfile(log) == OS_FILE_CLOSED) {
log->error = DB_OUT_OF_MEMORY;
goto err_exit;
}
@@ -386,7 +435,7 @@ row_log_online_op(
if (!log_tmp_block_encrypt(
buf, srv_sort_buf_size,
log->crypt_tail, byte_offset,
- index->table->space)) {
+ index->table->space_id)) {
log->error = DB_DECRYPTION_FAILED;
goto write_failed;
}
@@ -396,7 +445,7 @@ row_log_online_op(
}
log->tail.blocks++;
- if (!os_file_write_int_fd(
+ if (!os_file_write(
request,
"(modification log)",
log->fd,
@@ -513,7 +562,7 @@ row_log_table_close_func(
UNIV_MEM_ASSERT_RW(buf, srv_sort_buf_size);
- if (row_log_tmpfile(log) < 0) {
+ if (row_log_tmpfile(log) == OS_FILE_CLOSED) {
log->error = DB_OUT_OF_MEMORY;
goto err_exit;
}
@@ -524,7 +573,7 @@ row_log_table_close_func(
if (!log_tmp_block_encrypt(
log->tail.block, srv_sort_buf_size,
log->crypt_tail, byte_offset,
- index->table->space)) {
+ index->table->space_id)) {
log->error = DB_DECRYPTION_FAILED;
goto err_exit;
}
@@ -534,7 +583,7 @@ row_log_table_close_func(
}
log->tail.blocks++;
- if (!os_file_write_int_fd(
+ if (!os_file_write(
request,
"(modification log)",
log->fd,
@@ -623,6 +672,7 @@ row_log_table_delete(
ut_ad(dict_index_is_clust(new_index));
ut_ad(!dict_index_is_online_ddl(new_index));
+ ut_ad(index->online_log->min_trx);
/* Create the tuple PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in new_table. */
if (index->online_log->same_pk) {
@@ -633,8 +683,9 @@ row_log_table_delete(
fields of the record. */
heap = mem_heap_create(
DATA_TRX_ID_LEN
- + DTUPLE_EST_ALLOC(new_index->n_uniq + 2));
- old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 2);
+ + DTUPLE_EST_ALLOC(unsigned(new_index->n_uniq) + 2));
+ old_pk = tuple = dtuple_create(
+ heap, unsigned(new_index->n_uniq) + 2);
dict_index_copy_types(tuple, new_index, tuple->n_fields);
dtuple_set_n_fields_cmp(tuple, new_index->n_uniq);
@@ -649,16 +700,27 @@ row_log_table_delete(
dfield_set_data(dfield, field, len);
}
- if (sys) {
- dfield_set_data(
- dtuple_get_nth_field(tuple,
- new_index->n_uniq),
- sys, DATA_TRX_ID_LEN);
- dfield_set_data(
- dtuple_get_nth_field(tuple,
- new_index->n_uniq + 1),
- sys + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
+ dfield_t* db_trx_id = dtuple_get_nth_field(
+ tuple, new_index->n_uniq);
+
+ const bool replace_sys_fields
+ = sys
+ || trx_read_trx_id(static_cast<byte*>(db_trx_id->data))
+ < index->online_log->min_trx;
+
+ if (replace_sys_fields) {
+ if (!sys || trx_read_trx_id(sys)
+ < index->online_log->min_trx) {
+ sys = reset_trx_id;
+ }
+
+ dfield_set_data(db_trx_id, sys, DATA_TRX_ID_LEN);
+ dfield_set_data(db_trx_id + 1, sys + DATA_TRX_ID_LEN,
+ DATA_ROLL_PTR_LEN);
}
+
+ ut_d(trx_id_check(db_trx_id->data,
+ index->online_log->min_trx));
} else {
/* The PRIMARY KEY has changed. Translate the tuple. */
old_pk = row_log_table_get_pk(
@@ -731,21 +793,23 @@ row_log_table_low_redundant(
ulint avail_size;
mem_heap_t* heap = NULL;
dtuple_t* tuple;
+ const ulint n_fields = rec_get_n_fields_old(rec);
ut_ad(!page_is_comp(page_align(rec)));
- ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
+ ut_ad(index->n_fields >= n_fields);
+ ut_ad(index->n_fields == n_fields || index->is_instant());
ut_ad(dict_tf2_is_valid(index->table->flags, index->table->flags2));
ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */
ut_ad(dict_index_is_clust(new_index));
- heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
- tuple = dtuple_create(heap, index->n_fields);
- dict_index_copy_types(tuple, index, index->n_fields);
+ heap = mem_heap_create(DTUPLE_EST_ALLOC(n_fields));
+ tuple = dtuple_create(heap, n_fields);
+ dict_index_copy_types(tuple, index, n_fields);
dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
if (rec_get_1byte_offs_flag(rec)) {
- for (ulint i = 0; i < index->n_fields; i++) {
+ for (ulint i = 0; i < n_fields; i++) {
dfield_t* dfield;
ulint len;
const void* field;
@@ -756,7 +820,7 @@ row_log_table_low_redundant(
dfield_set_data(dfield, field, len);
}
} else {
- for (ulint i = 0; i < index->n_fields; i++) {
+ for (ulint i = 0; i < n_fields; i++) {
dfield_t* dfield;
ulint len;
const void* field;
@@ -772,8 +836,28 @@ row_log_table_low_redundant(
}
}
+ dfield_t* db_trx_id = dtuple_get_nth_field(tuple, index->n_uniq);
+ ut_ad(dfield_get_len(db_trx_id) == DATA_TRX_ID_LEN);
+ ut_ad(dfield_get_len(db_trx_id + 1) == DATA_ROLL_PTR_LEN);
+
+ if (trx_read_trx_id(static_cast<const byte*>
+ (dfield_get_data(db_trx_id)))
+ < index->online_log->min_trx) {
+ dfield_set_data(db_trx_id, reset_trx_id, DATA_TRX_ID_LEN);
+ dfield_set_data(db_trx_id + 1, reset_trx_id + DATA_TRX_ID_LEN,
+ DATA_ROLL_PTR_LEN);
+ }
+
+ const bool is_instant = index->online_log->is_instant(index);
+ rec_comp_status_t status = is_instant
+ ? REC_STATUS_COLUMNS_ADDED : REC_STATUS_ORDINARY;
+
size = rec_get_converted_size_temp(
- index, tuple->fields, tuple->n_fields, &extra_size);
+ index, tuple->fields, tuple->n_fields, &extra_size, status);
+ if (is_instant) {
+ size++;
+ extra_size++;
+ }
mrec_size = ROW_LOG_HEADER_SIZE + size + (extra_size >= 0x80);
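
The (extra_size >= 0x80) term in the size computation above accounts for the variable-width storage of extra_size in the log record header: values below 0x80 fit in one byte (the `if (extra_size < 0x80)` branch further down writes exactly that byte), larger values take two. A one-line helper expressing the same count, for illustration only:

    // Number of bytes used to store extra_size in a log record header.
    static unsigned extra_size_len(unsigned extra_size)
    {
            return 1 + (extra_size >= 0x80);
    }
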
@@ -797,15 +881,19 @@ row_log_table_low_redundant(
if (byte* b = row_log_table_open(index->online_log,
mrec_size, &avail_size)) {
- *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+ if (insert) {
+ *b++ = ROW_T_INSERT;
+ } else {
+ *b++ = ROW_T_UPDATE;
- if (old_pk_size) {
- *b++ = static_cast<byte>(old_pk_extra_size);
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
- rec_convert_dtuple_to_temp(
- b + old_pk_extra_size, new_index,
- old_pk->fields, old_pk->n_fields);
- b += old_pk_size;
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
}
if (extra_size < 0x80) {
@@ -816,8 +904,17 @@ row_log_table_low_redundant(
*b++ = static_cast<byte>(extra_size);
}
+ if (status == REC_STATUS_COLUMNS_ADDED) {
+ ut_ad(is_instant);
+ if (n_fields <= index->online_log->n_core_fields) {
+ status = REC_STATUS_ORDINARY;
+ }
+ *b = status;
+ }
+
rec_convert_dtuple_to_temp(
- b + extra_size, index, tuple->fields, tuple->n_fields);
+ b + extra_size, index, tuple->fields, tuple->n_fields,
+ status);
b += size;
row_log_table_close(index, b, mrec_size, avail_size);
@@ -841,26 +938,38 @@ row_log_table_low(
const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert
and a PRIMARY KEY is being created) */
{
- ulint omit_size;
ulint old_pk_size;
ulint old_pk_extra_size;
ulint extra_size;
ulint mrec_size;
ulint avail_size;
const dict_index_t* new_index;
+ row_log_t* log = index->online_log;
- new_index = dict_table_get_first_index(index->online_log->table);
+ new_index = dict_table_get_first_index(log->table);
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_clust(new_index));
ut_ad(!dict_index_is_online_ddl(new_index));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
- ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
+ ut_ad(rec_offs_size(offsets) <= sizeof log->tail.buf);
ut_ad(rw_lock_own_flagged(
&index->lock,
RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
- ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+#ifdef UNIV_DEBUG
+ switch (fil_page_get_type(page_align(rec))) {
+ case FIL_PAGE_INDEX:
+ break;
+ case FIL_PAGE_TYPE_INSTANT:
+ ut_ad(index->is_instant());
+ ut_ad(page_is_root(page_align(rec)));
+ break;
+ default:
+ ut_ad(!"wrong page type");
+ }
+#endif /* UNIV_DEBUG */
+ ut_ad(!rec_is_metadata(rec, index));
ut_ad(page_rec_is_leaf(rec));
ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
/* old_pk=row_log_table_get_pk() [not needed in INSERT] is a prefix
@@ -871,7 +980,7 @@ row_log_table_low(
if (index->online_status != ONLINE_INDEX_CREATION
|| (index->type & DICT_CORRUPT) || index->table->corrupted
- || index->online_log->error != DB_SUCCESS) {
+ || log->error != DB_SUCCESS) {
return;
}
@@ -882,16 +991,38 @@ row_log_table_low(
}
ut_ad(page_is_comp(page_align(rec)));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
- omit_size = REC_N_NEW_EXTRA_BYTES;
-
- extra_size = rec_offs_extra_size(offsets) - omit_size;
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY
+ || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
+
+ const ulint omit_size = REC_N_NEW_EXTRA_BYTES;
+
+ const ulint rec_extra_size = rec_offs_extra_size(offsets) - omit_size;
+ const bool is_instant = log->is_instant(index);
+ extra_size = rec_extra_size + is_instant;
+
+ unsigned fake_extra_size = 0;
+ byte fake_extra_buf[2];
+ if (is_instant && UNIV_UNLIKELY(!index->is_instant())) {
+ /* The source table was emptied after ALTER TABLE
+ started, and it was converted to non-instant format.
+		Because row_log_table_apply_op() expects all logged
+		records to be in the same format, we cannot copy the
+		rec_extra_size bytes of the record header verbatim,
+		but must convert them here. */
+ unsigned n_add = index->n_fields - 1 - log->n_core_fields;
+ fake_extra_size = rec_get_n_add_field_len(n_add);
+ ut_ad(fake_extra_size == 1 || fake_extra_size == 2);
+ extra_size += fake_extra_size;
+ byte* fake_extra = fake_extra_buf + fake_extra_size - 1;
+ rec_set_n_add_field(fake_extra, n_add);
+ ut_ad(fake_extra + 1 == fake_extra_buf);
+ }
mrec_size = ROW_LOG_HEADER_SIZE
- + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size;
+ + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size
+ + is_instant + fake_extra_size;
- if (insert || index->online_log->same_pk) {
+ if (insert || log->same_pk) {
ut_ad(!old_pk);
old_pk_extra_size = old_pk_size = 0;
} else {
@@ -909,17 +1040,20 @@ row_log_table_low(
mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
}
- if (byte* b = row_log_table_open(index->online_log,
- mrec_size, &avail_size)) {
- *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+ if (byte* b = row_log_table_open(log, mrec_size, &avail_size)) {
+ if (insert) {
+ *b++ = ROW_T_INSERT;
+ } else {
+ *b++ = ROW_T_UPDATE;
- if (old_pk_size) {
- *b++ = static_cast<byte>(old_pk_extra_size);
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
- rec_convert_dtuple_to_temp(
- b + old_pk_extra_size, new_index,
- old_pk->fields, old_pk->n_fields);
- b += old_pk_size;
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
}
if (extra_size < 0x80) {
@@ -930,9 +1064,27 @@ row_log_table_low(
*b++ = static_cast<byte>(extra_size);
}
- memcpy(b, rec - rec_offs_extra_size(offsets), extra_size);
- b += extra_size;
+ if (is_instant) {
+ *b++ = fake_extra_size
+ ? REC_STATUS_COLUMNS_ADDED
+ : rec_get_status(rec);
+ } else {
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+ }
+
+ memcpy(b, rec - rec_extra_size - omit_size, rec_extra_size);
+ b += rec_extra_size;
+ memcpy(b, fake_extra_buf, fake_extra_size);
+ b += fake_extra_size;
+ ulint len;
+ ulint trx_id_offs = rec_get_nth_field_offs(
+ offsets, index->n_uniq, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
memcpy(b, rec, rec_offs_data_size(offsets));
+ if (trx_read_trx_id(b + trx_id_offs) < log->min_trx) {
+ memcpy(b + trx_id_offs,
+ reset_trx_id, sizeof reset_trx_id);
+ }
b += rec_offs_data_size(offsets);
row_log_table_close(index, b, mrec_size, avail_size);
@@ -979,7 +1131,6 @@ row_log_table_get_pk_old_col(
}
/** Maps an old table column to a PRIMARY KEY column in the new table.
-@param[in] col old table column (before ALTER TABLE)
@param[in] ifield clustered index field in the new table (after
ALTER TABLE)
@param[in,out] dfield clustered index tuple field in the new table
@@ -990,12 +1141,12 @@ table
@param[in] i rec field corresponding to col
@param[in] page_size page size of the old table
@param[in] max_len maximum length of dfield
+@param[in] log row log for the table
@retval DB_INVALID_NULL if a NULL value is encountered
@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
static
dberr_t
row_log_table_get_pk_col(
- const dict_col_t* col,
const dict_field_t* ifield,
dfield_t* dfield,
mem_heap_t* heap,
@@ -1003,7 +1154,8 @@ row_log_table_get_pk_col(
const ulint* offsets,
ulint i,
const page_size_t& page_size,
- ulint max_len)
+ ulint max_len,
+ const row_log_t* log)
{
const byte* field;
ulint len;
@@ -1011,7 +1163,18 @@ row_log_table_get_pk_col(
field = rec_get_nth_field(rec, offsets, i, &len);
if (len == UNIV_SQL_NULL) {
- return(DB_INVALID_NULL);
+ if (!log->allow_not_null) {
+ return(DB_INVALID_NULL);
+ }
+
+ ulint n_default_cols = i - DATA_N_SYS_COLS;
+
+ field = static_cast<const byte*>(
+ log->defaults->fields[n_default_cols].data);
+ if (!field) {
+ return(DB_INVALID_NULL);
+ }
+		len = log->defaults->fields[n_default_cols].len;
}
if (rec_offs_nth_extern(offsets, i)) {
@@ -1071,6 +1234,7 @@ row_log_table_get_pk(
ut_ad(log);
ut_ad(log->table);
+ ut_ad(log->min_trx);
if (log->same_pk) {
/* The PRIMARY KEY columns are unchanged. */
@@ -1095,8 +1259,13 @@ row_log_table_get_pk(
ut_ad(len == DATA_TRX_ID_LEN);
}
- memcpy(sys, rec + trx_id_offs,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ const byte* ptr = trx_read_trx_id(rec + trx_id_offs)
+ < log->min_trx
+ ? reset_trx_id
+ : rec + trx_id_offs;
+
+ memcpy(sys, ptr, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ ut_d(trx_id_check(sys, log->min_trx));
}
return(NULL);
@@ -1117,7 +1286,7 @@ row_log_table_get_pk(
if (!offsets) {
size += (1 + REC_OFFS_HEADER_SIZE
- + index->n_fields)
+ + unsigned(index->n_fields))
* sizeof *offsets;
}
@@ -1168,8 +1337,8 @@ row_log_table_get_pk(
}
log->error = row_log_table_get_pk_col(
- col, ifield, dfield, *heap,
- rec, offsets, i, page_size, max_len);
+ ifield, dfield, *heap,
+ rec, offsets, i, page_size, max_len, log);
if (log->error != DB_SUCCESS) {
err_exit:
@@ -1184,10 +1353,10 @@ err_exit:
/* No matching column was found in the old
table, so this must be an added column.
Copy the default value. */
- ut_ad(log->add_cols);
+ ut_ad(log->defaults);
dfield_copy(dfield, dtuple_get_nth_field(
- log->add_cols, col_no));
+ log->defaults, col_no));
mbminlen = dfield->type.mbminlen;
mbmaxlen = dfield->type.mbmaxlen;
prtype = dfield->type.prtype;
@@ -1215,7 +1384,13 @@ err_exit:
/* Copy the fields, because the fields will be updated
or the record may be moved somewhere else in the B-tree
as part of the upcoming operation. */
- if (sys) {
+ if (trx_read_trx_id(trx_roll) < log->min_trx) {
+ trx_roll = reset_trx_id;
+ if (sys) {
+ memcpy(sys, trx_roll,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ }
+ } else if (sys) {
memcpy(sys, trx_roll,
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
trx_roll = sys;
@@ -1226,6 +1401,8 @@ err_exit:
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
}
+ ut_d(trx_id_check(trx_roll, log->min_trx));
+
dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq),
trx_roll, DATA_TRX_ID_LEN);
dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1),
@@ -1339,20 +1516,20 @@ row_log_table_apply_convert_mrec(
const mrec_t* mrec, /*!< in: merge record */
dict_index_t* index, /*!< in: index of mrec */
const ulint* offsets, /*!< in: offsets of mrec */
- const row_log_t* log, /*!< in: rebuild context */
+ row_log_t* log, /*!< in: rebuild context */
mem_heap_t* heap, /*!< in/out: memory heap */
- trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */
dberr_t* error) /*!< out: DB_SUCCESS or
DB_MISSING_HISTORY or
reason of failure */
{
dtuple_t* row;
+ log->n_rows++;
*error = DB_SUCCESS;
/* This is based on row_build(). */
- if (log->add_cols) {
- row = dtuple_copy(log->add_cols, heap);
+ if (log->defaults) {
+ row = dtuple_copy(log->defaults, heap);
/* dict_table_copy_types() would set the fields to NULL */
for (ulint i = 0; i < dict_table_get_n_cols(log->table); i++) {
dict_col_copy_type(
@@ -1430,6 +1607,9 @@ blob_done:
rw_lock_x_unlock(dict_index_get_lock(index));
} else {
data = rec_get_nth_field(mrec, offsets, i, &len);
+ if (len == UNIV_SQL_DEFAULT) {
+ data = log->instant_field_value(i, &len);
+ }
dfield_set_data(dfield, data, len);
}
@@ -1470,9 +1650,21 @@ blob_done:
if ((new_col->prtype & DATA_NOT_NULL)
&& dfield_is_null(dfield)) {
- /* We got a NULL value for a NOT NULL column. */
- *error = DB_INVALID_NULL;
- return(NULL);
+
+ const dfield_t& default_field
+ = log->defaults->fields[col_no];
+ Field* field = log->old_table->field[col_no];
+
+ field->set_warning(Sql_condition::WARN_LEVEL_WARN,
+ WARN_DATA_TRUNCATED, 1, ulong(log->n_rows));
+
+ if (!log->allow_not_null) {
+ /* We got a NULL value for a NOT NULL column. */
+ *error = DB_INVALID_NULL;
+ return NULL;
+ }
+
+ *dfield = default_field;
}
/* Adjust the DATA_NOT_NULL flag in the parsed row. */
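
The block above decides what happens when a NULL value reaches a column that is NOT NULL in the rebuilt table: a data-truncation warning is reported in either case, and the value is then either rejected with DB_INVALID_NULL or replaced by the column default when allow_not_null is set (ALTER IGNORE or non-strict SQL mode). A compact sketch of just that decision, with invented names:

    // Illustrative only: outcome of a NULL value hitting a NOT NULL column
    // during the online table rebuild.
    enum class null_outcome { REJECT_ROW, USE_DEFAULT };

    static null_outcome handle_null(bool allow_not_null, bool* warned)
    {
            *warned = true;   /* a truncation warning is raised either way */
            return allow_not_null ? null_outcome::USE_DEFAULT
                                  : null_outcome::REJECT_ROW;
    }
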
@@ -1495,7 +1687,6 @@ row_log_table_apply_insert_low(
que_thr_t* thr, /*!< in: query graph */
const dtuple_t* row, /*!< in: table row
in the old table definition */
- trx_id_t trx_id, /*!< in: trx_id of the row */
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
@@ -1509,7 +1700,6 @@ row_log_table_apply_insert_low(
ulint n_index = 0;
ut_ad(dtuple_validate(row));
- ut_ad(trx_id);
DBUG_LOG("ib_alter_table",
"insert table " << index->table->id << " (index "
@@ -1548,8 +1738,8 @@ row_log_table_apply_insert_low(
entry = row_build_index_entry(row, NULL, index, heap);
error = row_ins_sec_index_entry_low(
flags, BTR_MODIFY_TREE,
- index, offsets_heap, heap, entry, trx_id, thr,
- false);
+ index, offsets_heap, heap, entry,
+ thr_get_trx(thr)->id, thr, false);
if (error != DB_SUCCESS) {
if (error == DB_DUPLICATE_KEY) {
@@ -1575,14 +1765,13 @@ row_log_table_apply_insert(
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
- row_merge_dup_t* dup, /*!< in/out: for reporting
+ row_merge_dup_t* dup) /*!< in/out: for reporting
duplicate key errors */
- trx_id_t trx_id) /*!< in: DB_TRX_ID of mrec */
{
- const row_log_t*log = dup->index->online_log;
+ row_log_t*log = dup->index->online_log;
dberr_t error;
const dtuple_t* row = row_log_table_apply_convert_mrec(
- mrec, dup->index, offsets, log, heap, trx_id, &error);
+ mrec, dup->index, offsets, log, heap, &error);
switch (error) {
case DB_MISSING_HISTORY:
@@ -1605,7 +1794,7 @@ row_log_table_apply_insert(
}
error = row_log_table_apply_insert_low(
- thr, row, trx_id, offsets_heap, heap, dup);
+ thr, row, offsets_heap, heap, dup);
if (error != DB_SUCCESS) {
/* Report the erroneous row using the new
version of the table. */
@@ -1664,8 +1853,8 @@ row_log_table_apply_delete_low(
const dtuple_t* entry = row_build_index_entry(
row, ext, index, heap);
- mtr_start(mtr);
- mtr->set_named_space(index->space);
+ mtr->start();
+ index->set_modified(*mtr);
btr_pcur_open(index, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
pcur, mtr);
@@ -1692,14 +1881,14 @@ flag_ok:
found, because new_table is being modified by
this thread only, and all indexes should be
updated in sync. */
- mtr_commit(mtr);
+ mtr->commit();
return(DB_INDEX_CORRUPT);
}
btr_cur_pessimistic_delete(&error, FALSE,
btr_pcur_get_btr_cur(pcur),
BTR_CREATE_FLAG, false, mtr);
- mtr_commit(mtr);
+ mtr->commit();
}
return(error);
@@ -1747,7 +1936,7 @@ row_log_table_apply_delete(
}
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
btr_pcur_open(index, old_pk, PAGE_CUR_LE,
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
&pcur, &mtr);
@@ -1797,6 +1986,8 @@ all_done:
= rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets,
trx_id_col, &len);
ut_ad(len == DATA_TRX_ID_LEN);
+ ut_d(trx_id_check(rec_trx_id, log->min_trx));
+ ut_d(trx_id_check(mrec_trx_id, log->min_trx));
ut_ad(rec_get_nth_field(mrec, moffsets, trx_id_col + 1, &len)
== mrec_trx_id + DATA_TRX_ID_LEN);
@@ -1843,13 +2034,12 @@ row_log_table_apply_update(
mem_heap_t* heap, /*!< in/out: memory heap */
row_merge_dup_t* dup, /*!< in/out: for reporting
duplicate key errors */
- trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */
const dtuple_t* old_pk) /*!< in: PRIMARY KEY and
DB_TRX_ID,DB_ROLL_PTR
of the old value,
or PRIMARY KEY if same_pk */
{
- const row_log_t*log = dup->index->online_log;
+ row_log_t* log = dup->index->online_log;
const dtuple_t* row;
dict_index_t* index = dict_table_get_first_index(log->table);
mtr_t mtr;
@@ -1864,7 +2054,7 @@ row_log_table_apply_update(
+ (log->same_pk ? 0 : 2));
row = row_log_table_apply_convert_mrec(
- mrec, dup->index, offsets, log, heap, trx_id, &error);
+ mrec, dup->index, offsets, log, heap, &error);
switch (error) {
case DB_MISSING_HISTORY:
@@ -1892,7 +2082,7 @@ row_log_table_apply_update(
}
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
btr_pcur_open(index, old_pk, PAGE_CUR_LE,
BTR_MODIFY_TREE, &pcur, &mtr);
#ifdef UNIV_DEBUG
@@ -1952,7 +2142,7 @@ row_log_table_apply_update(
ROW_T_UPDATE or ROW_T_DELETE will delete it. */
mtr_commit(&mtr);
error = row_log_table_apply_insert_low(
- thr, row, trx_id, offsets_heap, heap, dup);
+ thr, row, offsets_heap, heap, dup);
} else {
/* Some BLOBs are missing, so we are interpreting
this ROW_T_UPDATE as ROW_T_DELETE (see *1).
@@ -1983,21 +2173,21 @@ func_exit_committed:
/* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what
was buffered. */
ulint len;
- const void* rec_trx_id
+ const byte* rec_trx_id
= rec_get_nth_field(btr_pcur_get_rec(&pcur),
cur_offsets, index->n_uniq, &len);
+ const dfield_t* old_pk_trx_id
+ = dtuple_get_nth_field(old_pk, index->n_uniq);
ut_ad(len == DATA_TRX_ID_LEN);
- ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq)->len
- == DATA_TRX_ID_LEN);
- ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq + 1)->len
- == DATA_ROLL_PTR_LEN);
- ut_ad(DATA_TRX_ID_LEN + static_cast<const char*>(
- dtuple_get_nth_field(old_pk,
- index->n_uniq)->data)
- == dtuple_get_nth_field(old_pk,
- index->n_uniq + 1)->data);
- if (memcmp(rec_trx_id,
- dtuple_get_nth_field(old_pk, index->n_uniq)->data,
+ ut_d(trx_id_check(rec_trx_id, log->min_trx));
+ ut_ad(old_pk_trx_id->len == DATA_TRX_ID_LEN);
+ ut_ad(old_pk_trx_id[1].len == DATA_ROLL_PTR_LEN);
+ ut_ad(DATA_TRX_ID_LEN
+ + static_cast<const char*>(old_pk_trx_id->data)
+ == old_pk_trx_id[1].data);
+ ut_d(trx_id_check(old_pk_trx_id->data, log->min_trx));
+
+ if (memcmp(rec_trx_id, old_pk_trx_id->data,
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) {
/* The ROW_T_UPDATE was logged for a different
DB_TRX_ID,DB_ROLL_PTR. This is possible if an
@@ -2077,7 +2267,7 @@ func_exit_committed:
if (error == DB_SUCCESS) {
error = row_log_table_apply_insert_low(
- thr, row, trx_id, offsets_heap, heap, dup);
+ thr, row, offsets_heap, heap, dup);
}
goto func_exit_committed;
@@ -2152,7 +2342,7 @@ func_exit_committed:
}
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (ROW_FOUND != row_search_index_entry(
index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
@@ -2176,7 +2366,7 @@ func_exit_committed:
BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG,
BTR_MODIFY_TREE, index, offsets_heap, heap,
- entry, trx_id, thr, false);
+ entry, thr_get_trx(thr)->id, thr, false);
/* Report correct index name for duplicate key error. */
if (error == DB_DUPLICATE_KEY) {
@@ -2184,7 +2374,7 @@ func_exit_committed:
}
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
goto func_exit;
@@ -2199,8 +2389,6 @@ const mrec_t*
row_log_table_apply_op(
/*===================*/
que_thr_t* thr, /*!< in: query graph */
- ulint trx_id_col, /*!< in: position of
- DB_TRX_ID in old index */
ulint new_trx_id_col, /*!< in: position of
DB_TRX_ID in new index */
row_merge_dup_t* dup, /*!< in/out: for reporting
@@ -2232,6 +2420,7 @@ row_log_table_apply_op(
return(NULL);
}
+ const bool is_instant = log->is_instant(dup->index);
const mrec_t* const mrec_start = mrec;
switch (*mrec++) {
@@ -2251,28 +2440,29 @@ row_log_table_apply_op(
mrec += extra_size;
+ ut_ad(extra_size || !is_instant);
+
if (mrec > mrec_end) {
return(NULL);
}
rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
+ rec_init_offsets_temp(mrec, dup->index, offsets,
+ log->n_core_fields, log->non_core_fields,
+ is_instant
+ ? static_cast<rec_comp_status_t>(
+ *(mrec - extra_size))
+ : REC_STATUS_ORDINARY);
next_mrec = mrec + rec_offs_data_size(offsets);
if (next_mrec > mrec_end) {
return(NULL);
} else {
- log->head.total += next_mrec - mrec_start;
-
- ulint len;
- const byte* db_trx_id
- = rec_get_nth_field(
- mrec, offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
+ log->head.total += ulint(next_mrec - mrec_start);
*error = row_log_table_apply_insert(
thr, mrec, offsets, offsets_heap,
- heap, dup, trx_read_trx_id(db_trx_id));
+ heap, dup);
}
break;
@@ -2289,14 +2479,18 @@ row_log_table_apply_op(
For fixed-length PRIMARY key columns, it is 0. */
mrec += extra_size;
- rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
+ /* The ROW_T_DELETE record was converted by
+ rec_convert_dtuple_to_temp() using new_index. */
+ ut_ad(!new_index->is_instant());
+ rec_offs_set_n_fields(offsets,
+ unsigned(new_index->n_uniq) + 2);
rec_init_offsets_temp(mrec, new_index, offsets);
next_mrec = mrec + rec_offs_data_size(offsets);
if (next_mrec > mrec_end) {
return(NULL);
}
- log->head.total += next_mrec - mrec_start;
+ log->head.total += ulint(next_mrec - mrec_start);
*error = row_log_table_apply_delete(
new_trx_id_col,
@@ -2312,7 +2506,7 @@ row_log_table_apply_op(
is not changed, the log will only contain
DB_TRX_ID,new_row. */
- if (dup->index->online_log->same_pk) {
+ if (log->same_pk) {
ut_ad(new_index->n_uniq == dup->index->n_uniq);
extra_size = *mrec++;
@@ -2326,12 +2520,20 @@ row_log_table_apply_op(
mrec += extra_size;
+ ut_ad(extra_size || !is_instant);
+
if (mrec > mrec_end) {
return(NULL);
}
rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
+ rec_init_offsets_temp(mrec, dup->index, offsets,
+ log->n_core_fields,
+ log->non_core_fields,
+ is_instant
+ ? static_cast<rec_comp_status_t>(
+ *(mrec - extra_size))
+ : REC_STATUS_ORDINARY);
next_mrec = mrec + rec_offs_data_size(offsets);
@@ -2369,7 +2571,11 @@ row_log_table_apply_op(
/* Get offsets for PRIMARY KEY,
DB_TRX_ID, DB_ROLL_PTR. */
- rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
+ /* The old_pk prefix was converted by
+ rec_convert_dtuple_to_temp() using new_index. */
+ ut_ad(!new_index->is_instant());
+ rec_offs_set_n_fields(offsets,
+ unsigned(new_index->n_uniq) + 2);
rec_init_offsets_temp(mrec, new_index, offsets);
next_mrec = mrec + rec_offs_data_size(offsets);
@@ -2379,7 +2585,8 @@ row_log_table_apply_op(
/* Copy the PRIMARY KEY fields and
DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */
- old_pk = dtuple_create(heap, new_index->n_uniq + 2);
+ old_pk = dtuple_create(
+ heap, unsigned(new_index->n_uniq) + 2);
dict_index_copy_types(old_pk, new_index,
old_pk->n_fields);
@@ -2415,12 +2622,20 @@ row_log_table_apply_op(
mrec += extra_size;
+ ut_ad(extra_size || !is_instant);
+
if (mrec > mrec_end) {
return(NULL);
}
rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
+ rec_init_offsets_temp(mrec, dup->index, offsets,
+ log->n_core_fields,
+ log->non_core_fields,
+ is_instant
+ ? static_cast<rec_comp_status_t>(
+ *(mrec - extra_size))
+ : REC_STATUS_ORDINARY);
next_mrec = mrec + rec_offs_data_size(offsets);
@@ -2430,21 +2645,12 @@ row_log_table_apply_op(
}
ut_ad(next_mrec <= mrec_end);
- log->head.total += next_mrec - mrec_start;
+ log->head.total += ulint(next_mrec - mrec_start);
dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
- {
- ulint len;
- const byte* db_trx_id
- = rec_get_nth_field(
- mrec, offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- *error = row_log_table_apply_update(
- thr, new_trx_id_col,
- mrec, offsets, offsets_heap,
- heap, dup, trx_read_trx_id(db_trx_id), old_pk);
- }
-
+ *error = row_log_table_apply_update(
+ thr, new_trx_id_col,
+ mrec, offsets, offsets_heap, heap, dup, old_pk);
break;
}
@@ -2469,10 +2675,8 @@ row_log_progress_inc_per_block()
/* We must increment the progress once per page (as in
univ_page_size, usually 16KiB). One block here is srv_sort_buf_size
(usually 1MiB). */
- const ulint pages_per_block = std::max(
- static_cast<unsigned long>(
- srv_sort_buf_size / univ_page_size.physical()),
- 1UL);
+ const ulint pages_per_block = std::max<ulint>(
+ ulint(srv_sort_buf_size >> srv_page_size_shift), 1);
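
As a quick check of the rewritten expression: with the usual sizes mentioned in the comment above (srv_sort_buf_size of 1 MiB and 16 KiB pages, i.e. srv_page_size_shift = 14), 1048576 >> 14 = 64 pages per block, the same value the old division by univ_page_size.physical() produced.
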
/* Multiply by an artificial factor of 6 to even the pace with
the rest of the ALTER TABLE phases, they process page_size amount
@@ -2540,8 +2744,6 @@ row_log_table_apply_ops(
const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ ut_max(dict_index_get_n_fields(index),
dict_index_get_n_unique(new_index) + 2);
- const ulint trx_id_col = dict_col_get_clust_pos(
- dict_table_get_sys_col(index->table, DATA_TRX_ID), index);
const ulint new_trx_id_col = dict_col_get_clust_pos(
dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index);
trx_t* trx = thr_get_trx(thr);
@@ -2551,8 +2753,9 @@ row_log_table_apply_ops(
ut_ad(trx->mysql_thd);
ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
ut_ad(!dict_index_is_online_ddl(new_index));
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
+ ut_ad(dict_col_get_clust_pos(
+ dict_table_get_sys_col(index->table, DATA_TRX_ID), index)
+ != ULINT_UNDEFINED);
ut_ad(new_trx_id_col > 0);
ut_ad(new_trx_id_col != ULINT_UNDEFINED);
@@ -2562,8 +2765,8 @@ row_log_table_apply_ops(
offsets[0] = i;
offsets[1] = dict_index_get_n_fields(index);
- heap = mem_heap_create(UNIV_PAGE_SIZE);
- offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ heap = mem_heap_create(srv_page_size);
+ offsets_heap = mem_heap_create(srv_page_size);
has_index_lock = true;
next_block:
@@ -2655,12 +2858,12 @@ all_done:
IORequest request(IORequest::READ);
byte* buf = index->online_log->head.block;
- if (!os_file_read_no_error_handling_int_fd(
+ if (!os_file_read_no_error_handling(
request, index->online_log->fd,
- buf, ofs, srv_sort_buf_size)) {
+ buf, ofs, srv_sort_buf_size, 0)) {
ib::error()
<< "Unable to read temporary file"
- " for table " << index->table_name;
+ " for table " << index->table->name;
goto corruption;
}
@@ -2668,7 +2871,7 @@ all_done:
if (!log_tmp_block_decrypt(
buf, srv_sort_buf_size,
index->online_log->crypt_head,
- ofs, index->table->space)) {
+ ofs, index->table->space_id)) {
error = DB_DECRYPTION_FAILED;
goto func_exit;
}
@@ -2709,9 +2912,9 @@ all_done:
ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
memcpy((mrec_t*) mrec_end, next_mrec,
- (&index->online_log->head.buf)[1] - mrec_end);
+ ulint((&index->online_log->head.buf)[1] - mrec_end));
mrec = row_log_table_apply_op(
- thr, trx_id_col, new_trx_id_col,
+ thr, new_trx_id_col,
dup, &error, offsets_heap, heap,
index->online_log->head.buf,
(&index->online_log->head.buf)[1], offsets);
@@ -2726,7 +2929,7 @@ all_done:
it should proceed beyond the old end of the buffer. */
ut_a(mrec > mrec_end);
- index->online_log->head.bytes = mrec - mrec_end;
+ index->online_log->head.bytes = ulint(mrec - mrec_end);
next_mrec += index->online_log->head.bytes;
}
@@ -2817,7 +3020,7 @@ all_done:
}
next_mrec = row_log_table_apply_op(
- thr, trx_id_col, new_trx_id_col,
+ thr, new_trx_id_col,
dup, &error, offsets_heap, heap,
mrec, mrec_end, offsets);
@@ -2842,7 +3045,8 @@ process_next_block:
goto next_block;
} else if (next_mrec != NULL) {
ut_ad(next_mrec < next_mrec_end);
- index->online_log->head.bytes += next_mrec - mrec;
+ index->online_log->head.bytes
+ += ulint(next_mrec - mrec);
} else if (has_index_lock) {
/* When mrec is within tail.block, it should
be a complete record, because we are holding
@@ -2854,8 +3058,8 @@ process_next_block:
goto unexpected_eof;
} else {
memcpy(index->online_log->head.buf, mrec,
- mrec_end - mrec);
- mrec_end += index->online_log->head.buf - mrec;
+ ulint(mrec_end - mrec));
+ mrec_end += ulint(index->online_log->head.buf - mrec);
mrec = index->online_log->head.buf;
goto process_next_block;
}
@@ -2882,13 +3086,15 @@ func_exit:
@param[in,out] stage performance schema accounting object, used by
ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
stage->inc() will be called for each block of log that is applied.
+@param[in] new_table Altered table
@return DB_SUCCESS, or error code on failure */
dberr_t
row_log_table_apply(
que_thr_t* thr,
dict_table_t* old_table,
struct TABLE* table,
- ut_stage_alter_t* stage)
+ ut_stage_alter_t* stage,
+ dict_table_t* new_table)
{
dberr_t error;
dict_index_t* clust_index;
@@ -2902,6 +3108,10 @@ row_log_table_apply(
ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_S));
clust_index = dict_table_get_first_index(old_table);
+ if (clust_index->online_log->n_rows == 0) {
+ clust_index->online_log->n_rows = new_table->stat_n_rows;
+ }
+
rw_lock_x_lock(dict_index_get_lock(clust_index));
if (!clust_index->online_log) {
@@ -2939,17 +3149,21 @@ for online creation.
bool
row_log_allocate(
/*=============*/
+ const trx_t* trx, /*!< in: the ALTER TABLE transaction */
dict_index_t* index, /*!< in/out: index */
dict_table_t* table, /*!< in/out: new table being rebuilt,
or NULL when creating a secondary index */
bool same_pk,/*!< in: whether the definition of the
PRIMARY KEY has remained the same */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+			added or changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL if !table */
- const char* path) /*!< in: where to create temporary file */
+ const char* path, /*!< in: where to create temporary file */
+ const TABLE* old_table, /*!< in: table definition before alter */
+	const bool	allow_not_null) /*!< in: whether to allow NULL to
+					NOT NULL conversion */
{
row_log_t* log;
DBUG_ENTER("row_log_allocate");
@@ -2959,8 +3173,10 @@ row_log_allocate(
ut_ad(!table || index->table != table);
ut_ad(same_pk || table);
ut_ad(!table || col_map);
- ut_ad(!add_cols || col_map);
+ ut_ad(!defaults || col_map);
ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->id);
log = static_cast<row_log_t*>(ut_malloc_nokey(sizeof *log));
@@ -2968,15 +3184,16 @@ row_log_allocate(
DBUG_RETURN(false);
}
- log->fd = -1;
+ log->fd = OS_FILE_CLOSED;
mutex_create(LATCH_ID_INDEX_ONLINE_LOG, &log->mutex);
log->blobs = NULL;
log->table = table;
log->same_pk = same_pk;
- log->add_cols = add_cols;
+ log->defaults = defaults;
log->col_map = col_map;
log->error = DB_SUCCESS;
+ log->min_trx = trx->id;
log->max_trx = 0;
log->tail.blocks = log->tail.bytes = 0;
log->tail.total = 0;
@@ -2985,6 +3202,23 @@ row_log_allocate(
log->head.blocks = log->head.bytes = 0;
log->head.total = 0;
log->path = path;
+ log->n_core_fields = index->n_core_fields;
+ ut_ad(!table || log->is_instant(index) == index->is_instant());
+ log->allow_not_null = allow_not_null;
+ log->old_table = old_table;
+ log->n_rows = 0;
+
+ if (table && index->is_instant()) {
+ const unsigned n = log->n_core_fields;
+ log->non_core_fields = UT_NEW_ARRAY_NOKEY(
+ dict_col_t::def_t, index->n_fields - n);
+ for (unsigned i = n; i < index->n_fields; i++) {
+ log->non_core_fields[i - n]
+ = index->fields[i].col->def_val;
+ }
+ } else {
+ log->non_core_fields = NULL;
+ }
dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
index->online_log = log;
@@ -3018,6 +3252,7 @@ row_log_free(
MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
UT_DELETE(log->blobs);
+ UT_DELETE_ARRAY(log->non_core_fields);
row_log_block_free(log->tail);
row_log_block_free(log->head);
row_merge_file_destroy_low(log->fd);
@@ -3090,7 +3325,7 @@ row_log_apply_op_low(
<< rec_printer(entry).str());
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
/* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the
@@ -3147,7 +3382,7 @@ row_log_apply_op_low(
Lock the index tree exclusively. */
mtr_commit(&mtr);
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
btr_cur_search_to_nth_level(
index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0,
@@ -3250,7 +3485,7 @@ insert_the_rec:
Lock the index tree exclusively. */
mtr_commit(&mtr);
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
btr_cur_search_to_nth_level(
index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0,
@@ -3441,8 +3676,8 @@ row_log_apply_ops(
offsets[0] = i;
offsets[1] = dict_index_get_n_fields(index);
- offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
- heap = mem_heap_create(UNIV_PAGE_SIZE);
+ offsets_heap = mem_heap_create(srv_page_size);
+ heap = mem_heap_create(srv_page_size);
has_index_lock = true;
next_block:
@@ -3526,9 +3761,9 @@ all_done:
byte* buf = index->online_log->head.block;
- if (!os_file_read_no_error_handling_int_fd(
+ if (!os_file_read_no_error_handling(
request, index->online_log->fd,
- buf, ofs, srv_sort_buf_size)) {
+ buf, ofs, srv_sort_buf_size, 0)) {
ib::error()
<< "Unable to read temporary file"
" for index " << index->name;
@@ -3539,7 +3774,7 @@ all_done:
if (!log_tmp_block_decrypt(
buf, srv_sort_buf_size,
index->online_log->crypt_head,
- ofs, index->table->space)) {
+ ofs, index->table->space_id)) {
error = DB_DECRYPTION_FAILED;
goto func_exit;
}
@@ -3570,7 +3805,7 @@ all_done:
ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
memcpy((mrec_t*) mrec_end, next_mrec,
- (&index->online_log->head.buf)[1] - mrec_end);
+ ulint((&index->online_log->head.buf)[1] - mrec_end));
mrec = row_log_apply_op(
index, dup, &error, offsets_heap, heap,
has_index_lock, index->online_log->head.buf,
@@ -3586,7 +3821,7 @@ all_done:
it should proceed beyond the old end of the buffer. */
ut_a(mrec > mrec_end);
- index->online_log->head.bytes = mrec - mrec_end;
+ index->online_log->head.bytes = ulint(mrec - mrec_end);
next_mrec += index->online_log->head.bytes;
}
@@ -3684,7 +3919,8 @@ process_next_block:
goto next_block;
} else if (next_mrec != NULL) {
ut_ad(next_mrec < next_mrec_end);
- index->online_log->head.bytes += next_mrec - mrec;
+ index->online_log->head.bytes
+ += ulint(next_mrec - mrec);
} else if (has_index_lock) {
/* When mrec is within tail.block, it should
be a complete record, because we are holding
@@ -3696,8 +3932,8 @@ process_next_block:
goto unexpected_eof;
} else {
memcpy(index->online_log->head.buf, mrec,
- mrec_end - mrec);
- mrec_end += index->online_log->head.buf - mrec;
+ ulint(mrec_end - mrec));
+ mrec_end += ulint(index->online_log->head.buf - mrec);
mrec = index->online_log->head.buf;
goto process_next_block;
}
@@ -3771,7 +4007,7 @@ row_log_apply(
}
if (error != DB_SUCCESS) {
- ut_a(!dict_table_is_discarded(index->table));
+ ut_ad(index->table->space);
/* We set the flag directly instead of invoking
dict_set_corrupted_index_cache_only(index) here,
because the index is not "public" yet. */
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index f623cfb6df4..15d6ab8e76e 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -24,7 +24,7 @@ New index creation routines using a merge sort
Created 12/4/2005 Jan Lindstrom
Completed by Sunny Bains and Marko Makela
*******************************************************/
-#include <my_config.h>
+#include <my_global.h>
#include <log.h>
#include <sql_class.h>
@@ -140,7 +140,7 @@ public:
ut_ad(dict_index_is_spatial(m_index));
DBUG_EXECUTE_IF("row_merge_instrument_log_check_flush",
- log_sys->check_flush_or_checkpoint = true;
+ log_sys.check_flush_or_checkpoint = true;
);
for (idx_tuple_vec::iterator it = m_dtuple_vec->begin();
@@ -149,7 +149,7 @@ public:
dtuple = *it;
ut_ad(dtuple);
- if (log_sys->check_flush_or_checkpoint) {
+ if (log_sys.check_flush_or_checkpoint) {
if (!(*mtr_committed)) {
btr_pcur_move_to_prev_on_page(pcur);
btr_pcur_store_position(pcur, scan_mtr);
@@ -161,7 +161,7 @@ public:
}
mtr.start();
- mtr.set_named_space(m_index->space);
+ m_index->set_modified(mtr);
ins_cur.index = m_index;
rtr_init_rtr_info(&rtr_info, false, &ins_cur, m_index,
@@ -183,7 +183,7 @@ public:
m_index, false);
rtr_info_update_btr(&ins_cur, &rtr_info);
mtr_start(&mtr);
- mtr.set_named_space(m_index->space);
+ m_index->set_modified(mtr);
btr_cur_search_to_nth_level(
m_index, 0, dtuple,
PAGE_CUR_RTREE_INSERT,
@@ -199,7 +199,7 @@ public:
ut_ad(!big_rec);
mtr.commit();
mtr.start();
- mtr.set_named_space(m_index->space);
+ m_index->set_modified(mtr);
rtr_clean_rtr_info(&rtr_info, true);
rtr_init_rtr_info(&rtr_info, false,
@@ -228,7 +228,7 @@ public:
if (error == DB_SUCCESS) {
if (rtr_info.mbr_adj) {
error = rtr_ins_enlarge_mbr(
- &ins_cur, NULL, &mtr);
+ &ins_cur, &mtr);
}
if (error == DB_SUCCESS) {
@@ -286,7 +286,7 @@ dberr_t
row_merge_insert_index_tuples(
dict_index_t* index,
const dict_table_t* old_table,
- int fd,
+ const pfs_os_file_t& fd,
row_merge_block_t* block,
const row_merge_buf_t* row_buf,
BtrBulk* btr_bulk,
@@ -557,7 +557,7 @@ row_merge_buf_add(
mem_heap_alloc(buf->heap, n_fields * sizeof *entry->fields));
data_size = 0;
- extra_size = UT_BITS_IN_BYTES(index->n_nullable);
+ extra_size = UT_BITS_IN_BYTES(unsigned(index->n_nullable));
ifield = dict_index_get_nth_field(index, 0);
@@ -570,7 +570,7 @@ row_merge_buf_add(
col = ifield->col;
const dict_v_col_t* v_col = NULL;
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
v_col = reinterpret_cast<const dict_v_col_t*>(col);
}
@@ -579,7 +579,7 @@ row_merge_buf_add(
/* Process the Doc ID column */
if (*doc_id > 0
&& col_no == index->table->fts->doc_col
- && !dict_col_is_virtual(col)) {
+ && !col->is_virtual()) {
fts_write_doc_id((byte*) &write_doc_id, *doc_id);
/* Note: field->data now points to a value on the
@@ -598,7 +598,7 @@ row_merge_buf_add(
field->type.len = ifield->col->len;
} else {
/* Use callback to get the virtual column value */
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
dict_index_t* clust_index
= dict_table_get_first_index(new_table);
@@ -737,7 +737,7 @@ row_merge_buf_add(
len = dfield_get_len(field);
}
}
- } else if (!dict_col_is_virtual(col)) {
+ } else if (!col->is_virtual()) {
/* Only non-virtual column are stored externally */
const byte* buf = row_ext_lookup(ext, col_no,
&len);
@@ -824,9 +824,9 @@ row_merge_buf_add(
/* Record size can exceed page size while converting to
redundant row format. But there is assert
- ut_ad(size < UNIV_PAGE_SIZE) in rec_offs_data_size().
+ ut_ad(size < srv_page_size) in rec_offs_data_size().
It may hit the assert before attempting to insert the row. */
- if (conv_heap != NULL && data_size > UNIV_PAGE_SIZE) {
+ if (conv_heap != NULL && data_size > srv_page_size) {
*err = DB_TOO_BIG_RECORD;
}
@@ -1082,7 +1082,7 @@ row_merge_heap_create(
bool
row_merge_read(
/*===========*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to read
in number of row_merge_block_t
elements */
@@ -1097,8 +1097,8 @@ row_merge_read(
DBUG_EXECUTE_IF("row_merge_read_failure", DBUG_RETURN(FALSE););
IORequest request(IORequest::READ);
- const bool success = os_file_read_no_error_handling_int_fd(
- request, fd, buf, ofs, srv_sort_buf_size);
+ const bool success = os_file_read_no_error_handling(
+ request, fd, buf, ofs, srv_sort_buf_size, 0);
/* If encryption is enabled decrypt buffer */
if (success && log_tmp_is_encrypted()) {
@@ -1130,7 +1130,7 @@ UNIV_INTERN
bool
row_merge_write(
/*============*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to write,
in number of row_merge_block_t elements */
const void* buf, /*!< in: data */
@@ -1159,7 +1159,7 @@ row_merge_write(
}
IORequest request(IORequest::WRITE);
- const bool success = os_file_write_int_fd(
+ const bool success = os_file_write(
request, "(merge)", fd, out_buf, ofs, buf_len);
#ifdef POSIX_FADV_DONTNEED
@@ -1181,7 +1181,7 @@ row_merge_read_rec(
mrec_buf_t* buf, /*!< in/out: secondary buffer */
const byte* b, /*!< in: pointer to record */
const dict_index_t* index, /*!< in: index of the record */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
@@ -1245,7 +1245,7 @@ err_exit:
to the auxiliary buffer and handle this as a special
case. */
- avail_size = &block[srv_sort_buf_size] - b;
+ avail_size = ulint(&block[srv_sort_buf_size] - b);
ut_ad(avail_size < sizeof *buf);
memcpy(*buf, b, avail_size);
@@ -1300,7 +1300,7 @@ err_exit:
/* The record spans two blocks. Copy it to buf. */
b -= extra_size + data_size;
- avail_size = &block[srv_sort_buf_size] - b;
+ avail_size = ulint(&block[srv_sort_buf_size] - b);
memcpy(*buf, b, avail_size);
*mrec = *buf + extra_size;
@@ -1344,7 +1344,7 @@ row_merge_write_rec_low(
ulint e, /*!< in: encoded extra_size */
#ifndef DBUG_OFF
ulint size, /*!< in: total size to write */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint foffs, /*!< in: file offset */
#endif /* !DBUG_OFF */
const mrec_t* mrec, /*!< in: record to write */
@@ -1373,7 +1373,7 @@ row_merge_write_rec_low(
}
memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
- DBUG_ASSERT(b + rec_offs_size(offsets) == end);
+ DBUG_SLOW_ASSERT(b + rec_offs_size(offsets) == end);
DBUG_VOID_RETURN;
}
@@ -1387,7 +1387,7 @@ row_merge_write_rec(
row_merge_block_t* block, /*!< in/out: file buffer */
mrec_buf_t* buf, /*!< in/out: secondary buffer */
byte* b, /*!< in: pointer to end of block */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
const mrec_t* mrec, /*!< in: record to write */
const ulint* offsets,/*!< in: offsets of mrec */
@@ -1416,7 +1416,7 @@ row_merge_write_rec(
if (UNIV_UNLIKELY(b + size >= &block[srv_sort_buf_size])) {
/* The record spans two blocks.
Copy it to the temporary buffer first. */
- avail_size = &block[srv_sort_buf_size] - b;
+ avail_size = ulint(&block[srv_sort_buf_size] - b);
row_merge_write_rec_low(buf[0],
extra_size, size, fd, *foffs,
@@ -1457,7 +1457,7 @@ row_merge_write_eof(
/*================*/
row_merge_block_t* block, /*!< in/out: file buffer */
byte* b, /*!< in: pointer to end of block */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
ulint space) /*!< in: space id */
@@ -1480,7 +1480,7 @@ row_merge_write_eof(
#ifdef UNIV_DEBUG_VALGRIND
/* The rest of the block is uninitialized. Initialize it
to avoid bogus warnings. */
- memset(b, 0xff, &block[srv_sort_buf_size] - b);
+ memset(b, 0xff, ulint(&block[srv_sort_buf_size] - b));
#endif /* UNIV_DEBUG_VALGRIND */
if (!row_merge_write(fd, (*foffs)++, block, crypt_block, space)) {
@@ -1494,48 +1494,48 @@ row_merge_write_eof(
/** Create a temporary file if it has not been created already.
@param[in,out] tmpfd temporary file handle
@param[in] path location for creating temporary file
-@return file descriptor, or -1 on failure */
+@return true on success, false on error */
static MY_ATTRIBUTE((warn_unused_result))
-int
+bool
row_merge_tmpfile_if_needed(
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
const char* path)
{
- if (*tmpfd < 0) {
+ if (*tmpfd == OS_FILE_CLOSED) {
*tmpfd = row_merge_file_create_low(path);
- if (*tmpfd >= 0) {
+ if (*tmpfd != OS_FILE_CLOSED) {
MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_SORT_FILES);
}
}
- return(*tmpfd);
+ return(*tmpfd != OS_FILE_CLOSED);
}
/** Create a temporary file for merge sort if it was not created already.
@param[in,out] file merge file structure
@param[in] nrec number of records in the file
@param[in] path location for creating temporary file
-@return file descriptor, or -1 on failure */
+@return true on success, false on error */
static MY_ATTRIBUTE((warn_unused_result))
-int
+bool
row_merge_file_create_if_needed(
merge_file_t* file,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
ulint nrec,
const char* path)
{
- ut_ad(file->fd < 0 || *tmpfd >=0);
- if (file->fd < 0 && row_merge_file_create(file, path) >= 0) {
+ ut_ad(file->fd == OS_FILE_CLOSED || *tmpfd != OS_FILE_CLOSED);
+	if (file->fd == OS_FILE_CLOSED && row_merge_file_create(file, path) != OS_FILE_CLOSED) {
MONITOR_ATOMIC_INC(MONITOR_ALTER_TABLE_SORT_FILES);
- if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) {
- return(-1);
+		if (!row_merge_tmpfile_if_needed(tmpfd, path)) {
+ return(false);
}
file->n_rec = nrec;
}
- ut_ad(file->fd < 0 || *tmpfd >=0);
- return(file->fd);
+ ut_ad(file->fd == OS_FILE_CLOSED || *tmpfd != OS_FILE_CLOSED);
+ return(file->fd != OS_FILE_CLOSED);
}
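
These two helpers now report success as a bool and use OS_FILE_CLOSED instead of -1 as the "no file yet" sentinel, matching the pfs_os_file_t handles introduced throughout this patch. A self-contained sketch of the same lazy-creation idiom, with a stand-in handle type instead of pfs_os_file_t and fopen() instead of row_merge_file_create_low():

    #include <cstdio>

    typedef std::FILE* file_handle_t;                    /* stand-in for pfs_os_file_t */
    static const file_handle_t FILE_CLOSED = nullptr;    /* stand-in for OS_FILE_CLOSED */

    // Create the temporary file only on first use; report success as a bool.
    static bool create_if_needed(file_handle_t* fd, const char* path)
    {
            if (*fd == FILE_CLOSED) {
                    *fd = std::fopen(path, "w+b");
            }
            return *fd != FILE_CLOSED;
    }
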
/** Copy the merge data tuple from another merge data tuple.
@@ -1669,7 +1669,7 @@ containing the index entries for the indexes to be built.
@param[in] files temporary files
@param[in] key_numbers MySQL key numbers to create
@param[in] n_index number of indexes to create
-@param[in] add_cols default values of added columns, or NULL
+@param[in]	defaults	default values of added or changed columns, or NULL
@param[in] add_v newly added virtual columns along with indexes
@param[in] col_map mapping of old column numbers to new ones, or
NULL if old_table == new_table
@@ -1687,6 +1687,7 @@ stage->inc() will be called for each page read.
@param[in,out] crypt_block crypted file buffer
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
+@param[in]	allow_not_null	whether to allow NULL to NOT NULL conversion
@return DB_SUCCESS or error */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
@@ -1694,7 +1695,7 @@ row_merge_read_clustered_index(
trx_t* trx,
struct TABLE* table,
const dict_table_t* old_table,
- const dict_table_t* new_table,
+ dict_table_t* new_table,
bool online,
dict_index_t** index,
dict_index_t* fts_sort_idx,
@@ -1702,18 +1703,19 @@ row_merge_read_clustered_index(
merge_file_t* files,
const ulint* key_numbers,
ulint n_index,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const dict_add_v_col_t* add_v,
const ulint* col_map,
ulint add_autoinc,
ib_sequence_t& sequence,
row_merge_block_t* block,
bool skip_pk_sort,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
ut_stage_alter_t* stage,
double pct_cost,
row_merge_block_t* crypt_block,
- struct TABLE* eval_table)
+ struct TABLE* eval_table,
+ bool allow_not_null)
{
dict_index_t* clust_index; /* Clustered index */
mem_heap_t* row_heap; /* Heap memory to create
@@ -1747,11 +1749,17 @@ row_merge_read_clustered_index(
double curr_progress = 0.0;
ib_uint64_t read_rows = 0;
ib_uint64_t table_total_rows = 0;
+ char new_sys_trx_start[8];
+ char new_sys_trx_end[8];
+ byte any_autoinc_data[8] = {0};
+ bool vers_update_trt = false;
DBUG_ENTER("row_merge_read_clustered_index");
ut_ad((old_table == new_table) == !col_map);
- ut_ad(!add_cols || col_map);
+ ut_ad(!defaults || col_map);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->id);
table_total_rows = dict_table_get_n_rows(old_table);
if(table_total_rows == 0) {
@@ -1847,9 +1855,27 @@ row_merge_read_clustered_index(
based on that. */
clust_index = dict_table_get_first_index(old_table);
+ const ulint old_trx_id_col = DATA_TRX_ID - DATA_N_SYS_COLS
+ + ulint(old_table->n_cols);
+ ut_ad(old_table->cols[old_trx_id_col].mtype == DATA_SYS);
+ ut_ad(old_table->cols[old_trx_id_col].prtype
+ == (DATA_TRX_ID | DATA_NOT_NULL));
+ ut_ad(old_table->cols[old_trx_id_col + 1].mtype == DATA_SYS);
+ ut_ad(old_table->cols[old_trx_id_col + 1].prtype
+ == (DATA_ROLL_PTR | DATA_NOT_NULL));
+ const ulint new_trx_id_col = col_map
+ ? col_map[old_trx_id_col] : old_trx_id_col;
btr_pcur_open_at_index_side(
true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ if (rec_is_metadata(btr_pcur_get_rec(&pcur), clust_index)) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ /* Skip the metadata pseudo-record. */
+ } else {
+ ut_ad(!clust_index->is_instant());
+ btr_pcur_move_to_prev_on_page(&pcur);
+ }
if (old_table != new_table) {
/* The table is being rebuilt. Identify the columns
@@ -1901,9 +1927,14 @@ row_merge_read_clustered_index(
prev_fields = NULL;
}
+ mach_write_to_8(new_sys_trx_start, trx->id);
+ mach_write_to_8(new_sys_trx_end, TRX_ID_MAX);
+ uint64_t n_rows = 0;
+
/* Scan the clustered index. */
for (;;) {
const rec_t* rec;
+ trx_id_t rec_trx_id;
ulint* offsets;
const dtuple_t* row;
row_ext_t* ext;
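At the end of this hunk, note that new_sys_trx_start/new_sys_trx_end were just pre-encoded with mach_write_to_8(trx->id) and mach_write_to_8(TRX_ID_MAX), so they can be attached to rebuilt rows' row_start/row_end columns later in the loop when the new table is system-versioned. A standalone sketch of the assumed big-endian 8-byte encoding, in plain C++ with placeholder values (not InnoDB code):

#include <cstdint>
#include <cstddef>

// Write v into b[0..7] with the most significant byte first, which is what
// mach_write_to_8() is assumed to do for these system columns.
static void write_be_8(unsigned char* b, std::uint64_t v)
{
	for (std::size_t i = 0; i < 8; i++) {
		b[i] = static_cast<unsigned char>(v >> (8 * (7 - i)));
	}
}

int main()
{
	unsigned char row_start[8];
	unsigned char row_end[8];
	const std::uint64_t alter_trx_id = 42;               // hypothetical ALTER TABLE transaction id
	const std::uint64_t trx_id_max = ~std::uint64_t(0);  // placeholder for TRX_ID_MAX
	write_be_8(row_start, alter_trx_id);                 // row_start: history begins at this transaction
	write_be_8(row_end, trx_id_max);                     // row_end: the row is still current
	return row_start[7] == 42 ? 0 : 1;
}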
@@ -1943,15 +1974,6 @@ row_merge_read_clustered_index(
}
}
-#ifdef DBUG_OFF
-# define dbug_run_purge false
-#else /* DBUG_OFF */
- bool dbug_run_purge = false;
-#endif /* DBUG_OFF */
- DBUG_EXECUTE_IF(
- "ib_purge_on_create_index_page_switch",
- dbug_run_purge = true;);
-
/* Insert the cached spatial index rows. */
bool mtr_committed = false;
@@ -1968,8 +1990,8 @@ row_merge_read_clustered_index(
goto scan_next;
}
- if (dbug_run_purge
- || dict_index_get_lock(clust_index)->waiters) {
+ if (my_atomic_load32_explicit(&clust_index->lock.waiters,
+ MY_MEMORY_ORDER_RELAXED)) {
/* There are waiters on the clustered
index tree lock, likely the purge
thread. Store and restore the cursor
@@ -1990,18 +2012,6 @@ row_merge_read_clustered_index(
btr_pcur_store_position(&pcur, &mtr);
mtr_commit(&mtr);
- if (dbug_run_purge) {
- /* This is for testing
- purposes only (see
- DBUG_EXECUTE_IF above). We
- signal the purge thread and
- hope that the purge batch will
- complete before we execute
- btr_pcur_restore_position(). */
- trx_purge_run();
- os_thread_sleep(1000000);
- }
-
/* Give the waiters a chance to proceed. */
os_thread_yield();
scan_next:
@@ -2055,6 +2065,8 @@ end_of_index:
if (online) {
offsets = rec_get_offsets(rec, clust_index, NULL, true,
ULINT_UNDEFINED, &row_heap);
+ rec_trx_id = row_get_rec_trx_id(rec, clust_index,
+ offsets);
/* Perform a REPEATABLE READ.
@@ -2075,33 +2087,45 @@ end_of_index:
ONLINE_INDEX_COMPLETE state between the time
the DML thread has updated the clustered index
but has not yet accessed secondary index. */
- ut_ad(MVCC::is_view_active(trx->read_view));
+ ut_ad(trx->read_view.is_open());
+ ut_ad(rec_trx_id != trx->id);
- if (!trx->read_view->changes_visible(
- row_get_rec_trx_id(
- rec, clust_index, offsets),
- old_table->name)) {
+ if (!trx->read_view.changes_visible(
+ rec_trx_id, old_table->name)) {
rec_t* old_vers;
row_vers_build_for_consistent_read(
rec, &mtr, clust_index, &offsets,
- trx->read_view, &row_heap,
+ &trx->read_view, &row_heap,
row_heap, &old_vers, NULL);
- rec = old_vers;
-
- if (!rec) {
+ if (!old_vers) {
continue;
}
+
+ /* The old version must necessarily be
+ in the "prehistory", because the
+ exclusive lock in
+ ha_innobase::prepare_inplace_alter_table()
+ forced the completion of any transactions
+ that accessed this table. */
+ ut_ad(row_get_rec_trx_id(old_vers, clust_index,
+ offsets) < trx->id);
+
+ rec = old_vers;
+ rec_trx_id = 0;
}
if (rec_get_deleted_flag(
rec,
dict_table_is_comp(old_table))) {
/* In delete-marked records, DB_TRX_ID must
- always refer to an existing undo log record. */
- ut_ad(row_get_rec_trx_id(rec, clust_index,
- offsets));
+ always refer to an existing undo log record.
+ Above, we did reset rec_trx_id = 0
+ for rec = old_vers. */
+ ut_ad(rec == page_cur_get_rec(cur)
+ ? rec_trx_id
+ : !rec_trx_id);
/* This record was deleted in the latest
committed version, or it was deleted and
then reinserted-by-update before purge
@@ -2114,19 +2138,37 @@ end_of_index:
rec, dict_table_is_comp(old_table))) {
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
- ut_ad(rec_get_trx_id(rec, clust_index));
+ ut_d(rec_trx_id = rec_get_trx_id(rec, clust_index));
+ ut_ad(rec_trx_id);
+ /* This must be a purgeable delete-marked record,
+ and the transaction that delete-marked the record
+ must have been committed before this
+ !online ALTER TABLE transaction. */
+ ut_ad(rec_trx_id < trx->id);
/* Skip delete-marked records.
Skipping delete-marked records will make the
created indexes unusable for transactions
whose read views were created before the index
- creation completed, but preserving the history
- would make it tricky to detect duplicate
- keys. */
+ creation completed, but an attempt to preserve
+ the history would make it tricky to detect
+ duplicate keys. */
continue;
} else {
offsets = rec_get_offsets(rec, clust_index, NULL, true,
ULINT_UNDEFINED, &row_heap);
+ /* This is a locking ALTER TABLE.
+
+ If we are not rebuilding the table, the
+ DB_TRX_ID does not matter, as it is not being
+ written to any secondary indexes; see
+ if (old_table == new_table) below.
+
+ If we are rebuilding the table, the
+ DB_TRX_ID,DB_ROLL_PTR should be reset, because
+ there will be no history available. */
+ ut_ad(rec_get_trx_id(rec, clust_index) < trx->id);
+ rec_trx_id = 0;
}
/* When !online, we are holding a lock on old_table, preventing
@@ -2138,19 +2180,35 @@ end_of_index:
row = row_build_w_add_vcol(ROW_COPY_POINTERS, clust_index,
rec, offsets, new_table,
- add_cols, add_v, col_map, &ext,
+ defaults, add_v, col_map, &ext,
row_heap);
ut_ad(row);
for (ulint i = 0; i < n_nonnull; i++) {
- const dfield_t* field = &row->fields[nonnull[i]];
+ dfield_t* field = &row->fields[nonnull[i]];
ut_ad(dfield_get_type(field)->prtype & DATA_NOT_NULL);
if (dfield_is_null(field)) {
- err = DB_INVALID_NULL;
- trx->error_key_num = 0;
- goto func_exit;
+
+ Field* null_field =
+ table->field[nonnull[i]];
+
+ null_field->set_warning(
+ Sql_condition::WARN_LEVEL_WARN,
+ WARN_DATA_TRUNCATED, 1,
+ ulong(n_rows + 1));
+
+ if (!allow_not_null) {
+ err = DB_INVALID_NULL;
+ trx->error_key_num = 0;
+ goto func_exit;
+ }
+
+ const dfield_t& default_field
+ = defaults->fields[nonnull[i]];
+
+ *field = default_field;
}
}
@@ -2161,14 +2219,63 @@ end_of_index:
doc_id = 0;
}
+ ut_ad(row->fields[new_trx_id_col].type.mtype == DATA_SYS);
+ ut_ad(row->fields[new_trx_id_col].type.prtype
+ == (DATA_TRX_ID | DATA_NOT_NULL));
+ ut_ad(row->fields[new_trx_id_col].len == DATA_TRX_ID_LEN);
+ ut_ad(row->fields[new_trx_id_col + 1].type.mtype == DATA_SYS);
+ ut_ad(row->fields[new_trx_id_col + 1].type.prtype
+ == (DATA_ROLL_PTR | DATA_NOT_NULL));
+ ut_ad(row->fields[new_trx_id_col + 1].len == DATA_ROLL_PTR_LEN);
+
+ if (old_table == new_table) {
+ /* Do not bother touching DB_TRX_ID,DB_ROLL_PTR
+ because they are not going to be written into
+ secondary indexes. */
+ } else if (rec_trx_id < trx->id) {
+ /* Reset the DB_TRX_ID,DB_ROLL_PTR of old rows
+ for which history is not going to be
+ available after the rebuild operation.
+ This essentially mimics row_purge_reset_trx_id(). */
+ row->fields[new_trx_id_col].data
+ = const_cast<byte*>(reset_trx_id);
+ row->fields[new_trx_id_col + 1].data
+ = const_cast<byte*>(reset_trx_id
+ + DATA_TRX_ID_LEN);
+ }
+
if (add_autoinc != ULINT_UNDEFINED) {
ut_ad(add_autoinc
< dict_table_get_n_user_cols(new_table));
- const dfield_t* dfield;
+ bool history_row = false;
+ if (new_table->versioned()) {
+ const dfield_t* dfield = dtuple_get_nth_field(
+ row, new_table->vers_end);
+ history_row = dfield->vers_history_row();
+ }
+
+ dfield_t* dfield;
dfield = dtuple_get_nth_field(row, add_autoinc);
+
+ if (new_table->versioned()) {
+ if (history_row) {
+ if (dfield_get_type(dfield)->prtype & DATA_NOT_NULL) {
+ err = DB_UNSUPPORTED;
+ my_error(ER_UNSUPPORTED_EXTENSION, MYF(0),
+ old_table->name.m_name);
+ goto func_exit;
+ }
+ dfield_set_null(dfield);
+ } else {
+ // Set a placeholder non-NULL value; the actual AUTO_INCREMENT value is assigned below.
+ ulint len = dfield_get_type(dfield)->len;
+ dfield_set_data(dfield, any_autoinc_data, len);
+ }
+ }
+
if (dfield_is_null(dfield)) {
goto write_buffers;
}
@@ -2214,10 +2321,26 @@ end_of_index:
}
}
+ if (old_table->versioned()) {
+ if (!new_table->versioned()
+ && clust_index->vers_history_row(rec, offsets)) {
+ continue;
+ }
+ } else if (new_table->versioned()) {
+ dfield_t* start =
+ dtuple_get_nth_field(row, new_table->vers_start);
+ dfield_t* end =
+ dtuple_get_nth_field(row, new_table->vers_end);
+ dfield_set_data(start, new_sys_trx_start, 8);
+ dfield_set_data(end, new_sys_trx_end, 8);
+ vers_update_trt = true;
+ }
+
write_buffers:
/* Build all entries for all the indexes to be created
in a single scan of the clustered index. */
+ n_rows++;
ulint s_idx_cnt = 0;
bool skip_sort = skip_pk_sort
&& dict_index_is_clust(merge_buf[0]->index);
@@ -2248,6 +2371,11 @@ write_buffers:
continue;
}
+ ut_ad(!row
+ || !dict_index_is_clust(buf->index)
+ || trx_id_check(row->fields[new_trx_id_col].data,
+ trx->id));
+
if (UNIV_LIKELY
(row && (rows_added = row_merge_buf_add(
buf, fts_index, old_table, new_table,
@@ -2404,12 +2532,13 @@ write_buffers:
err = row_merge_insert_index_tuples(
index[i], old_table,
- -1, NULL, buf, clust_btr_bulk,
+ OS_FILE_CLOSED, NULL, buf,
+ clust_btr_bulk,
table_total_rows,
curr_progress,
pct_cost,
crypt_block,
- new_table->space);
+ new_table->space_id);
if (row == NULL) {
err = clust_btr_bulk->finish(
@@ -2495,7 +2624,7 @@ write_buffers:
we can insert directly into the index without
temporary file if clustered index does not uses
temporary file. */
- if (row == NULL && file->fd == -1
+ if (row == NULL && file->fd == OS_FILE_CLOSED
&& !clust_temp_file) {
DBUG_EXECUTE_IF(
"row_merge_write_failure",
@@ -2514,12 +2643,13 @@ write_buffers:
err = row_merge_insert_index_tuples(
index[i], old_table,
- -1, NULL, buf, &btr_bulk,
+ OS_FILE_CLOSED, NULL, buf,
+ &btr_bulk,
table_total_rows,
curr_progress,
pct_cost,
crypt_block,
- new_table->space);
+ new_table->space_id);
err = btr_bulk.finish(err);
@@ -2531,9 +2661,9 @@ write_buffers:
break;
}
} else {
- if (row_merge_file_create_if_needed(
+ if (!row_merge_file_create_if_needed(
file, tmpfd,
- buf->n_tuples, path) < 0) {
+ buf->n_tuples, path)) {
err = DB_OUT_OF_MEMORY;
trx->error_key_num = i;
goto func_exit;
@@ -2553,7 +2683,7 @@ write_buffers:
if (!row_merge_write(
file->fd, file->offset++,
block, crypt_block,
- new_table->space)) {
+ new_table->space_id)) {
err = DB_TEMP_FILE_WRITE_FAIL;
trx->error_key_num = i;
break;
@@ -2590,6 +2720,10 @@ write_buffers:
}
if (row == NULL) {
+ if (old_table != new_table) {
+ new_table->stat_n_rows = n_rows;
+ }
+
goto all_done;
}
@@ -2747,6 +2881,15 @@ wait_again:
}
}
+ if (vers_update_trt) {
+ trx_mod_table_time_t& time =
+ trx->mod_tables
+ .insert(trx_mod_tables_t::value_type(
+ const_cast<dict_table_t*>(new_table), 0))
+ .first->second;
+ time.set_versioned(0);
+ }
+
trx->op_info = "";
DBUG_RETURN(err);
@@ -2803,10 +2946,10 @@ wait_again:
@param[in,out] foffs1 offset of second source list in the file
@param[in,out] of output file
@param[in,out] stage performance schema accounting object, used by
-@param[in,out] crypt_block encryption buffer
-@param[in] space tablespace ID for encryption
ALTER TABLE. If not NULL stage->inc() will be called for each record
processed.
+@param[in,out] crypt_block encryption buffer
+@param[in] space tablespace ID for encryption
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
@@ -2817,7 +2960,7 @@ row_merge_blocks(
ulint* foffs0,
ulint* foffs1,
merge_file_t* of,
- ut_stage_alter_t* stage,
+ ut_stage_alter_t* stage MY_ATTRIBUTE((unused)),
row_merge_block_t* crypt_block,
ulint space)
{
@@ -2925,10 +3068,10 @@ done1:
@param[in,out] foffs0 input file offset
@param[in,out] of output file
@param[in,out] stage performance schema accounting object, used by
-@param[in,out] crypt_block encryption buffer
-@param[in] space tablespace ID for encryption
ALTER TABLE. If not NULL stage->inc() will be called for each record
processed.
+@param[in,out] crypt_block encryption buffer
+@param[in] space tablespace ID for encryption
@return TRUE on success, FALSE on failure */
static MY_ATTRIBUTE((warn_unused_result))
ibool
@@ -2938,7 +3081,7 @@ row_merge_blocks_copy(
row_merge_block_t* block,
ulint* foffs0,
merge_file_t* of,
- ut_stage_alter_t* stage,
+ ut_stage_alter_t* stage MY_ATTRIBUTE((unused)),
row_merge_block_t* crypt_block,
ulint space)
{
@@ -3029,7 +3172,7 @@ row_merge(
const row_merge_dup_t* dup,
merge_file_t* file,
row_merge_block_t* block,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
ulint* num_run,
ulint* run_offset,
ut_stage_alter_t* stage,
@@ -3171,7 +3314,7 @@ row_merge_sort(
const row_merge_dup_t* dup,
merge_file_t* file,
row_merge_block_t* block,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
const bool update_progress,
/*!< in: update progress
status variable or not */
@@ -3385,7 +3528,7 @@ dberr_t
row_merge_insert_index_tuples(
dict_index_t* index,
const dict_table_t* old_table,
- int fd,
+ const pfs_os_file_t& fd,
row_merge_block_t* block,
const row_merge_buf_t* row_buf,
BtrBulk* btr_bulk,
@@ -3438,7 +3581,7 @@ row_merge_insert_index_tuples(
}
if (row_buf != NULL) {
- ut_ad(fd == -1);
+ ut_ad(fd == OS_FILE_CLOSED);
ut_ad(block == NULL);
DBUG_EXECUTE_IF("row_merge_read_failure",
error = DB_CORRUPTION;
@@ -3926,7 +4069,7 @@ row_merge_drop_temp_indexes(void)
/* Load the table definitions that contain partially defined
indexes, so that the data dictionary information can be checked
when accessing the tablename.ibd files. */
- trx = trx_allocate_for_background();
+ trx = trx_create();
trx->op_info = "dropping partially created indexes";
row_mysql_lock_data_dictionary(trx);
/* Ensure that this transaction will be rolled back and locks
@@ -3949,7 +4092,7 @@ row_merge_drop_temp_indexes(void)
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
}
@@ -3957,15 +4100,15 @@ row_merge_drop_temp_indexes(void)
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
@param[in] path location for creating temporary merge files, or NULL
@return File descriptor */
-int
+pfs_os_file_t
row_merge_file_create_low(
const char* path)
{
- int fd;
#ifdef UNIV_PFS_IO
/* This temp file open does not go through normal
file APIs, add instrumentation to register with
performance schema */
+ struct PSI_file_locker* locker;
PSI_file_locker_state state;
if (!path) {
path = mysql_tmpdir;
@@ -3975,27 +4118,21 @@ row_merge_file_create_low(
ut_malloc_nokey(strlen(path) + sizeof label));
strcpy(name, path);
strcat(name, label);
- PSI_file_locker* locker = PSI_FILE_CALL(get_thread_file_name_locker)(
- &state, innodb_temp_file_key, PSI_FILE_OPEN,
- path ? name : label, &locker);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_open_wait)(locker,
- __FILE__,
- __LINE__);
- }
+
+ register_pfs_file_open_begin(
+ &state, locker, innodb_temp_file_key,
+ PSI_FILE_CREATE, path ? name : label, __FILE__, __LINE__);
+
#endif
- fd = innobase_mysql_tmpfile(path);
+ pfs_os_file_t fd = innobase_mysql_tmpfile(path);
#ifdef UNIV_PFS_IO
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(
- locker, fd);
- }
+ register_pfs_file_open_end(locker, fd,
+ (fd == OS_FILE_CLOSED) ? NULL : &fd);
ut_free(name);
#endif
- if (fd < 0) {
+ if (fd == OS_FILE_CLOSED) {
ib::error() << "Cannot create temporary merge file";
- return(-1);
}
return(fd);
}
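The rewritten function above brackets the temporary-file creation between register_pfs_file_open_begin() and register_pfs_file_open_end(), so the open is accounted as an instrumented file operation. A minimal standalone sketch of that begin/end bracket, in plain C++ where every name is a made-up stand-in (not the real Performance Schema API):

#include <cstdio>

struct instr_state { const char* op = nullptr; };      // hypothetical locker/state pair

static void instr_open_begin(instr_state& s, const char* name)
{
	s.op = name;                                    // where the wait/timing would start
}

static void instr_open_end(instr_state& s, bool ok)
{
	(void) s;
	(void) ok;                                      // where the outcome would be recorded
}

static std::FILE* instrumented_tmpfile(const char* name)
{
	instr_state state;
	instr_open_begin(state, name);                  // bracket opens before the real work
	std::FILE* f = std::tmpfile();                  // the actual file creation
	instr_open_end(state, f != nullptr);            // bracket closes with the outcome
	return f;
}

int main()
{
	std::FILE* f = instrumented_tmpfile("Innodb Merge Temp File");
	if (f) {
		std::fclose(f);
	}
	return 0;
}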
@@ -4004,8 +4141,8 @@ row_merge_file_create_low(
/** Create a merge file in the given location.
@param[out] merge_file merge file structure
@param[in] path location for creating temporary file, or NULL
-@return file descriptor, or -1 on failure */
-int
+@return file descriptor, or OS_FILE_CLOSED on error */
+pfs_os_file_t
row_merge_file_create(
merge_file_t* merge_file,
const char* path)
@@ -4014,7 +4151,7 @@ row_merge_file_create(
merge_file->offset = 0;
merge_file->n_rec = 0;
- if (merge_file->fd >= 0) {
+ if (merge_file->fd != OS_FILE_CLOSED) {
if (srv_disable_sort_file_cache) {
os_file_set_nocache(merge_file->fd,
"row0merge.cc", "sort");
@@ -4029,26 +4166,11 @@ if UNIV_PFS_IO is defined. */
void
row_merge_file_destroy_low(
/*=======================*/
- int fd) /*!< in: merge file descriptor */
+ const pfs_os_file_t& fd) /*!< in: merge file descriptor */
{
-#ifdef UNIV_PFS_IO
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, fd, PSI_FILE_CLOSE);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, 0, __FILE__, __LINE__);
- }
-#endif
- if (fd >= 0) {
- close(fd);
+ if (fd != OS_FILE_CLOSED) {
+ os_file_close(fd);
}
-#ifdef UNIV_PFS_IO
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, 0);
- }
-#endif
}
/*********************************************************************//**
Destroy a merge file. */
@@ -4059,9 +4181,9 @@ row_merge_file_destroy(
{
ut_ad(!srv_read_only_mode);
- if (merge_file->fd != -1) {
+ if (merge_file->fd != OS_FILE_CLOSED) {
row_merge_file_destroy_low(merge_file->fd);
- merge_file->fd = -1;
+ merge_file->fd = OS_FILE_CLOSED;
}
}
@@ -4181,19 +4303,9 @@ row_make_new_pathname(
dict_table_t* table, /*!< in: table to be renamed */
const char* new_name) /*!< in: new name */
{
- char* new_path;
- char* old_path;
-
- ut_ad(!is_system_tablespace(table->space));
-
- old_path = fil_space_get_first_path(table->space);
- ut_a(old_path);
-
- new_path = os_file_make_new_pathname(old_path, new_name);
-
- ut_free(old_path);
-
- return(new_path);
+ ut_ad(!is_system_tablespace(table->space_id));
+ return os_file_make_new_pathname(table->space->chain.start->name,
+ new_name);
}
/*********************************************************************//**
@@ -4245,8 +4357,7 @@ row_merge_rename_tables_dict(
renamed is a single-table tablespace, which must be implicitly
renamed along with the table. */
if (err == DB_SUCCESS
- && dict_table_is_file_per_table(old_table)
- && fil_space_get(old_table->space) != NULL) {
+ && old_table->space_id) {
/* Make pathname to update SYS_DATAFILES. */
char* tmp_path = row_make_new_pathname(old_table, tmp_name);
@@ -4255,7 +4366,7 @@ row_merge_rename_tables_dict(
pars_info_add_str_literal(info, "tmp_name", tmp_name);
pars_info_add_str_literal(info, "tmp_path", tmp_path);
pars_info_add_int4_literal(info, "old_space",
- (lint) old_table->space);
+ old_table->space_id);
err = que_eval_sql(info,
"PROCEDURE RENAME_OLD_SPACE () IS\n"
@@ -4286,7 +4397,7 @@ row_merge_rename_tables_dict(
old_table->name.m_name);
pars_info_add_str_literal(info, "old_path", old_path);
pars_info_add_int4_literal(info, "new_space",
- (lint) new_table->space);
+ new_table->space_id);
err = que_eval_sql(info,
"PROCEDURE RENAME_NEW_SPACE () IS\n"
@@ -4302,9 +4413,9 @@ row_merge_rename_tables_dict(
ut_free(old_path);
}
- if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) {
+ if (err == DB_SUCCESS && (new_table->flags2 & DICT_TF2_DISCARDED)) {
err = row_import_update_discarded_flag(
- trx, new_table->id, true, true);
+ trx, new_table->id, true);
}
trx->op_info = "";
@@ -4312,52 +4423,7 @@ row_merge_rename_tables_dict(
return(err);
}
-/** Create and execute a query graph for creating an index.
-@param[in,out] trx trx
-@param[in,out] table table
-@param[in,out] index index
-@param[in] add_v new virtual columns added along with add index call
-@return DB_SUCCESS or error code */
-MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result))
-static
-dberr_t
-row_merge_create_index_graph(
- trx_t* trx,
- dict_table_t* table,
- dict_index_t* index,
- const dict_add_v_col_t* add_v)
-{
- ind_node_t* node; /*!< Index creation node */
- mem_heap_t* heap; /*!< Memory heap */
- que_thr_t* thr; /*!< Query thread */
- dberr_t err;
-
- DBUG_ENTER("row_merge_create_index_graph");
-
- ut_ad(trx);
- ut_ad(table);
- ut_ad(index);
-
- heap = mem_heap_create(512);
-
- index->table = table;
- node = ind_create_graph_create(index, heap, add_v);
- thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
-
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_run_threads(thr);
-
- err = trx->error_state;
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- DBUG_RETURN(err);
-}
-
/** Create the index and load in to the dictionary.
-@param[in,out] trx trx (sets error_state)
@param[in,out] table the index is on this table
@param[in] index_def the index definition
@param[in] add_v new virtual columns added along with add
@@ -4365,16 +4431,13 @@ row_merge_create_index_graph(
@return index, or NULL on error */
dict_index_t*
row_merge_create_index(
- trx_t* trx,
dict_table_t* table,
const index_def_t* index_def,
const dict_add_v_col_t* add_v)
{
dict_index_t* index;
- dberr_t err;
ulint n_fields = index_def->n_fields;
ulint i;
- bool has_new_v_col = false;
DBUG_ENTER("row_merge_create_index");
@@ -4384,11 +4447,8 @@ row_merge_create_index(
a persistent operation. We pass 0 as the space id, and determine at
a lower level the space id where to store the table. */
- index = dict_mem_index_create(table->name.m_name, index_def->name,
- 0, index_def->ind_type, n_fields);
-
- ut_a(index);
-
+ index = dict_mem_index_create(table, index_def->name,
+ index_def->ind_type, n_fields);
index->set_committed(index_def->rebuild);
for (i = 0; i < n_fields; i++) {
@@ -4402,7 +4462,7 @@ row_merge_create_index(
ut_ad(ifield->col_no >= table->n_v_def);
name = add_v->v_col_name[
ifield->col_no - table->n_v_def];
- has_new_v_col = true;
+ index->has_new_v_col = true;
} else {
name = dict_table_get_v_col_name(
table, ifield->col_no);
@@ -4414,27 +4474,6 @@ row_merge_create_index(
dict_mem_index_add_field(index, name, ifield->prefix_len);
}
- /* Add the index to SYS_INDEXES, using the index prototype. */
- err = row_merge_create_index_graph(trx, table, index, add_v);
-
- if (err == DB_SUCCESS) {
-
- index = dict_table_get_index_on_name(table, index_def->name,
- index_def->rebuild);
-
- ut_a(index);
-
- index->parser = index_def->parser;
- index->has_new_v_col = has_new_v_col;
-
- /* Note the id of the transaction that created this
- index, we use it to restrict readers from accessing
- this index, to ensure read consistency. */
- ut_ad(index->trx_id == trx->id);
- } else {
- index = NULL;
- }
-
DBUG_RETURN(index);
}
@@ -4453,10 +4492,10 @@ row_merge_is_index_usable(
}
return(!index->is_corrupted()
- && (dict_table_is_temporary(index->table)
+ && (index->table->is_temporary()
|| index->trx_id == 0
- || !MVCC::is_view_active(trx->read_view)
- || trx->read_view->changes_visible(
+ || !trx->read_view.is_open()
+ || trx->read_view.changes_visible(
index->trx_id,
index->table->name)));
}
@@ -4494,12 +4533,12 @@ row_merge_write_redo(
mtr_t mtr;
byte* log_ptr;
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
mtr.start();
log_ptr = mlog_open(&mtr, 11 + 8);
log_ptr = mlog_write_initial_log_record_low(
MLOG_INDEX_LOAD,
- index->space, index->page, log_ptr, &mtr);
+ index->table->space_id, index->page, log_ptr, &mtr);
mach_write_to_8(log_ptr, index->id);
mlog_close(&mtr, log_ptr + 8);
mtr.commit();
@@ -4518,7 +4557,7 @@ old_table unless creating a PRIMARY KEY
@param[in] n_indexes size of indexes[]
@param[in,out] table MySQL table, for reporting erroneous key value
if applicable
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added or changed columns, or NULL
@param[in] col_map mapping of old column numbers to new ones, or
NULL if old_table == new_table
@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
@@ -4532,6 +4571,7 @@ this function and it will be passed to other functions for further accounting.
@param[in] add_v new virtual columns added along with indexes
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
+@param[in] allow_not_null allow the conversion from null to not-null
@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
@@ -4543,24 +4583,26 @@ row_merge_build_indexes(
const ulint* key_numbers,
ulint n_indexes,
struct TABLE* table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const ulint* col_map,
ulint add_autoinc,
ib_sequence_t& sequence,
bool skip_pk_sort,
ut_stage_alter_t* stage,
const dict_add_v_col_t* add_v,
- struct TABLE* eval_table)
+ struct TABLE* eval_table,
+ bool allow_not_null)
{
merge_file_t* merge_files;
row_merge_block_t* block;
ut_new_pfx_t block_pfx;
+ size_t block_size;
ut_new_pfx_t crypt_pfx;
row_merge_block_t* crypt_block = NULL;
ulint i;
ulint j;
dberr_t error;
- int tmpfd = -1;
+ pfs_os_file_t tmpfd = OS_FILE_CLOSED;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
@@ -4577,7 +4619,7 @@ row_merge_build_indexes(
ut_ad(!srv_read_only_mode);
ut_ad((old_table == new_table) == !col_map);
- ut_ad(!add_cols || col_map);
+ ut_ad(!defaults || col_map);
stage->begin_phase_read_pk(skip_pk_sort && new_table != old_table
? n_indexes - 1
@@ -4590,7 +4632,8 @@ row_merge_build_indexes(
/* This will allocate "3 * srv_sort_buf_size" elements of type
row_merge_block_t. The latter is defined as byte. */
- block = alloc.allocate_large(3 * srv_sort_buf_size, &block_pfx);
+ block_size = 3 * srv_sort_buf_size;
+ block = alloc.allocate_large(block_size, &block_pfx);
if (block == NULL) {
DBUG_RETURN(DB_OUT_OF_MEMORY);
@@ -4600,7 +4643,7 @@ row_merge_build_indexes(
if (log_tmp_is_encrypted()) {
crypt_block = static_cast<row_merge_block_t*>(
- alloc.allocate_large(3 * srv_sort_buf_size,
+ alloc.allocate_large(block_size,
&crypt_pfx));
if (crypt_block == NULL) {
@@ -4648,7 +4691,7 @@ row_merge_build_indexes(
merge file descriptor */
for (i = 0; i < n_indexes; i++) {
- merge_files[i].fd = -1;
+ merge_files[i].fd = OS_FILE_CLOSED;
merge_files[i].offset = 0;
}
@@ -4688,10 +4731,6 @@ row_merge_build_indexes(
}
}
- /* Reset the MySQL row buffer that is used when reporting
- duplicate keys. */
- innobase_rec_reset(table);
-
if (global_system_variables.log_warnings > 2) {
sql_print_information("InnoDB: Online DDL : Start reading"
" clustered index of the table"
@@ -4718,9 +4757,9 @@ row_merge_build_indexes(
error = row_merge_read_clustered_index(
trx, table, old_table, new_table, online, indexes,
fts_sort_idx, psort_info, merge_files, key_numbers,
- n_indexes, add_cols, add_v, col_map, add_autoinc,
+ n_indexes, defaults, add_v, col_map, add_autoinc,
sequence, block, skip_pk_sort, &tmpfd, stage,
- pct_cost, crypt_block, eval_table);
+ pct_cost, crypt_block, eval_table, allow_not_null);
stage->end_phase_read_pk();
@@ -4824,7 +4863,7 @@ wait_again:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
#endif
- } else if (merge_files[i].fd >= 0) {
+ } else if (merge_files[i].fd != OS_FILE_CLOSED) {
char buf[NAME_LEN + 1];
row_merge_dup_t dup = {
sort_idx, table, col_map, 0};
@@ -4857,7 +4896,8 @@ wait_again:
trx, &dup, &merge_files[i],
block, &tmpfd, true,
pct_progress, pct_cost,
- crypt_block, new_table->space, stage);
+ crypt_block, new_table->space_id,
+ stage);
pct_progress += pct_cost;
@@ -4899,7 +4939,8 @@ wait_again:
merge_files[i].fd, block, NULL,
&btr_bulk,
merge_files[i].n_rec, pct_progress, pct_cost,
- crypt_block, new_table->space, stage);
+ crypt_block, new_table->space_id,
+ stage);
error = btr_bulk.finish(error);
@@ -4980,10 +5021,10 @@ func_exit:
ut_free(merge_files);
- alloc.deallocate_large(block, &block_pfx);
+ alloc.deallocate_large(block, &block_pfx, block_size);
if (crypt_block) {
- alloc.deallocate_large(crypt_block, &crypt_pfx);
+ alloc.deallocate_large(crypt_block, &crypt_pfx, block_size);
}
DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 9f4b024b19f..757a9ff232a 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -38,7 +38,6 @@ Created 9/17/2000 Heikki Tuuri
#include "dict0dict.h"
#include "dict0load.h"
#include "dict0priv.h"
-#include "dict0boot.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
#include "dict0defrag_bg.h"
@@ -485,7 +484,7 @@ row_mysql_store_col_in_innobase_format(
case 4:
/* space=0x00000020 */
/* Trim "half-chars", just in case. */
- col_len &= ~3;
+ col_len &= ~3U;
while (col_len >= 4
&& ptr[col_len - 4] == 0x00
@@ -498,7 +497,7 @@ row_mysql_store_col_in_innobase_format(
case 2:
/* space=0x0020 */
/* Trim "half-chars", just in case. */
- col_len &= ~1;
+ col_len &= ~1U;
while (col_len >= 2 && ptr[col_len - 2] == 0x00
&& ptr[col_len - 1] == 0x20) {
@@ -780,6 +779,12 @@ handle_new_error:
<< FK_MAX_CASCADE_DEL << ". Please drop excessive"
" foreign constraints and try again";
break;
+ case DB_UNSUPPORTED:
+ ib::error() << "Cannot delete/update rows with cascading"
+ " foreign key constraints in a timestamp-based temporal"
+ " table. Please drop the excessive foreign key"
+ " constraints and try again";
+ break;
default:
ib::fatal() << "Unknown error code " << err << ": "
<< ut_strerr(err);
@@ -1297,7 +1302,7 @@ row_mysql_get_table_status(
bool push_warning = true)
{
dberr_t err;
- if (fil_space_t* space = fil_space_acquire_silent(table->space)) {
+ if (const fil_space_t* space = table->space) {
if (space->crypt_data && space->crypt_data->is_encrypted()) {
// maybe we cannot access the table due to failing
// to decrypt
@@ -1319,8 +1324,6 @@ row_mysql_get_table_status(
err = DB_CORRUPTION;
}
-
- fil_space_release(space);
} else {
ib::error() << ".ibd file is missing for table "
<< table->name;
@@ -1330,6 +1333,23 @@ row_mysql_get_table_status(
return(err);
}
+/** Write 8 bytes to the nth field of a tuple.
+@param[in,out] tuple tuple to write to
+@param[in] col index of the field in the tuple
+@param[in] data integer value to write
+@param[in] buf field data buffer, used when the field has no data yet */
+static
+void
+set_tuple_col_8(dtuple_t* tuple, int col, uint64_t data, byte* buf)
+{
+ dfield_t* dfield = dtuple_get_nth_field(tuple, col);
+ ut_ad(dfield->type.len == 8);
+ if (dfield->len == UNIV_SQL_NULL) {
+ dfield_set_data(dfield, buf, 8);
+ }
+ ut_ad(dfield->len == dfield->type.len && dfield->data);
+ mach_write_to_8(dfield->data, data);
+}
+
/** Does an insert for MySQL.
@param[in] mysql_rec row in the MySQL format
@param[in,out] prebuilt prebuilt struct in MySQL handle
@@ -1337,7 +1357,8 @@ row_mysql_get_table_status(
dberr_t
row_insert_for_mysql(
const byte* mysql_rec,
- row_prebuilt_t* prebuilt)
+ row_prebuilt_t* prebuilt,
+ ins_mode_t ins_mode)
{
trx_savept_t savept;
que_thr_t* thr;
@@ -1355,7 +1376,7 @@ row_insert_for_mysql(
ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
- if (dict_table_is_discarded(prebuilt->table)) {
+ if (!prebuilt->table->space) {
ib::error() << "The table " << prebuilt->table->name
<< " doesn't have a corresponding tablespace, it was"
@@ -1385,7 +1406,9 @@ row_insert_for_mysql(
row_mysql_delay_if_needed();
- trx_start_if_not_started_xa(trx, true);
+ if (!table->no_rollback()) {
+ trx_start_if_not_started_xa(trx, true);
+ }
row_get_prebuilt_insert_row(prebuilt);
node = prebuilt->ins_node;
@@ -1393,6 +1416,29 @@ row_insert_for_mysql(
row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec,
&blob_heap);
+ if (ins_mode != ROW_INS_NORMAL) {
+ ut_ad(table->vers_start != table->vers_end);
+ /* Return back modified fields into mysql_rec, so that
+ upper logic may benefit from it (e.g. 'on duplicate key'). */
+ const mysql_row_templ_t* t = prebuilt->get_template_by_col(table->vers_end);
+ ut_ad(t);
+ ut_ad(t->mysql_col_len == 8);
+
+ if (ins_mode == ROW_INS_HISTORICAL) {
+ set_tuple_col_8(node->row, table->vers_end, trx->id, node->vers_end_buf);
+ } else /* ROW_INS_VERSIONED */ {
+ set_tuple_col_8(node->row, table->vers_end, TRX_ID_MAX, node->vers_end_buf);
+ int8store(&mysql_rec[t->mysql_col_offset], TRX_ID_MAX);
+ t = prebuilt->get_template_by_col(table->vers_start);
+ ut_ad(t);
+ ut_ad(t->mysql_col_len == 8);
+ set_tuple_col_8(node->row, table->vers_start, trx->id, node->vers_start_buf);
+ int8store(&mysql_rec[t->mysql_col_offset], trx->id);
+ }
+ }
+
savept = trx_savept_take(trx);
thr = que_fork_get_first_thr(prebuilt->ins_graph);
@@ -1581,8 +1627,8 @@ row_create_update_node_for_mysql(
node = upd_node_create(heap);
- node->in_mysql_interface = TRUE;
- node->is_delete = FALSE;
+ node->in_mysql_interface = true;
+ node->is_delete = NO_DELETE;
node->searched_update = FALSE;
node->select = NULL;
node->pcur = btr_pcur_create_for_mysql();
@@ -1677,7 +1723,7 @@ row_fts_update_or_delete(
ut_a(dict_table_has_fts_index(prebuilt->table));
/* Deletes are simple; get them out of the way first. */
- if (node->is_delete) {
+ if (node->is_delete == PLAIN_DELETE) {
/* A delete affects all FTS indexes, so we pass NULL */
fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
} else {
@@ -1776,7 +1822,9 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
init_fts_doc_id_for_ref(table, &fk_depth);
- trx_start_if_not_started_xa(trx, true);
+ if (!table->no_rollback()) {
+ trx_start_if_not_started_xa(trx, true);
+ }
if (dict_table_is_referenced_by_foreign_key(table)) {
/* Share lock the data dictionary to prevent any
@@ -1791,7 +1839,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
}
node = prebuilt->upd_node;
- const bool is_delete = node->is_delete;
+ const bool is_delete = node->is_delete == PLAIN_DELETE;
ut_ad(node->table == table);
clust_index = dict_table_get_first_index(table);
@@ -1822,6 +1870,16 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
que_thr_move_to_run_state_for_mysql(thr, trx);
+ ut_ad(!prebuilt->versioned_write || node->table->versioned());
+
+ if (prebuilt->versioned_write) {
+ if (node->is_delete == VERSIONED_DELETE) {
+ node->make_versioned_delete(trx);
+ } else if (node->update->affects_versioned()) {
+ node->make_versioned_update(trx);
+ }
+ }
+
for (;;) {
thr->run_node = node;
thr->prev_node = node;
@@ -1872,9 +1930,9 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
}
bool update_statistics;
- ut_ad(node->is_delete == is_delete);
+ ut_ad(is_delete == (node->is_delete == PLAIN_DELETE));
- if (/*node->*/is_delete) {
+ if (is_delete) {
/* Not protected by dict_table_stats_lock() for performance
reasons, we would rather get garbage in stat_n_rows (which is
just an estimate anyway) than protecting the following code
@@ -2082,6 +2140,77 @@ row_mysql_unfreeze_data_dictionary(
trx->dict_operation_lock_mode = 0;
}
+/** Write query start time as SQL field data to a buffer. Needed by InnoDB.
+@param thd Thread object
+@param buf Buffer to hold start time data */
+void thd_get_query_start_data(THD *thd, char *buf);
+
+/** Function restores a btr_pcur_t, creates a dtuple_t from a rec_t,
+sets row_end = CURRENT_TIMESTAMP/trx->id, inserts it into the table and updates
+the table statistics.
+This is used in UPDATE CASCADE/SET NULL of a system-versioned table.
+@param[in] thr current query thread
+@param[in] node a node which just updated a row in a foreign table
+@return DB_SUCCESS or some error */
+static dberr_t row_update_vers_insert(que_thr_t* thr, upd_node_t* node)
+{
+ const trx_t* trx = thr_get_trx(thr);
+ dict_table_t* table = node->table;
+ ut_ad(table->versioned());
+
+ dtuple_t* row = node->historical_row;
+ ut_ad(row);
+ node->historical_row = NULL;
+
+ ins_node_t* insert_node =
+ ins_node_create(INS_DIRECT, table, node->historical_heap);
+
+ ins_node_set_new_row(insert_node, row);
+
+ dfield_t* row_end = dtuple_get_nth_field(row, table->vers_end);
+ char row_end_data[8];
+ if (dict_table_get_nth_col(table, table->vers_end)->vers_native()) {
+ mach_write_to_8(row_end_data, trx->id);
+ dfield_set_data(row_end, row_end_data, 8);
+ } else {
+ thd_get_query_start_data(trx->mysql_thd, row_end_data);
+ dfield_set_data(row_end, row_end_data, 7);
+ }
+
+ for (;;) {
+ thr->run_node = insert_node;
+ thr->prev_node = insert_node;
+
+ row_ins_step(thr);
+
+ switch (trx->error_state) {
+ case DB_LOCK_WAIT:
+ que_thr_stop_for_mysql(thr);
+ lock_wait_suspend_thread(thr);
+
+ if (trx->error_state == DB_SUCCESS) {
+ continue;
+ }
+
+ /* fall through */
+ default:
+ /* Other errors are handled for the parent node. */
+ thr->fk_cascade_depth = 0;
+ goto exit;
+
+ case DB_SUCCESS:
+ srv_stats.n_rows_inserted.inc(
+ static_cast<size_t>(trx->id));
+ dict_stats_update_if_needed(table, trx->mysql_thd);
+ goto exit;
+ }
+ }
+exit:
+ mem_heap_free(node->historical_heap);
+ node->historical_heap = NULL;
+ return trx->error_state;
+}
+
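The insert above is driven in a retry loop: a DB_LOCK_WAIT suspends the thread and re-runs the step once the wait ends successfully, while any other error is handed back to the caller of the cascade. A minimal standalone sketch of that loop shape, in plain C++ with made-up stand-ins (not InnoDB code):

enum class step_status { SUCCESS, LOCK_WAIT, OTHER_ERROR };

// Hypothetical stand-in for row_ins_step(): pretend the first call hits a lock wait.
static step_status run_step_once()
{
	static int calls = 0;
	return ++calls == 1 ? step_status::LOCK_WAIT : step_status::SUCCESS;
}

static step_status run_step_with_lock_retries()
{
	for (;;) {
		const step_status s = run_step_once();
		if (s == step_status::LOCK_WAIT) {
			// The real code suspends the thread here
			// (lock_wait_suspend_thread) and retries only if the
			// wait ended with DB_SUCCESS.
			continue;
		}
		return s;   // success and hard errors are both left to the caller
	}
}

int main()
{
	return run_step_with_lock_retries() == step_status::SUCCESS ? 0 : 1;
}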
/**********************************************************************//**
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
@@ -2103,6 +2232,18 @@ row_update_cascade_for_mysql(
const trx_t* trx = thr_get_trx(thr);
+ if (table->versioned()) {
+ if (node->is_delete == PLAIN_DELETE) {
+ node->make_versioned_delete(trx);
+ } else if (node->update->affects_versioned()) {
+ dberr_t err = row_update_vers_insert(thr, node);
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+ node->make_versioned_update(trx);
+ }
+ }
+
for (;;) {
thr->run_node = node;
thr->prev_node = node;
@@ -2134,7 +2275,7 @@ row_update_cascade_for_mysql(
thr->fk_cascade_depth = 0;
bool stats;
- if (node->is_delete) {
+ if (node->is_delete == PLAIN_DELETE) {
/* Not protected by
dict_table_stats_lock() for
performance reasons, we would rather
@@ -2283,18 +2424,10 @@ err_exit:
/* Update SYS_TABLESPACES and SYS_DATAFILES if a new file-per-table
tablespace was created. */
if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) {
-
- ut_ad(dict_table_is_file_per_table(table));
-
- char* path;
- path = fil_space_get_first_path(table->space);
-
err = dict_replace_tablespace_in_dictionary(
- table->space, table->name.m_name,
- fil_space_get_flags(table->space),
- path, trx);
-
- ut_free(path);
+ table->space_id, table->name.m_name,
+ table->space->flags,
+ table->space->chain.start->name, trx);
if (err != DB_SUCCESS) {
@@ -2329,11 +2462,9 @@ err_exit:
/* We already have .ibd file here. it should be deleted. */
if (dict_table_is_file_per_table(table)
- && fil_delete_tablespace(table->space) != DB_SUCCESS) {
-
- ib::error() << "Not able to delete tablespace "
- << table->space << " of table "
- << table->name << "!";
+ && fil_delete_tablespace(table->space_id) != DB_SUCCESS) {
+ ib::error() << "Cannot delete the file of table "
+ << table->name;
}
/* fall through */
@@ -2376,31 +2507,11 @@ row_create_index_for_mysql(
dberr_t err;
ulint i;
ulint len;
- char* table_name;
- char* index_name;
- dict_table_t* table = NULL;
- ibool is_fts;
-
- trx->op_info = "creating index";
-
- /* Copy the table name because we may want to drop the
- table later, after the index object is freed (inside
- que_run_threads()) and thus index->table_name is not available. */
- table_name = mem_strdup(index->table_name);
- index_name = mem_strdup(index->name);
-
- is_fts = (index->type == DICT_FTS);
+ dict_table_t* table = index->table;
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
ut_ad(mutex_own(&dict_sys->mutex));
- table = dict_table_open_on_name(table_name, TRUE, TRUE,
- DICT_ERR_IGNORE_NONE);
-
- if (!dict_table_is_temporary(table)) {
- trx_start_if_not_started_xa(trx, true);
- }
-
for (i = 0; i < index->n_def; i++) {
/* Check that prefix_len and actual length
< DICT_MAX_INDEX_COL_LEN */
@@ -2418,25 +2529,26 @@ row_create_index_for_mysql(
/* Column or prefix length exceeds maximum column length */
if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
- err = DB_TOO_BIG_INDEX_COL;
-
dict_mem_index_free(index);
- goto error_handling;
+ return DB_TOO_BIG_INDEX_COL;
}
}
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->op_info = "creating index";
/* For temp-table we avoid insertion into SYSTEM TABLES to
maintain performance and so we have separate path that directly
just updates dictonary cache. */
- if (!dict_table_is_temporary(table)) {
+ if (!table->is_temporary()) {
+ trx_start_if_not_started_xa(trx, true);
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
/* Note that the space id where we store the index is
inherited from the table in dict_build_index_def_step()
in dict0crea.cc. */
heap = mem_heap_create(512);
- node = ind_create_graph_create(index, heap, NULL);
+ node = ind_create_graph_create(index, table->name.m_name,
+ heap);
thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
@@ -2448,52 +2560,36 @@ row_create_index_for_mysql(
err = trx->error_state;
- que_graph_free((que_t*) que_node_get_parent(thr));
- } else {
- dict_build_index_def(table, index, trx);
+ index = node->index;
- index_id_t index_id = index->id;
+ ut_ad(!index == (err != DB_SUCCESS));
- /* add index to dictionary cache and also free index object. */
- err = dict_index_add_to_cache(
- table, index, FIL_NULL, trx_is_strict(trx));
+ que_graph_free((que_t*) que_node_get_parent(thr));
- if (err != DB_SUCCESS) {
- goto error_handling;
+ if (index && (index->type & DICT_FTS)) {
+ err = fts_create_index_tables(trx, index, table->id);
}
+ } else {
+ dict_build_index_def(table, index, trx);
- /* as above function has freed index object re-load it
- now from dictionary cache using index_id */
- index = dict_index_get_if_in_cache_low(index_id);
- ut_a(index != NULL);
- index->table = table;
+ /* add index to dictionary cache and also free index object. */
+ index = dict_index_add_to_cache(
+ index, FIL_NULL, trx_is_strict(trx), &err);
+ if (index) {
+ ut_ad(!index->is_instant());
+ index->n_core_null_bytes = UT_BITS_IN_BYTES(
+ unsigned(index->n_nullable));
- err = dict_create_index_tree_in_mem(index, trx);
+ err = dict_create_index_tree_in_mem(index, trx);
- if (err != DB_SUCCESS) {
- dict_index_remove_from_cache(table, index);
+ if (err != DB_SUCCESS) {
+ dict_index_remove_from_cache(table, index);
+ }
}
}
- /* Create the index specific FTS auxiliary tables. */
- if (err == DB_SUCCESS && is_fts) {
- dict_index_t* idx;
-
- idx = dict_table_get_index_on_name(table, index_name);
-
- ut_ad(idx);
- err = fts_create_index_tables_low(
- trx, idx, table->name.m_name, table->id);
- }
-
-error_handling:
- dict_table_close(table, TRUE, FALSE);
-
trx->op_info = "";
- ut_free(table_name);
- ut_free(index_name);
-
return(err);
}
@@ -2514,7 +2610,7 @@ row_drop_table_for_mysql_in_background(
dberr_t error;
trx_t* trx;
- trx = trx_allocate_for_background();
+ trx = trx_create();
/* If the original transaction was dropping a table referenced by
foreign keys, we must set the following to be able to drop the
@@ -2528,7 +2624,7 @@ row_drop_table_for_mysql_in_background(
trx_commit_for_mysql(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
return(error);
}
@@ -2629,7 +2725,7 @@ row_mysql_drop_garbage_tables()
mem_heap_t* heap = mem_heap_create(FN_REFLEN);
btr_pcur_t pcur;
mtr_t mtr;
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
trx->op_info = "dropping garbage tables";
row_mysql_lock_data_dictionary(trx);
@@ -2670,9 +2766,8 @@ row_mysql_drop_garbage_tables()
if (dict_load_table(table_name, true,
DICT_ERR_IGNORE_ALL)) {
- row_drop_table_for_mysql(
- table_name, trx,
- SQLCOM_DROP_TABLE);
+ row_drop_table_for_mysql(table_name, trx,
+ SQLCOM_DROP_TABLE);
trx_commit_for_mysql(trx);
}
@@ -2687,7 +2782,7 @@ row_mysql_drop_garbage_tables()
btr_pcur_close(&pcur);
mtr.commit();
row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
mem_heap_free(heap);
}
@@ -2748,9 +2843,6 @@ row_mysql_table_id_reassign(
dict_hdr_get_new_id(new_id, NULL, NULL, table, false);
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
-
pars_info_add_ull_literal(info, "old_id", table->id);
pars_info_add_ull_literal(info, "new_id", *new_id);
@@ -2800,8 +2892,8 @@ row_discard_tablespace_begin(
if (table) {
dict_stats_wait_bg_to_stop_using_table(table, trx);
- ut_a(!is_system_tablespace(table->space));
- ut_a(table->n_foreign_key_checks_running == 0);
+ ut_a(!is_system_tablespace(table->space_id));
+ ut_ad(!table->n_foreign_key_checks_running);
}
return(table);
@@ -2920,19 +3012,15 @@ row_discard_tablespace(
their operations.
3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree.
-
- 4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
- we do not allow the discard. */
+ the insert buffer tree. */
- ibuf_delete_for_discarded_space(table->space);
+ ibuf_delete_for_discarded_space(table->space_id);
table_id_t new_id;
/* Set the TABLESPACE DISCARD flag in the table definition
on disk. */
- err = row_import_update_discarded_flag(
- trx, table->id, true, true);
+ err = row_import_update_discarded_flag(trx, table->id, true);
if (err != DB_SUCCESS) {
return(err);
@@ -2962,50 +3050,41 @@ row_discard_tablespace(
}
/* Discard the physical file that is used for the tablespace. */
-
- err = fil_discard_tablespace(table->space);
-
+ err = fil_delete_tablespace(table->space_id);
switch (err) {
- case DB_SUCCESS:
case DB_IO_ERROR:
+ ib::warn() << "ALTER TABLE " << table->name
+ << " DISCARD TABLESPACE failed to delete file";
+ break;
case DB_TABLESPACE_NOT_FOUND:
- /* All persistent operations successful, update the
- data dictionary memory cache. */
-
- table->file_unreadable = true;
-
- table->flags2 |= DICT_TF2_DISCARDED;
-
- dict_table_change_id_in_cache(table, new_id);
-
- /* Reset the root page numbers. */
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != 0;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- index->page = FIL_NULL;
- index->space = FIL_NULL;
- }
-
- /* If the tablespace did not already exist or we couldn't
- write to it, we treat that as a successful DISCARD. It is
- unusable anyway. */
-
- err = DB_SUCCESS;
+ ib::warn() << "ALTER TABLE " << table->name
+ << " DISCARD TABLESPACE failed to find tablespace";
+ break;
+ case DB_SUCCESS:
break;
-
default:
- /* We need to rollback the disk changes, something failed. */
+ ut_error;
+ }
- trx->error_state = DB_SUCCESS;
+ /* All persistent operations successful, update the
+ data dictionary memory cache. */
- trx_rollback_to_savepoint(trx, NULL);
+ table->file_unreadable = true;
+ table->space = NULL;
+ table->flags2 |= DICT_TF2_DISCARDED;
+ dict_table_change_id_in_cache(table, new_id);
- trx->error_state = DB_SUCCESS;
- }
+ /* Reset the root page numbers. */
- return(err);
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != 0;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ index->page = FIL_NULL;
+ }
+ /* If the tablespace did not already exist or we couldn't
+ write to it, we treat that as a successful DISCARD. It is
+ unusable anyway. */
+ return DB_SUCCESS;
}
/*********************************************************************//**
@@ -3028,14 +3107,14 @@ row_discard_tablespace_for_mysql(
if (table == 0) {
err = DB_TABLE_NOT_FOUND;
- } else if (dict_table_is_temporary(table)) {
+ } else if (table->is_temporary()) {
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_CANNOT_DISCARD_TEMPORARY_TABLE);
err = DB_ERROR;
- } else if (table->space == TRX_SYS_SPACE) {
+ } else if (table->space_id == TRX_SYS_SPACE) {
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
@@ -3047,19 +3126,9 @@ row_discard_tablespace_for_mysql(
err = DB_ERROR;
- } else if (table->n_foreign_key_checks_running > 0) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name),
- table->name.m_name);
-
- ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_DISCARD_FK_CHECKS_RUNNING, table_name);
-
- err = DB_ERROR;
-
} else {
+ ut_ad(!table->n_foreign_key_checks_running);
+
/* Do foreign key constraint checks. */
err = row_discard_tablespace_foreign_key_checks(trx, table);
@@ -3188,7 +3257,7 @@ row_drop_table_from_cache(
trx_t* trx)
{
dberr_t err = DB_SUCCESS;
- ut_ad(!dict_table_is_temporary(table));
+ ut_ad(!table->is_temporary());
/* Remove the pointer to this table object from the list
of modified tables by the transaction because the object
@@ -3207,46 +3276,7 @@ row_drop_table_from_cache(
return(err);
}
-/** Drop a single-table tablespace as part of dropping or renaming a table.
-This deletes the fil_space_t if found and the file on disk.
-@param[in] space_id Tablespace ID
-@param[in] tablename Table name, same as the tablespace name
-@param[in] filepath File path of tablespace to delete
-@param[in] table_flags table flags
-@return error code or DB_SUCCESS */
-UNIV_INLINE
-dberr_t
-row_drop_single_table_tablespace(
- ulint space_id,
- const char* tablename,
- const char* filepath,
- ulint table_flags)
-{
- dberr_t err = DB_SUCCESS;
-
- /* If the tablespace is not in the cache, just delete the file. */
- if (!fil_space_for_table_exists_in_mem(
- space_id, tablename, true, NULL, table_flags)) {
-
- /* Force a delete of any discarded or temporary files. */
- fil_delete_file(filepath);
-
- ib::info() << "Removed datafile " << filepath
- << " for table " << tablename;
- } else if (fil_delete_tablespace(space_id) != DB_SUCCESS) {
-
- ib::error() << "We removed the InnoDB internal data"
- " dictionary entry of table " << tablename
- << " but we are not able to delete the tablespace "
- << space_id << " file " << filepath << "!";
-
- err = DB_ERROR;
- }
-
- return(err);
-}
-
-/** Drop a table.
+/** Drop a table for MySQL.
If the data dictionary was not already locked by the transaction,
the transaction will be committed. Otherwise, the data dictionary
will remain locked.
@@ -3269,7 +3299,6 @@ row_drop_table_for_mysql(
dberr_t err;
dict_foreign_t* foreign;
dict_table_t* table;
- char* filepath = NULL;
char* tablename = NULL;
bool locked_dictionary = false;
pars_info_t* info = NULL;
@@ -3306,17 +3335,30 @@ row_drop_table_for_mysql(
if (!table) {
err = DB_TABLE_NOT_FOUND;
- goto funct_exit;
+ goto funct_exit_all_freed;
+ }
+
+ if (table->is_temporary()) {
+ ut_ad(table->space == fil_system.temp_space);
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ btr_free(page_id_t(SRV_TMP_SPACE_ID, index->page),
+ univ_page_size);
+ }
+ /* Remove the pointer to this table object from the list
+ of modified tables by the transaction because the object
+ is going to be destroyed below. */
+ trx->mod_tables.erase(table);
+ table->release();
+ dict_table_remove_from_cache(table);
+ err = DB_SUCCESS;
+ goto funct_exit_all_freed;
}
/* This function is called recursively via fts_drop_tables(). */
if (!trx_is_started(trx)) {
-
- if (!dict_table_is_temporary(table)) {
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- } else {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- }
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
}
/* Turn on this drop bit before we could release the dictionary
@@ -3346,20 +3388,14 @@ row_drop_table_for_mysql(
row_mysql_lock_data_dictionary(trx);
}
- /* Do not bother to deal with persistent stats for temp
- tables since we know temp tables do not use persistent
- stats. */
- if (!dict_table_is_temporary(table)) {
- dict_stats_wait_bg_to_stop_using_table(
- table, trx);
- }
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
}
/* make sure background stats thread is not running on the table */
ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
- if (!dict_table_is_temporary(table)) {
- if (table->space != TRX_SYS_SPACE) {
+ if (!table->no_rollback()) {
+ if (table->space != fil_system.sys_space) {
#ifdef BTR_CUR_HASH_ADAPT
/* On DISCARD TABLESPACE, we would not drop the
adaptive hash index entries. If the tablespace is
@@ -3528,9 +3564,8 @@ defer:
case TRX_DICT_OP_INDEX:
/* If the transaction was previously flagged as
TRX_DICT_OP_INDEX, we should be dropping auxiliary
- tables for full-text indexes or temp tables. */
- ut_ad(strstr(table->name.m_name, "/FTS_")
- || strstr(table->name.m_name, TEMP_TABLE_PATH_PREFIX));
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name.m_name, "/FTS_"));
}
/* Mark all indexes unavailable in the data dictionary cache
@@ -3559,178 +3594,162 @@ defer:
rw_lock_x_unlock(dict_index_get_lock(index));
}
- if (!table->is_temporary()) {
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the
- index. */
+ /* Deleting a row from SYS_INDEXES table will invoke
+ dict_drop_index_tree(). */
+ info = pars_info_create();
- info = pars_info_create();
+ pars_info_add_str_literal(info, "name", name);
+
+ if (sqlcom != SQLCOM_TRUNCATE
+ && strchr(name, '/')
+ && dict_table_get_low("SYS_FOREIGN")
+ && dict_table_get_low("SYS_FOREIGN_COLS")) {
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_FOREIGN_PROC () IS\n"
+ "fid CHAR;\n"
- pars_info_add_str_literal(info, "name", name);
+ "DECLARE CURSOR fk IS\n"
+ "SELECT ID FROM SYS_FOREIGN\n"
+ "WHERE FOR_NAME = :name\n"
+ "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
+ "FOR UPDATE;\n"
- if (sqlcom != SQLCOM_TRUNCATE
- && strchr(name, '/')
- && dict_table_get_low("SYS_FOREIGN")
- && dict_table_get_low("SYS_FOREIGN_COLS")) {
+ "BEGIN\n"
+ "OPEN fk;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH fk INTO fid;\n"
+ " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+ " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n"
+ " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n"
+ "END LOOP;\n"
+ "CLOSE fk;\n"
+ "END;\n", FALSE, trx);
+ if (err == DB_SUCCESS) {
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "name", name);
+ goto do_drop;
+ }
+ } else {
+do_drop:
+ if (dict_table_get_low("SYS_VIRTUAL")) {
err = que_eval_sql(
info,
- "PROCEDURE DROP_FOREIGN_PROC () IS\n"
- "fid CHAR;\n"
-
- "DECLARE CURSOR fk IS\n"
- "SELECT ID FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME = :name\n"
- "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
- "FOR UPDATE;\n"
+ "PROCEDURE DROP_VIRTUAL_PROC () IS\n"
+ "tid CHAR;\n"
"BEGIN\n"
- "OPEN fk;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH fk INTO fid;\n"
- " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
- " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n"
- " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n"
- "END LOOP;\n"
- "CLOSE fk;\n"
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN;"
+ " END IF;\n"
+ "DELETE FROM SYS_VIRTUAL"
+ " WHERE TABLE_ID = tid;\n"
"END;\n", FALSE, trx);
if (err == DB_SUCCESS) {
info = pars_info_create();
- pars_info_add_str_literal(info, "name", name);
- goto do_drop;
+ pars_info_add_str_literal(
+ info, "name", name);
}
} else {
-do_drop:
- if (dict_table_get_low("SYS_VIRTUAL")) {
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_VIRTUAL_PROC () IS\n"
- "tid CHAR;\n"
-
- "BEGIN\n"
- "SELECT ID INTO tid FROM SYS_TABLES\n"
- "WHERE NAME = :name FOR UPDATE;\n"
- "IF (SQL % NOTFOUND) THEN RETURN;"
- " END IF;\n"
- "DELETE FROM SYS_VIRTUAL"
- " WHERE TABLE_ID = tid;\n"
- "END;\n", FALSE, trx);
- if (err == DB_SUCCESS) {
- info = pars_info_create();
- pars_info_add_str_literal(
- info, "name", name);
- }
- } else {
- err = DB_SUCCESS;
- }
+ err = DB_SUCCESS;
+ }
- err = err == DB_SUCCESS ? que_eval_sql(
- info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "tid CHAR;\n"
- "iid CHAR;\n"
+ err = err == DB_SUCCESS ? que_eval_sql(
+ info,
+ "PROCEDURE DROP_TABLE_PROC () IS\n"
+ "tid CHAR;\n"
+ "iid CHAR;\n"
+
+ "DECLARE CURSOR cur_idx IS\n"
+ "SELECT ID FROM SYS_INDEXES\n"
+ "WHERE TABLE_ID = tid FOR UPDATE;\n"
- "DECLARE CURSOR cur_idx IS\n"
- "SELECT ID FROM SYS_INDEXES\n"
- "WHERE TABLE_ID = tid FOR UPDATE;\n"
+ "BEGIN\n"
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+
+ "OPEN cur_idx;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH cur_idx INTO iid;\n"
+ " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
+ " DELETE FROM SYS_FIELDS\n"
+ " WHERE INDEX_ID = iid;\n"
+ " DELETE FROM SYS_INDEXES\n"
+ " WHERE ID = iid AND TABLE_ID = tid;\n"
+ "END LOOP;\n"
+ "CLOSE cur_idx;\n"
+ "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
+ "DELETE FROM SYS_TABLES WHERE NAME=:name;\n"
+
+ "END;\n", FALSE, trx) : err;
+
+ if (err == DB_SUCCESS && table->space
+ && dict_table_get_low("SYS_TABLESPACES")
+ && dict_table_get_low("SYS_DATAFILES")) {
+ info = pars_info_create();
+ pars_info_add_int4_literal(info, "id",
+ lint(table->space_id));
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_SPACE_PROC () IS\n"
"BEGIN\n"
- "SELECT ID INTO tid FROM SYS_TABLES\n"
- "WHERE NAME = :name FOR UPDATE;\n"
- "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
-
- "OPEN cur_idx;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH cur_idx INTO iid;\n"
- " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = iid;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = iid AND TABLE_ID = tid;\n"
- "END LOOP;\n"
- "CLOSE cur_idx;\n"
-
- "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
- "DELETE FROM SYS_TABLES WHERE NAME=:name;\n"
-
- "END;\n", FALSE, trx) : err;
-
- if (err == DB_SUCCESS && table->space
- && dict_table_get_low("SYS_TABLESPACES")
- && dict_table_get_low("SYS_DATAFILES")) {
- info = pars_info_create();
- pars_info_add_int4_literal(info, "id",
- lint(table->space));
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_SPACE_PROC () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_TABLESPACES\n"
- "WHERE SPACE = :id;\n"
- "DELETE FROM SYS_DATAFILES\n"
- "WHERE SPACE = :id;\n"
- "END;\n", FALSE, trx);
- }
- }
- } else {
- page_no = page_nos;
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- /* remove the index object associated. */
- dict_drop_index_tree_in_mem(index, *page_no++);
+ "DELETE FROM SYS_TABLESPACES\n"
+ "WHERE SPACE = :id;\n"
+ "DELETE FROM SYS_DATAFILES\n"
+ "WHERE SPACE = :id;\n"
+ "END;\n", FALSE, trx);
}
- trx->mod_tables.erase(table);
- dict_table_remove_from_cache(table);
- err = DB_SUCCESS;
- goto funct_exit;
}
switch (err) {
- ulint space_id;
- bool is_discarded;
- ulint table_flags;
-
+ fil_space_t* space;
+ char* filepath;
case DB_SUCCESS:
- space_id = table->space;
- is_discarded = dict_table_is_discarded(table);
- table_flags = table->flags;
- ut_ad(!dict_table_is_temporary(table));
-
- err = row_drop_ancillary_fts_tables(table, trx);
- if (err != DB_SUCCESS) {
- break;
+ if (!table->no_rollback()) {
+ err = row_drop_ancillary_fts_tables(table, trx);
+ if (err != DB_SUCCESS) {
+ break;
+ }
}
+ space = table->space;
+ ut_ad(!space || space->id == table->space_id);
/* Determine the tablespace filename before we drop
- dict_table_t. Free this memory before returning. */
+ dict_table_t. */
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
dict_get_and_save_data_dir_path(table, true);
ut_a(table->data_dir_path);
- filepath = fil_make_filepath(
+ filepath = space ? NULL : fil_make_filepath(
table->data_dir_path,
table->name.m_name, IBD, true);
} else {
- filepath = fil_make_filepath(
+ filepath = space ? NULL : fil_make_filepath(
NULL, table->name.m_name, IBD, false);
}
/* Free the dict_table_t object. */
err = row_drop_table_from_cache(tablename, table, trx);
if (err != DB_SUCCESS) {
+ ut_free(filepath);
break;
}
/* Do not attempt to drop known-to-be-missing tablespaces,
nor the system tablespace. */
- if (is_discarded || is_system_tablespace(space_id)) {
+ if (!space) {
+ fil_delete_file(filepath);
+ ut_free(filepath);
break;
}
- /* We can now drop the single-table tablespace. */
- err = row_drop_single_table_tablespace(
- space_id, tablename, filepath, table_flags);
+ ut_ad(!filepath);
+
+ if (space->id != TRX_SYS_SPACE) {
+ err = fil_delete_tablespace(space->id);
+ }
break;
case DB_OUT_OF_FILE_SPACE:
@@ -3793,8 +3812,7 @@ funct_exit:
mem_heap_free(heap);
}
- ut_free(filepath);
-
+funct_exit_all_freed:
if (locked_dictionary) {
if (trx_is_started(trx)) {
@@ -3964,8 +3982,7 @@ loop:
<< table->name << ".frm' was lost.";
}
- if (!table->is_readable()
- && !fil_space_get(table->space)) {
+ if (!table->is_readable() && !table->space) {
ib::warn() << "Missing .ibd file for table "
<< table->name << ".";
}
@@ -4220,9 +4237,8 @@ row_rename_table_for_mysql(
err = DB_TABLE_NOT_FOUND;
goto funct_exit;
- } else if (!table->is_readable()
- && fil_space_get(table->space) == NULL
- && !dict_table_is_discarded(table)) {
+ } else if (!table->is_readable() && !table->space
+ && !(table->flags2 & DICT_TF2_DISCARDED)) {
err = DB_TABLE_NOT_FOUND;
@@ -4267,7 +4283,7 @@ row_rename_table_for_mysql(
goto funct_exit;
}
- if (!table->is_temporary() && srv_safe_truncate) {
+ if (!table->is_temporary()) {
err = trx_undo_report_rename(trx, table);
if (err != DB_SUCCESS) {
@@ -4297,22 +4313,18 @@ row_rename_table_for_mysql(
if (err == DB_SUCCESS
&& dict_table_is_file_per_table(table)) {
/* Make a new pathname to update SYS_DATAFILES. */
- char* new_path = row_make_new_pathname(table, new_name);
- char* old_path = fil_space_get_first_path(table->space);
-
/* If old path and new path are the same means tablename
has not changed and only the database name holding the table
has changed so we need to make the complete filepath again. */
- if (!dict_tables_have_same_db(old_name, new_name)) {
- ut_free(new_path);
- new_path = fil_make_filepath(NULL, new_name, IBD, false);
- }
+ char* new_path = dict_tables_have_same_db(old_name, new_name)
+ ? row_make_new_pathname(table, new_name)
+ : fil_make_filepath(NULL, new_name, IBD, false);
info = pars_info_create();
pars_info_add_str_literal(info, "new_table_name", new_name);
pars_info_add_str_literal(info, "new_path_name", new_path);
- pars_info_add_int4_literal(info, "space_id", table->space);
+ pars_info_add_int4_literal(info, "space_id", table->space_id);
err = que_eval_sql(info,
"PROCEDURE RENAME_SPACE () IS\n"
@@ -4326,7 +4338,6 @@ row_rename_table_for_mysql(
"END;\n"
, FALSE, trx);
- ut_free(old_path);
ut_free(new_path);
}
if (err != DB_SUCCESS) {
@@ -4465,7 +4476,7 @@ row_rename_table_for_mysql(
if (err == DB_SUCCESS
&& (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
&& !dict_tables_have_same_db(old_name, new_name)) {
err = fts_rename_aux_tables(table, new_name, trx);
if (err != DB_TABLE_NOT_FOUND) {
@@ -4590,7 +4601,7 @@ funct_exit:
&& table != NULL && (table->space != 0)) {
char* orig_name = table->name.m_name;
- trx_t* trx_bg = trx_allocate_for_background();
+ trx_t* trx_bg = trx_create();
/* If the first fts_rename fails, the trx would
be rolled back and committed, we can't use it any more,
@@ -4614,7 +4625,7 @@ funct_exit:
trx_bg->dict_operation_lock_mode = 0;
trx_commit_for_mysql(trx_bg);
- trx_free_for_background(trx_bg);
+ trx_free(trx_bg);
}
if (table != NULL) {
@@ -4686,7 +4697,8 @@ row_scan_index_for_mysql(
return(DB_SUCCESS);
}
- ulint bufsize = ut_max(UNIV_PAGE_SIZE, prebuilt->mysql_row_len);
+ ulint bufsize = std::max<ulint>(srv_page_size,
+ prebuilt->mysql_row_len);
buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
heap = mem_heap_create(100);
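
Taken together, the DROP TABLE hunks for row0mysql.cc above reduce tablespace removal to one question: is a fil_space_t still attached to the table? If it is, the tablespace is dropped by id (and the system tablespace is skipped); only when the space object is gone is an .ibd path constructed and the file deleted by name. A minimal standalone sketch of that decision, using hypothetical helper and type names rather than the real InnoDB API:

#include <string>

struct Space { unsigned id; };

enum class DropAction { None, DeleteByFile, DeleteTablespace };

/* If the tablespace object is gone (discarded or never loaded), fall back
to deleting by a constructed file path; otherwise drop by space id,
skipping the system tablespace (assumed here to be id 0, as TRX_SYS_SPACE). */
static DropAction choose_drop_action(const Space* space)
{
	if (!space) {
		return DropAction::DeleteByFile;     /* cf. fil_delete_file(filepath) */
	}
	if (space->id == 0) {
		return DropAction::None;
	}
	return DropAction::DeleteTablespace;         /* cf. fil_delete_tablespace(space->id) */
}
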
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index b439b593e33..5699c8b2f56 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -143,7 +143,7 @@ row_purge_remove_clust_if_poss_low(
log_free_check();
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
/* The record was already removed. */
@@ -272,7 +272,7 @@ static bool row_purge_restore_vsec_cur(
bool is_tree)
{
sec_mtr->start();
- sec_mtr->set_named_space(index->space);
+ index->set_modified(*sec_mtr);
return btr_pcur_restore_position(
is_tree ? BTR_PURGE_TREE : BTR_PURGE_LEAF,
@@ -410,7 +410,7 @@ row_purge_remove_sec_if_poss_tree(
log_free_check();
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (!index->is_committed()) {
/* The index->online_status may change if the index is
@@ -539,9 +539,9 @@ row_purge_remove_sec_if_poss_leaf(
log_free_check();
ut_ad(index->table == node->table);
- ut_ad(!dict_table_is_temporary(index->table));
+ ut_ad(!index->table->is_temporary());
mtr_start(&mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
if (!index->is_committed()) {
/* For uncommitted spatial index, we also skip the purge. */
@@ -806,6 +806,73 @@ row_purge_del_mark(
return(row_purge_remove_clust_if_poss(node));
}
+/** Reset DB_TRX_ID, DB_ROLL_PTR of a clustered index record
+whose old history can no longer be observed.
+@param[in,out] node purge node
+@param[in,out] mtr mini-transaction (will be started and committed) */
+static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
+{
+ ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
+ || node->vcol_info.is_used());
+ /* Reset DB_TRX_ID, DB_ROLL_PTR for old records. */
+ mtr->start();
+
+ if (row_purge_reposition_pcur(BTR_MODIFY_LEAF, node, mtr)) {
+ dict_index_t* index = dict_table_get_first_index(
+ node->table);
+ ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
+ rec_t* rec = btr_pcur_get_rec(&node->pcur);
+ mem_heap_t* heap = NULL;
+ /* Reserve enough offsets for the PRIMARY KEY and 2 columns
+ so that we can access DB_TRX_ID, DB_ROLL_PTR. */
+ ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
+ rec_offs_init(offsets_);
+ ulint* offsets = rec_get_offsets(
+ rec, index, offsets_, true, trx_id_pos + 2, &heap);
+ ut_ad(heap == NULL);
+
+ ut_ad(dict_index_get_nth_field(index, trx_id_pos)
+ ->col->mtype == DATA_SYS);
+ ut_ad(dict_index_get_nth_field(index, trx_id_pos)
+ ->col->prtype == (DATA_TRX_ID | DATA_NOT_NULL));
+ ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1)
+ ->col->mtype == DATA_SYS);
+ ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1)
+ ->col->prtype == (DATA_ROLL_PTR | DATA_NOT_NULL));
+
+ /* Only update the record if DB_ROLL_PTR matches (the
+ record has not been modified after this transaction
+ became purgeable) */
+ if (node->roll_ptr
+ == row_get_rec_roll_ptr(rec, index, offsets)) {
+ ut_ad(!rec_get_deleted_flag(rec,
+ rec_offs_comp(offsets)));
+ DBUG_LOG("purge", "reset DB_TRX_ID="
+ << ib::hex(row_get_rec_trx_id(
+ rec, index, offsets)));
+
+ index->set_modified(*mtr);
+ if (page_zip_des_t* page_zip
+ = buf_block_get_page_zip(
+ btr_pcur_get_block(&node->pcur))) {
+ page_zip_write_trx_id_and_roll_ptr(
+ page_zip, rec, offsets, trx_id_pos,
+ 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
+ mtr);
+ } else {
+ ulint len;
+ byte* ptr = rec_get_nth_field(
+ rec, offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ mlog_write_string(ptr, reset_trx_id,
+ sizeof reset_trx_id, mtr);
+ }
+ }
+ }
+
+ mtr->commit();
+}
+
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field. */
@@ -858,6 +925,8 @@ row_purge_upd_exist_or_extern_func(
mem_heap_free(heap);
skip_secondaries:
+ mtr_t mtr;
+ dict_index_t* index = dict_table_get_first_index(node->table);
/* Free possible externally stored fields */
for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
@@ -869,12 +938,10 @@ skip_secondaries:
buf_block_t* block;
ulint internal_offset;
byte* data_field;
- dict_index_t* index;
ibool is_insert;
ulint rseg_id;
ulint page_no;
ulint offset;
- mtr_t mtr;
/* We use the fact that new_val points to
undo_rec and get thus the offset of
@@ -882,18 +949,18 @@ skip_secondaries:
can calculate from node->roll_ptr the file
address of the new_val data */
- internal_offset
- = ((const byte*)
- dfield_get_data(&ufield->new_val))
- - undo_rec;
+ internal_offset = ulint(
+ static_cast<const byte*>
+ (dfield_get_data(&ufield->new_val))
+ - undo_rec);
- ut_a(internal_offset < UNIV_PAGE_SIZE);
+ ut_a(internal_offset < srv_page_size);
trx_undo_decode_roll_ptr(node->roll_ptr,
&is_insert, &rseg_id,
&page_no, &offset);
- rseg = trx_sys->rseg_array[rseg_id];
+ rseg = trx_sys.rseg_array[rseg_id];
ut_a(rseg != NULL);
ut_ad(rseg->id == rseg_id);
@@ -904,10 +971,9 @@ skip_secondaries:
/* We have to acquire an SX-latch to the clustered
index tree (exclude other tree changes) */
- index = dict_table_get_first_index(node->table);
mtr_sx_lock(dict_index_get_lock(index), &mtr);
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
@@ -921,7 +987,7 @@ skip_secondaries:
btr_root_get(index, &mtr);
block = buf_page_get(
- page_id_t(rseg->space, page_no),
+ page_id_t(rseg->space->id, page_no),
univ_page_size, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
@@ -939,6 +1005,8 @@ skip_secondaries:
mtr_commit(&mtr);
}
}
+
+ row_purge_reset_trx_id(node, &mtr);
}
#ifdef UNIV_DEBUG
@@ -966,7 +1034,6 @@ row_purge_parse_undo_rec(
byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
- trx_id_t trx_id;
roll_ptr_t roll_ptr;
ulint info_bits;
ulint type;
@@ -980,16 +1047,25 @@ row_purge_parse_undo_rec(
node->rec_type = type;
- if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
-
- return(false);
+ switch (type) {
+ case TRX_UNDO_RENAME_TABLE:
+ return false;
+ case TRX_UNDO_INSERT_METADATA:
+ case TRX_UNDO_INSERT_REC:
+ break;
+ default:
+#ifdef UNIV_DEBUG
+ ut_ad(!"unknown undo log record type");
+ return false;
+ case TRX_UNDO_UPD_DEL_REC:
+ case TRX_UNDO_UPD_EXIST_REC:
+ case TRX_UNDO_DEL_MARK_REC:
+#endif /* UNIV_DEBUG */
+ ptr = trx_undo_update_rec_get_sys_cols(ptr, &node->trx_id,
+ &roll_ptr, &info_bits);
+ break;
}
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
- node->table = NULL;
- node->trx_id = trx_id;
-
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
@@ -1004,14 +1080,21 @@ try_again:
goto err_exit;
}
- ut_ad(!dict_table_is_temporary(node->table));
+ ut_ad(!node->table->is_temporary());
if (!fil_table_accessible(node->table)) {
goto close_exit;
}
- if (node->table->n_v_cols && !node->table->vc_templ
- && dict_table_has_indexed_v_cols(node->table)) {
+ switch (type) {
+ case TRX_UNDO_INSERT_METADATA:
+ case TRX_UNDO_INSERT_REC:
+ break;
+ default:
+ if (!node->table->n_v_cols || node->table->vc_templ
+ || !dict_table_has_indexed_v_cols(node->table)) {
+ break;
+ }
/* Need server fully up for virtual column computation */
if (!mysqld_server_started) {
@@ -1044,28 +1127,33 @@ err_exit:
return(false);
}
- if (type == TRX_UNDO_UPD_EXIST_REC
- && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
- && !*updated_extern) {
-
- /* Purge requires no changes to indexes: we may return */
- goto close_exit;
+ if (type == TRX_UNDO_INSERT_METADATA) {
+ node->ref = &trx_undo_metadata;
+ return(true);
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
- ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
+ if (type == TRX_UNDO_INSERT_REC) {
+ return(true);
+ }
+
+ ptr = trx_undo_update_rec_get_update(ptr, clust_index, type,
+ node->trx_id,
roll_ptr, info_bits,
node->heap, &(node->update));
/* Read to the partial row the fields that occur in indexes */
if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ ut_ad(!(node->update->info_bits & REC_INFO_MIN_REC_FLAG));
ptr = trx_undo_rec_get_partial_row(
ptr, clust_index, node->update, &node->row,
type == TRX_UNDO_UPD_DEL_REC,
node->heap);
+ } else if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) {
+ node->ref = &trx_undo_metadata;
}
return(true);
@@ -1110,8 +1198,14 @@ row_purge_record_func(
MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
}
break;
+ case TRX_UNDO_INSERT_METADATA:
+ case TRX_UNDO_INSERT_REC:
+ node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
+ /* fall through */
default:
if (!updated_extern) {
+ mtr_t mtr;
+ row_purge_reset_trx_id(node, &mtr);
break;
}
/* fall through */
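
The new row_purge_reset_trx_id() clears DB_TRX_ID and rewrites DB_ROLL_PTR so that only the insert flag remains set (1ULL << ROLL_PTR_INSERT_FLAG_POS), making the record look like a plain insert with no undo history left to follow. A standalone sketch of the byte pattern being written, assuming the usual InnoDB widths of 6 bytes for DB_TRX_ID and 7 bytes for DB_ROLL_PTR and an insert-flag bit position of 55; those constants are assumptions here, not spelled out in the diff:

#include <cstdint>
#include <cstring>

enum { TRX_ID_LEN = 6, ROLL_PTR_LEN = 7 };  /* assumed DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN */

/* Build the value that purge writes over DB_TRX_ID, DB_ROLL_PTR:
trx id = 0, roll pointer = only the insert flag (assumed bit 55). */
static void build_reset_sys_fields(unsigned char out[TRX_ID_LEN + ROLL_PTR_LEN])
{
	std::memset(out, 0, TRX_ID_LEN + ROLL_PTR_LEN);
	const std::uint64_t roll_ptr = 1ULL << 55;
	for (int i = 0; i < ROLL_PTR_LEN; i++) {
		/* big-endian, the way InnoDB stores these system columns */
		out[TRX_ID_LEN + i] = static_cast<unsigned char>(
			roll_ptr >> (8 * (ROLL_PTR_LEN - 1 - i)));
	}
}
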
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 865780c3df5..352407b6ee5 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -142,7 +142,7 @@ row_quiesce_write_indexes(
mach_write_to_8(ptr, index->id);
ptr += sizeof(index_id_t);
- mach_write_to_4(ptr, index->space);
+ mach_write_to_4(ptr, table->space_id);
ptr += sizeof(ib_uint32_t);
mach_write_to_4(ptr, index->page);
@@ -240,7 +240,7 @@ row_quiesce_write_table(
This field is also redundant, because the lengths
are a property of the character set encoding, which
in turn is encoded in prtype above. */
- mach_write_to_4(ptr, col->mbmaxlen * 5 + col->mbminlen);
+ mach_write_to_4(ptr, ulint(col->mbmaxlen * 5 + col->mbminlen));
ptr += sizeof(ib_uint32_t);
mach_write_to_4(ptr, col->ind);
@@ -391,7 +391,7 @@ row_quiesce_write_header(
byte* ptr = row;
/* Write the system page size. */
- mach_write_to_4(ptr, UNIV_PAGE_SIZE);
+ mach_write_to_4(ptr, srv_page_size);
ptr += sizeof(ib_uint32_t);
/* Write the table->flags. */
@@ -518,15 +518,15 @@ row_quiesce_table_start(
ut_a(trx->mysql_thd != 0);
- ut_ad(fil_space_get(table->space) != NULL);
+ ut_ad(table->space != NULL);
ib::info() << "Sync to disk of " << table->name << " started.";
if (srv_undo_sources) {
- trx_purge_stop();
+ purge_sys.stop();
}
for (ulint count = 0;
- ibuf_merge_space(table->space) != 0
+ ibuf_merge_space(table->space_id) != 0
&& !trx_is_interrupted(trx);
++count) {
if (!(count % 20)) {
@@ -538,7 +538,8 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) {
{
FlushObserver observer(table->space, trx, NULL);
- buf_LRU_flush_or_remove_pages(table->space, &observer);
+ buf_LRU_flush_or_remove_pages(table->space_id,
+ &observer);
}
if (trx_is_interrupted(trx)) {
@@ -605,7 +606,7 @@ row_quiesce_table_complete(
}
if (srv_undo_sources) {
- trx_purge_run();
+ purge_sys.resume();
}
dberr_t err = row_quiesce_set_state(table, QUIESCE_NONE, trx);
@@ -631,13 +632,13 @@ row_quiesce_set_state(
return(DB_UNSUPPORTED);
- } else if (dict_table_is_temporary(table)) {
+ } else if (table->is_temporary()) {
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
ER_CANNOT_DISCARD_TEMPORARY_TABLE);
return(DB_UNSUPPORTED);
- } else if (table->space == TRX_SYS_SPACE) {
+ } else if (table->space_id == TRX_SYS_SPACE) {
char table_name[MAX_FULL_NAME_LEN + 1];
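
The row0quiesce.cc hunks serialize srv_page_size, table->space_id and the column metadata with mach_write_to_4() and mach_write_to_8(), i.e. as fixed-width big-endian integers. A tiny stand-in for the 4-byte case, assuming that byte order (the real routine lives in mach0data and is not shown in this diff):

#include <cstdint>

/* Write a 32-bit value most-significant byte first, the byte order
assumed for mach_write_to_4() in the hunks above. */
static void write_be32(unsigned char* ptr, std::uint32_t v)
{
	ptr[0] = static_cast<unsigned char>(v >> 24);
	ptr[1] = static_cast<unsigned char>(v >> 16);
	ptr[2] = static_cast<unsigned char>(v >> 8);
	ptr[3] = static_cast<unsigned char>(v);
}

/* Usage mirroring row_quiesce_write_header(): write_be32(ptr, page_size);
then advance ptr by 4 for the next field. */
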
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index a01238fd21a..d419fd9998f 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -40,11 +40,140 @@ Created 4/20/1996 Heikki Tuuri
#include "row0ext.h"
#include "row0upd.h"
#include "rem0cmp.h"
-#include "read0read.h"
#include "ut0mem.h"
#include "gis0geo.h"
#include "row0mysql.h"
+/** Build a spatial index key.
+@param[in] index spatial index
+@param[in] ext externally stored column prefixes, or NULL
+@param[in,out] dfield field of the tuple to be copied
+@param[in] dfield2 field of the tuple to copy
+@param[in] flag ROW_BUILD_NORMAL, ROW_BUILD_FOR_PURGE or
+ ROW_BUILD_FOR_UNDO
+@param[in,out] heap memory heap from which the memory
+ of the field entry is allocated.
+@retval false if undo log is logged before spatial index creation. */
+static bool row_build_spatial_index_key(
+ const dict_index_t* index,
+ const row_ext_t* ext,
+ dfield_t* dfield,
+ const dfield_t* dfield2,
+ ulint flag,
+ mem_heap_t* heap)
+{
+ double* mbr;
+
+ dfield_copy(dfield, dfield2);
+ dfield->type.prtype |= DATA_GIS_MBR;
+
+ /* Allocate memory for mbr field */
+ mbr = static_cast<double*>(mem_heap_alloc(heap, DATA_MBR_LEN));
+
+ /* Set mbr field data. */
+ dfield_set_data(dfield, mbr, DATA_MBR_LEN);
+
+ const fil_space_t* space = index->table->space;
+
+ if (UNIV_UNLIKELY(!dfield2->data || !space)) {
+ /* FIXME: dfield contains uninitialized data,
+ but row_build_index_entry_low() will not return NULL.
+ This bug is inherited from MySQL 5.7.5
+ commit b66ad511b61fffe75c58d0a607cdb837c6e6c821. */
+ return true;
+ }
+
+ uchar* dptr = NULL;
+ ulint dlen = 0;
+ ulint flen = 0;
+ double tmp_mbr[SPDIMS * 2];
+ mem_heap_t* temp_heap = NULL;
+
+ if (!dfield_is_ext(dfield2)) {
+ dptr = static_cast<uchar*>(dfield_get_data(dfield2));
+ dlen = dfield_get_len(dfield2);
+ goto write_mbr;
+ }
+
+ if (flag == ROW_BUILD_FOR_PURGE) {
+ byte* ptr = static_cast<byte*>(dfield_get_data(dfield2));
+
+ switch (dfield_get_spatial_status(dfield2)) {
+ case SPATIAL_ONLY:
+ ut_ad(dfield_get_len(dfield2) == DATA_MBR_LEN);
+ break;
+
+ case SPATIAL_MIXED:
+ ptr += dfield_get_len(dfield2);
+ break;
+
+ case SPATIAL_UNKNOWN:
+ ut_ad(0);
+ /* fall through */
+ case SPATIAL_NONE:
+ /* Undo record is logged before
+ spatial index is created.*/
+ return false;
+ }
+
+ memcpy(mbr, ptr, DATA_MBR_LEN);
+ return true;
+ }
+
+ if (flag == ROW_BUILD_FOR_UNDO
+ && dict_table_has_atomic_blobs(index->table)) {
+ /* For ROW_FORMAT=DYNAMIC or COMPRESSED, a prefix of
+ off-page records is stored in the undo log record (for
+ any column prefix indexes). For SPATIAL INDEX, we
+ must ignore this prefix. The full column value is
+ stored in the BLOB. For non-spatial index, we would
+ have already fetched a necessary prefix of the BLOB,
+ available in the "ext" parameter.
+
+ Here, for SPATIAL INDEX, we are fetching the full
+ column, which is potentially wasting a lot of I/O,
+ memory, and possibly involving a concurrency problem,
+ similar to ones that existed before the introduction
+ of row_ext_t.
+
+ MDEV-11657 FIXME: write the MBR directly to the undo
+ log record, and avoid recomputing it here! */
+ flen = BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(dfield_get_len(dfield2) >= BTR_EXTERN_FIELD_REF_SIZE);
+ dptr = static_cast<byte*>(dfield_get_data(dfield2))
+ + dfield_get_len(dfield2)
+ - BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+ flen = dfield_get_len(dfield2);
+ dptr = static_cast<byte*>(dfield_get_data(dfield2));
+ }
+
+ temp_heap = mem_heap_create(1000);
+
+ dptr = btr_copy_externally_stored_field(
+ &dlen, dptr, ext ? ext->page_size : page_size_t(space->flags),
+ flen, temp_heap);
+
+write_mbr:
+ if (dlen <= GEO_DATA_HEADER_SIZE) {
+ for (uint i = 0; i < SPDIMS; i += 2) {
+ tmp_mbr[i] = DBL_MAX;
+ tmp_mbr[i + 1] = -DBL_MAX;
+ }
+ } else {
+ rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
+ uint(dlen - GEO_DATA_HEADER_SIZE),
+ SPDIMS, tmp_mbr);
+ }
+
+ dfield_write_mbr(dfield, tmp_mbr);
+ if (temp_heap) {
+ mem_heap_free(temp_heap);
+ }
+
+ return true;
+}
+
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
@@ -58,8 +187,8 @@ row_build_index_entry_low(
inserted or purged */
const row_ext_t* ext, /*!< in: externally stored column
prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap, /*!< in: memory heap from which
+ const dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap, /*!< in,out: memory heap from which
the memory for the index entry
is allocated */
ulint flag) /*!< in: ROW_BUILD_NORMAL,
@@ -112,11 +241,10 @@ row_build_index_entry_low(
col_no = dict_col_get_no(col);
dfield = dtuple_get_nth_field(entry, i);
}
-#if DATA_MISSING != 0
-# error "DATA_MISSING != 0"
-#endif
- if (dict_col_is_virtual(col)) {
+ compile_time_assert(DATA_MISSING == 0);
+
+ if (col->is_virtual()) {
const dict_v_col_t* v_col
= reinterpret_cast<const dict_v_col_t*>(col);
@@ -149,119 +277,11 @@ row_build_index_entry_low(
/* Special handle spatial index, set the first field
which is for store MBR. */
if (dict_index_is_spatial(index) && i == 0) {
- double* mbr;
-
- dfield_copy(dfield, dfield2);
- dfield->type.prtype |= DATA_GIS_MBR;
-
- /* Allocate memory for mbr field */
- ulint mbr_len = DATA_MBR_LEN;
- mbr = static_cast<double*>(mem_heap_alloc(heap, mbr_len));
-
- /* Set mbr field data. */
- dfield_set_data(dfield, mbr, mbr_len);
-
- if (dfield2->data) {
- uchar* dptr = NULL;
- ulint dlen = 0;
- ulint flen = 0;
- double tmp_mbr[SPDIMS * 2];
- mem_heap_t* temp_heap = NULL;
-
- if (dfield_is_ext(dfield2)) {
- if (flag == ROW_BUILD_FOR_PURGE) {
- byte* ptr = NULL;
-
- spatial_status_t spatial_status;
- spatial_status =
- dfield_get_spatial_status(
- dfield2);
-
- switch (spatial_status) {
- case SPATIAL_ONLY:
- ptr = static_cast<byte*>(
- dfield_get_data(
- dfield2));
- ut_ad(dfield_get_len(dfield2)
- == DATA_MBR_LEN);
- break;
-
- case SPATIAL_MIXED:
- ptr = static_cast<byte*>(
- dfield_get_data(
- dfield2))
- + dfield_get_len(
- dfield2);
- break;
-
- case SPATIAL_UNKNOWN:
- ut_ad(0);
- /* fall through */
- case SPATIAL_NONE:
- /* Undo record is logged before
- spatial index is created.*/
- return(NULL);
- }
-
- memcpy(mbr, ptr, DATA_MBR_LEN);
- continue;
- }
-
- if (flag == ROW_BUILD_FOR_UNDO
- && dict_table_get_format(index->table)
- >= UNIV_FORMAT_B) {
- /* For build entry for undo, and
- the table is Barrcuda, we need
- to skip the prefix data. */
- flen = BTR_EXTERN_FIELD_REF_SIZE;
- ut_ad(dfield_get_len(dfield2) >=
- BTR_EXTERN_FIELD_REF_SIZE);
- dptr = static_cast<byte*>(
- dfield_get_data(dfield2))
- + dfield_get_len(dfield2)
- - BTR_EXTERN_FIELD_REF_SIZE;
- } else {
- flen = dfield_get_len(dfield2);
- dptr = static_cast<byte*>(
- dfield_get_data(dfield2));
- }
-
- temp_heap = mem_heap_create(1000);
-
- const page_size_t page_size
- = (ext != NULL)
- ? ext->page_size
- : dict_table_page_size(
- index->table);
-
- dptr = btr_copy_externally_stored_field(
- &dlen, dptr,
- page_size,
- flen,
- temp_heap);
- } else {
- dptr = static_cast<uchar*>(
- dfield_get_data(dfield2));
- dlen = dfield_get_len(dfield2);
-
- }
-
- if (dlen <= GEO_DATA_HEADER_SIZE) {
- for (uint i = 0; i < SPDIMS; ++i) {
- tmp_mbr[i * 2] = DBL_MAX;
- tmp_mbr[i * 2 + 1] = -DBL_MAX;
- }
- } else {
- rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
- static_cast<uint>(dlen
- - GEO_DATA_HEADER_SIZE),
- SPDIMS, tmp_mbr);
- }
- dfield_write_mbr(dfield, tmp_mbr);
- if (temp_heap) {
- mem_heap_free(temp_heap);
- }
+ if (!row_build_spatial_index_key(
+ index, ext, dfield, dfield2, flag, heap)) {
+ return NULL;
}
+
continue;
}
@@ -286,11 +306,11 @@ row_build_index_entry_low(
/* If the column is stored externally (off-page) in
the clustered index, it must be an ordering field in
- the secondary index. In the Antelope format, only
- prefix-indexed columns may be stored off-page in the
- clustered index record. In the Barracuda format, also
- fully indexed long CHAR or VARCHAR columns may be
- stored off-page. */
+ the secondary index. If !atomic_blobs, the only way
+ we may have a secondary index pointing to a clustered
+ index record with an off-page column is when it is a
+ column prefix index. If atomic_blobs, also fully
+ indexed long columns may be stored off-page. */
ut_ad(col->ord_part);
if (ext && !col->is_virtual()) {
@@ -305,9 +325,8 @@ row_build_index_entry_low(
}
if (ind_field->prefix_len == 0) {
- /* In the Barracuda format
- (ROW_FORMAT=DYNAMIC or
- ROW_FORMAT=COMPRESSED), we can have a
+ /* If ROW_FORMAT=DYNAMIC or
+ ROW_FORMAT=COMPRESSED, we can have a
secondary index on an entire column
that is stored off-page in the
clustered index. As this is not a
@@ -317,11 +336,12 @@ row_build_index_entry_low(
continue;
}
} else if (dfield_is_ext(dfield)) {
- /* This table is either in Antelope format
+ /* This table is either in
(ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT)
or a purge record where the ordered part of
the field is not external.
- In Antelope, the maximum column prefix
+ In ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT,
+ the maximum column prefix
index length is 767 bytes, and the clustered
index record contains a 768-byte prefix of
each off-page column. */
@@ -356,7 +376,7 @@ addition of new virtual columns.
of an index, or NULL if
index->table should be
consulted instead
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added/changed columns, or NULL
@param[in] add_v new virtual columns added
along with new indexes
@param[in] col_map mapping of old column
@@ -374,7 +394,7 @@ row_build_low(
const rec_t* rec,
const ulint* offsets,
const dict_table_t* col_table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const dict_add_v_col_t* add_v,
const ulint* col_map,
row_ext_t** ext,
@@ -395,7 +415,7 @@ row_build_low(
ut_ad(rec != NULL);
ut_ad(heap != NULL);
ut_ad(dict_index_is_clust(index));
- ut_ad(!trx_sys_mutex_own());
+ ut_ad(!mutex_own(&trx_sys.mutex));
ut_ad(!col_map || col_table);
if (!offsets) {
@@ -414,8 +434,9 @@ row_build_low(
times, and the cursor restore can happen multiple times for single
insert or update statement. */
ut_a(!rec_offs_any_null_extern(rec, offsets)
- || trx_rw_is_active(row_get_rec_trx_id(rec, index, offsets),
- NULL, false));
+ || trx_sys.is_registered(current_trx(),
+ row_get_rec_trx_id(rec, index,
+ offsets)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
@@ -435,17 +456,17 @@ row_build_low(
}
/* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(copy, index, true, const_cast<ulint*>(offsets));
if (!col_table) {
ut_ad(!col_map);
- ut_ad(!add_cols);
+ ut_ad(!defaults);
col_table = index->table;
}
- if (add_cols) {
+ if (defaults) {
ut_ad(col_map);
- row = dtuple_copy(add_cols, heap);
+ row = dtuple_copy(defaults, heap);
/* dict_table_copy_types() would set the fields to NULL */
for (ulint i = 0; i < dict_table_get_n_cols(col_table); i++) {
dict_col_copy_type(
@@ -505,10 +526,14 @@ row_build_low(
}
dfield_t* dfield = dtuple_get_nth_field(row, col_no);
-
- const byte* field = rec_get_nth_field(
+ const void* field = rec_get_nth_field(
copy, offsets, i, &len);
-
+ if (len == UNIV_SQL_DEFAULT) {
+ field = index->instant_field_value(i, &len);
+ if (field && type != ROW_COPY_POINTERS) {
+ field = mem_heap_dup(heap, field, len);
+ }
+ }
dfield_set_data(dfield, field, len);
if (rec_offs_nth_extern(offsets, i)) {
@@ -525,7 +550,7 @@ row_build_low(
}
}
- rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(rec, index, true, const_cast<ulint*>(offsets));
ut_ad(dtuple_check_typed(row));
@@ -588,9 +613,9 @@ row_build(
of an index, or NULL if
index->table should be
consulted instead */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+ added and changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
@@ -600,7 +625,7 @@ row_build(
the memory needed is allocated */
{
return(row_build_low(type, index, rec, offsets, col_table,
- add_cols, NULL, col_map, ext, heap));
+ defaults, NULL, col_map, ext, heap));
}
/** An inverse function to row_build_index_entry. Builds a row from a
@@ -616,7 +641,7 @@ addition of new virtual columns.
of an index, or NULL if
index->table should be
consulted instead
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added, changed columns, or NULL
@param[in] add_v new virtual columns added
along with new indexes
@param[in] col_map mapping of old column
@@ -633,30 +658,34 @@ row_build_w_add_vcol(
const rec_t* rec,
const ulint* offsets,
const dict_table_t* col_table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const dict_add_v_col_t* add_v,
const ulint* col_map,
row_ext_t** ext,
mem_heap_t* heap)
{
return(row_build_low(type, index, rec, offsets, col_table,
- add_cols, add_v, col_map, ext, heap));
+ defaults, add_v, col_map, ext, heap));
}
-/*******************************************************************//**
-Converts an index record to a typed data tuple.
+/** Convert an index record to a data tuple.
+@tparam def whether the index->instant_field_value() needs to be accessed
+@param[in] rec index record
+@param[in] index index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[out] n_ext number of externally stored columns
+@param[in,out] heap memory heap for allocations
@return index entry built; does not set info_bits, and the data fields
in the entry will point directly to rec */
+template<bool def>
+static inline
dtuple_t*
-row_rec_to_index_entry_low(
-/*=======================*/
- const rec_t* rec, /*!< in: record in the index */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
+row_rec_to_index_entry_impl(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets,
+ ulint* n_ext,
+ mem_heap_t* heap)
{
dtuple_t* entry;
dfield_t* dfield;
@@ -668,6 +697,7 @@ row_rec_to_index_entry_low(
ut_ad(rec != NULL);
ut_ad(heap != NULL);
ut_ad(index != NULL);
+ ut_ad(def || !rec_offs_any_default(offsets));
/* Because this function may be invoked by row0merge.cc
on a record whose header is in different format, the check
@@ -692,7 +722,9 @@ row_rec_to_index_entry_low(
for (i = 0; i < rec_len; i++) {
dfield = dtuple_get_nth_field(entry, i);
- field = rec_get_nth_field(rec, offsets, i, &len);
+ field = def
+ ? rec_get_nth_cfield(rec, index, offsets, i, &len)
+ : rec_get_nth_field(rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
@@ -703,10 +735,27 @@ row_rec_to_index_entry_low(
}
ut_ad(dtuple_check_typed(entry));
-
return(entry);
}
+/** Convert an index record to a data tuple.
+@param[in] rec index record
+@param[in] index index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[out] n_ext number of externally stored columns
+@param[in,out] heap memory heap for allocations */
+dtuple_t*
+row_rec_to_index_entry_low(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets,
+ ulint* n_ext,
+ mem_heap_t* heap)
+{
+ return row_rec_to_index_entry_impl<false>(
+ rec, index, offsets, n_ext, heap);
+}
+
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
@@ -737,10 +786,12 @@ row_rec_to_index_entry(
copy_rec = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets));
- entry = row_rec_to_index_entry_low(
+ rec_offs_make_valid(copy_rec, index, true,
+ const_cast<ulint*>(offsets));
+ entry = row_rec_to_index_entry_impl<true>(
copy_rec, index, offsets, n_ext, heap);
- rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+ rec_offs_make_valid(rec, index, true,
+ const_cast<ulint*>(offsets));
dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets)));
@@ -803,8 +854,7 @@ row_build_row_ref(
mem_heap_alloc(heap, rec_offs_size(offsets)));
rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
+ rec_offs_make_valid(rec, index, true, offsets);
}
table = index->table;
@@ -824,6 +874,7 @@ row_build_row_ref(
ut_a(pos != ULINT_UNDEFINED);
+ ut_ad(!rec_offs_nth_default(offsets, pos));
field = rec_get_nth_field(rec, offsets, pos, &len);
dfield_set_data(dfield, field, len);
@@ -878,9 +929,8 @@ row_build_row_ref_in_tuple(
held as long as the row
reference is used! */
const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ ulint* offsets)/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx) /*!< in: transaction */
{
const dict_index_t* clust_index;
dfield_t* dfield;
@@ -925,6 +975,7 @@ row_build_row_ref_in_tuple(
ut_a(pos != ULINT_UNDEFINED);
+ ut_ad(!rec_offs_nth_default(offsets, pos));
field = rec_get_nth_field(rec, offsets, pos, &len);
dfield_set_data(dfield, field, len);
@@ -981,9 +1032,24 @@ row_search_on_row_ref(
index = dict_table_get_first_index(table);
- ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
-
- btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
+ if (UNIV_UNLIKELY(ref->info_bits != 0)) {
+ ut_ad(ref->info_bits == REC_INFO_METADATA);
+ ut_ad(ref->n_fields <= index->n_uniq);
+ btr_pcur_open_at_index_side(true, index, mode, pcur, true, 0,
+ mtr);
+ btr_pcur_move_to_next_user_rec(pcur, mtr);
+ /* We do not necessarily have index->is_instant() here,
+ because we could be executing a rollback of an
+ instant ADD COLUMN operation. The function
+ rec_is_metadata() asserts index->is_instant();
+ we do not want to call it here. */
+ return rec_get_info_bits(btr_pcur_get_rec(pcur),
+ dict_table_is_comp(index->table))
+ & REC_INFO_MIN_REC_FLAG;
+ } else {
+ ut_a(ref->n_fields == index->n_uniq);
+ btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
+ }
low_match = btr_pcur_get_low_match(pcur);
@@ -1138,7 +1204,7 @@ row_raw_format_int(
value = mach_read_int_type(
(const byte*) data, data_len, unsigned_type);
- ret = snprintf(
+ ret = (ulint) snprintf(
buf, buf_size,
unsigned_type ? "%llu" : "%lld", (longlong) value)+1;
} else {
@@ -1228,6 +1294,8 @@ row_raw_format(
ulint ret;
ibool format_in_hex;
+ ut_ad(data_len != UNIV_SQL_DEFAULT);
+
if (buf_size == 0) {
return(0);
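
In the new row_build_spatial_index_key(), a missing or too-short geometry (dlen <= GEO_DATA_HEADER_SIZE) seeds the MBR with min = DBL_MAX and max = -DBL_MAX, while longer values go through rtree_mbr_from_wkb(). A generic 2-D sketch (not the InnoDB implementation) of why that inverted seed works: the first real point immediately corrects both bounds, and a point-free geometry keeps a box that intersects nothing.

#include <algorithm>
#include <cfloat>

struct Mbr2d {
	/* Inverted bounds: an "empty" box that any real point will correct. */
	double xmin = DBL_MAX, xmax = -DBL_MAX;
	double ymin = DBL_MAX, ymax = -DBL_MAX;
};

/* Fold one point into the bounding box; the first point fixes both
ends of each dimension because the seed is inverted. */
static void mbr_add_point(Mbr2d& m, double x, double y)
{
	m.xmin = std::min(m.xmin, x);
	m.xmax = std::max(m.xmax, x);
	m.ymin = std::min(m.ymin, y);
	m.ymax = std::max(m.ymax, y);
}
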
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index e27b1b9df77..58b063a3b05 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -51,7 +51,6 @@ Created 12/19/1997 Heikki Tuuri
#include "pars0sym.h"
#include "pars0pars.h"
#include "row0mysql.h"
-#include "read0read.h"
#include "buf0lru.h"
#include "srv0srv.h"
#include "srv0mon.h"
@@ -106,10 +105,10 @@ row_sel_sec_rec_is_for_blob(
ulint len;
byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN];
- /* This function should never be invoked on an Antelope format
- table, because they should always contain enough prefix in the
- clustered index record. */
- ut_ad(dict_table_get_format(table) >= UNIV_FORMAT_B);
+ /* This function should never be invoked on tables in
+ ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT, because they
+ should always contain enough prefix in the clustered index record. */
+ ut_ad(dict_table_has_atomic_blobs(table));
ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(prefix_len >= sec_len);
ut_ad(prefix_len > 0);
@@ -125,7 +124,7 @@ row_sel_sec_rec_is_for_blob(
}
len = btr_copy_externally_stored_field_prefix(
- buf, prefix_len, dict_tf_get_page_size(table->flags),
+ buf, prefix_len, page_size_t(table->space->flags),
clust_field, clust_len);
if (len == 0) {
@@ -216,7 +215,7 @@ row_sel_sec_rec_is_for_clust_rec(
ifield = dict_index_get_nth_field(sec_index, i);
col = dict_field_get_col(ifield);
- is_virtual = dict_col_is_virtual(col);
+ is_virtual = col->is_virtual();
/* For virtual column, its value will need to be
reconstructed from base column in cluster index */
@@ -255,7 +254,7 @@ row_sel_sec_rec_is_for_clust_rec(
clust_field = static_cast<byte*>(vfield->data);
} else {
clust_pos = dict_col_get_clust_pos(col, clust_index);
-
+ ut_ad(!rec_offs_nth_default(clust_offs, clust_pos));
clust_field = rec_get_nth_field(
clust_rec, clust_offs, clust_pos, &clust_len);
}
@@ -306,8 +305,8 @@ row_sel_sec_rec_is_for_clust_rec(
if (rec_offs_nth_extern(clust_offs, clust_pos)) {
dptr = btr_copy_externally_stored_field(
&clust_len, dptr,
- dict_tf_get_page_size(
- sec_index->table->flags),
+ page_size_t(clust_index->table->space
+ ->flags),
len, heap);
}
@@ -520,8 +519,8 @@ row_sel_fetch_columns(
if (field_no != ULINT_UNDEFINED) {
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- field_no))) {
+ if (UNIV_UNLIKELY(rec_offs_nth_extern(
+ offsets, field_no) != 0)) {
/* Copy an externally stored field to the
temporary heap, if possible. */
@@ -548,9 +547,8 @@ row_sel_fetch_columns(
needs_copy = TRUE;
} else {
- data = rec_get_nth_field(rec, offsets,
- field_no, &len);
-
+ data = rec_get_nth_cfield(rec, index, offsets,
+ field_no, &len);
needs_copy = column->copy_val;
}
@@ -815,7 +813,7 @@ row_sel_build_committed_vers_for_mysql(
rec_offs_size(*offsets));
}
- row_vers_build_for_semi_consistent_read(
+ row_vers_build_for_semi_consistent_read(prebuilt->trx,
rec, mtr, clust_index, offsets, offset_heap,
prebuilt->old_vers_heap, old_vers, vrow);
}
@@ -1127,13 +1125,14 @@ re_scan:
}
mutex_exit(&match->rtr_match_mutex);
+ /* MDEV-14059 FIXME: why re-latch the block?
+ pcur is already positioned on it! */
ulint page_no = page_get_page_no(
- btr_pcur_get_page(pcur));
- page_id_t page_id(dict_index_get_space(index),
- page_no);
+ btr_pcur_get_page(pcur));
cur_block = buf_page_get_gen(
- page_id, dict_table_page_size(index->table),
+ page_id_t(index->table->space_id, page_no),
+ page_size_t(index->table->space->flags),
RW_X_LATCH, NULL, BUF_GET,
__FILE__, __LINE__, mtr, &err);
} else {
@@ -1288,24 +1287,19 @@ static
void
row_sel_open_pcur(
/*==============*/
- plan_t* plan, /*!< in: table plan */
- ibool search_latch_locked,
- /*!< in: TRUE if the thread currently
- has the search latch locked in
- s-mode */
- mtr_t* mtr) /*!< in: mtr */
+ plan_t* plan, /*!< in: table plan */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: the adaptive hash index latch */
+#endif /* BTR_CUR_HASH_ADAPT */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
dict_index_t* index;
func_node_t* cond;
que_node_t* exp;
ulint n_fields;
- ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
ulint i;
- if (search_latch_locked) {
- has_search_latch = RW_S_LATCH;
- }
-
index = plan->index;
/* Calculate the value of the search tuple: the exact match columns
@@ -1341,7 +1335,7 @@ row_sel_open_pcur(
btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
BTR_SEARCH_LEAF, &plan->pcur,
- has_search_latch, mtr);
+ ahi_latch, mtr);
} else {
/* Open the cursor to the start or the end of the index
(FALSE: no init) */
@@ -1478,33 +1472,24 @@ row_sel_try_search_shortcut(
sel_node_t* node, /*!< in: select node for a consistent read */
plan_t* plan, /*!< in: plan for a unique search in clustered
index */
- ibool search_latch_locked,
- /*!< in: whether the search holds latch on
- search system. */
mtr_t* mtr) /*!< in: mtr */
{
- dict_index_t* index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- rec_offs_init(offsets_);
-
- index = plan->index;
+ dict_index_t* index = plan->index;
ut_ad(node->read_view);
ut_ad(plan->unique_search);
ut_ad(!plan->must_get_clust);
- ut_ad(!search_latch_locked
- || rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- row_sel_open_pcur(plan, search_latch_locked, mtr);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+ rw_lock_s_lock(ahi_latch);
- rec = btr_pcur_get_rec(&(plan->pcur));
+ row_sel_open_pcur(plan, ahi_latch, mtr);
- if (!page_rec_is_user_rec(rec)) {
+ const rec_t* rec = btr_pcur_get_rec(&(plan->pcur));
+ if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, index)) {
+retry:
+ rw_lock_s_unlock(ahi_latch);
return(SEL_RETRY);
}
@@ -1515,36 +1500,34 @@ row_sel_try_search_shortcut(
fields in the user record matched to the search tuple */
if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
-
+exhausted:
+ rw_lock_s_unlock(ahi_latch);
return(SEL_EXHAUSTED);
}
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets, true,
ULINT_UNDEFINED, &heap);
if (dict_index_is_clust(index)) {
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
node->read_view)) {
- ret = SEL_RETRY;
- goto func_exit;
+ goto retry;
}
} else if (!srv_read_only_mode
&& !lock_sec_rec_cons_read_sees(
rec, index, node->read_view)) {
-
- ret = SEL_RETRY;
- goto func_exit;
+ goto retry;
}
- /* Test the deleted flag. */
-
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
+ goto exhausted;
}
/* Fetch the columns needed in test conditions. The index
@@ -1558,20 +1541,18 @@ row_sel_try_search_shortcut(
/* Test the rest of search conditions */
if (!row_sel_test_other_conds(plan)) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
+ goto exhausted;
}
ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
plan->n_rows_fetched++;
- ret = SEL_FOUND;
-func_exit:
+ rw_lock_s_unlock(ahi_latch);
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- return(ret);
+ return(SEL_FOUND);
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -1618,12 +1599,6 @@ row_sel(
ut_ad(thr->run_node == node);
-#ifdef BTR_CUR_HASH_ADAPT
- ibool search_latch_locked = FALSE;
-#else /* BTR_CUR_HASH_ADAPT */
-# define search_latch_locked false
-#endif /* BTR_CUR_HASH_ADAPT */
-
if (node->read_view) {
/* In consistent reads, we try to do with the hash index and
not to use the buffer page get. This is to reduce memory bus
@@ -1672,33 +1647,14 @@ table_loop:
#ifdef BTR_CUR_HASH_ADAPT
if (consistent_read && plan->unique_search && !plan->pcur_is_open
&& !plan->must_get_clust) {
- if (!search_latch_locked) {
- btr_search_s_lock(index);
-
- search_latch_locked = TRUE;
- } else if (rw_lock_get_writer(btr_get_search_latch(index))
- == RW_LOCK_X_WAIT) {
-
- /* There is an x-latch request waiting: release the
- s-latch for a moment; as an s-latch here is often
- kept for some 10 searches before being released,
- a waiting x-latch request would block other threads
- from acquiring an s-latch for a long time, lowering
- performance significantly in multiprocessors. */
-
- btr_search_s_unlock(index);
- btr_search_s_lock(index);
- }
-
- switch (row_sel_try_search_shortcut(node, plan,
- search_latch_locked,
- &mtr)) {
+ switch (row_sel_try_search_shortcut(node, plan, &mtr)) {
case SEL_FOUND:
goto next_table;
case SEL_EXHAUSTED:
goto table_exhausted;
default:
ut_ad(0);
+ /* fall through */
case SEL_RETRY:
break;
}
@@ -1708,19 +1664,16 @@ table_loop:
mtr.commit();
mtr.start();
}
-
- if (search_latch_locked) {
- btr_search_s_unlock(index);
-
- search_latch_locked = FALSE;
- }
#endif /* BTR_CUR_HASH_ADAPT */
if (!plan->pcur_is_open) {
/* Evaluate the expressions to build the search tuple and
open the cursor */
-
- row_sel_open_pcur(plan, search_latch_locked, &mtr);
+ row_sel_open_pcur(plan,
+#ifdef BTR_CUR_HASH_ADAPT
+ NULL,
+#endif /* BTR_CUR_HASH_ADAPT */
+ &mtr);
cursor_just_opened = TRUE;
@@ -1834,6 +1787,12 @@ skip_lock:
goto next_rec;
}
+ if (rec_is_metadata(rec, index)) {
+ /* Skip the metadata pseudo-record. */
+ cost_counter++;
+ goto next_rec;
+ }
+
if (!consistent_read) {
/* Try to place a lock on the index record */
ulint lock_type;
@@ -2117,8 +2076,6 @@ skip_lock:
}
next_rec:
- ut_ad(!search_latch_locked);
-
if (mtr_has_extra_clust_latch) {
/* We must commit &mtr if we are moving to the next
@@ -2156,8 +2113,6 @@ next_table:
plan->cursor_at_end = TRUE;
} else {
- ut_ad(!search_latch_locked);
-
plan->stored_cursor_rec_processed = TRUE;
btr_pcur_store_position(&(plan->pcur), &mtr);
@@ -2248,8 +2203,6 @@ stop_for_a_while:
inserted new records which should have appeared in the result set,
which would result in the phantom problem. */
- ut_ad(!search_latch_locked);
-
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
@@ -2266,7 +2219,6 @@ commit_mtr_for_a_while:
plan->stored_cursor_rec_processed = TRUE;
- ut_ad(!search_latch_locked);
btr_pcur_store_position(&(plan->pcur), &mtr);
mtr.commit();
@@ -2280,7 +2232,6 @@ lock_wait_or_error:
/* See the note at stop_for_a_while: the same holds for this case */
ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
- ut_ad(!search_latch_locked);
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
@@ -2288,11 +2239,6 @@ lock_wait_or_error:
mtr.commit();
func_exit:
-#ifdef BTR_CUR_HASH_ADAPT
- if (search_latch_locked) {
- btr_search_s_unlock(index);
- }
-#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(!sync_check_iterate(dict_sync_check()));
if (heap != NULL) {
@@ -2337,15 +2283,11 @@ row_sel_step(
plan_reset_cursor(sel_node_get_nth_plan(node, 0));
if (node->consistent_read) {
+ trx_t *trx = thr_get_trx(thr);
/* Assign a read view for the query */
- trx_assign_read_view(thr_get_trx(thr));
-
- if (thr_get_trx(thr)->read_view != NULL) {
- node->read_view = thr_get_trx(thr)->read_view;
- } else {
- node->read_view = NULL;
- }
-
+ trx->read_view.open(trx);
+ node->read_view = trx->read_view.is_open() ?
+ &trx->read_view : NULL;
} else {
sym_node_t* table_node;
lock_mode i_lock_mode;
@@ -2560,8 +2502,7 @@ row_sel_convert_mysql_key_to_innobase(
ulint buf_len, /*!< in: buffer length */
dict_index_t* index, /*!< in: index of the key value */
const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx) /*!< in: transaction */
+ ulint key_len) /*!< in: MySQL key value length */
{
byte* original_buf = buf;
const byte* original_key_ptr = key_ptr;
@@ -2648,8 +2589,8 @@ row_sel_convert_mysql_key_to_innobase(
even though the actual value only takes data
len bytes from the start. */
- data_len = key_ptr[data_offset]
- + 256 * key_ptr[data_offset + 1];
+ data_len = ulint(key_ptr[data_offset])
+ | ulint(key_ptr[data_offset + 1]) << 8;
data_field_len = data_offset + 2
+ field->prefix_len;
@@ -2888,7 +2829,8 @@ row_sel_field_store_in_mysql_format_func(
}
}
- row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
+ row_mysql_pad_col(templ->mbminlen, pad,
+ ulint(field_end - pad));
break;
case DATA_BLOB:
@@ -3014,7 +2956,7 @@ row_sel_store_mysql_field_func(
|| field_no == templ->icp_rec_field_no);
ut_ad(rec_offs_validate(rec, index, offsets));
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
+ if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no) != 0)) {
mem_heap_t* heap;
/* Copy an externally stored field to a temporary heap */
@@ -3024,12 +2966,12 @@ row_sel_store_mysql_field_func(
if (DATA_LARGE_MTYPE(templ->type)) {
if (prebuilt->blob_heap == NULL) {
prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
+ srv_page_size);
}
heap = prebuilt->blob_heap;
} else {
- heap = mem_heap_create(UNIV_PAGE_SIZE);
+ heap = mem_heap_create(srv_page_size);
}
/* NOTE: if we are retrieving a big BLOB, we may
@@ -3066,9 +3008,19 @@ row_sel_store_mysql_field_func(
mem_heap_free(heap);
}
} else {
- /* Field is stored in the row. */
-
- data = rec_get_nth_field(rec, offsets, field_no, &len);
+ /* The field is stored in the index record, or
+ in the metadata for instant ADD COLUMN. */
+
+ if (rec_offs_nth_default(offsets, field_no)) {
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_instant());
+ const dict_index_t* clust_index
+ = dict_table_get_first_index(prebuilt->table);
+ ut_ad(index == clust_index);
+ data = clust_index->instant_field_value(field_no,&len);
+ } else {
+ data = rec_get_nth_field(rec, offsets, field_no, &len);
+ }
if (len == UNIV_SQL_NULL) {
/* MySQL assumes that the field for an SQL
@@ -3102,7 +3054,7 @@ row_sel_store_mysql_field_func(
if (prebuilt->blob_heap == NULL) {
prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
+ srv_page_size);
DBUG_PRINT("anna", ("blob_heap allocated: %p",
prebuilt->blob_heap));
}
@@ -3348,7 +3300,7 @@ row_sel_get_clust_rec_for_mysql(
thd_get_thread_id(trx->mysql_thd));
row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
- sec_index, *offsets, trx);
+ sec_index, *offsets);
clust_index = dict_table_get_first_index(sec_index->table);
@@ -3501,12 +3453,12 @@ row_sel_get_clust_rec_for_mysql(
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
&& !lock_clust_rec_cons_read_sees(
clust_rec, clust_index, *offsets,
- trx_get_read_view(trx))) {
+ &trx->read_view)) {
/* The following call returns 'offsets' associated with
'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index, prebuilt,
+ &trx->read_view, clust_index, prebuilt,
clust_rec, offsets, offset_heap, &old_vers,
vrow, mtr);
@@ -3565,10 +3517,10 @@ err_exit:
Restores cursor position after it has been stored. We have to take into
account that the record cursor was positioned on may have been deleted.
Then we may have to move the cursor one step up or down.
-@return TRUE if we may need to process the record the cursor is now
+@return true if we may need to process the record the cursor is now
positioned on (i.e. we should not go to the next record yet) */
static
-ibool
+bool
sel_restore_position_for_mysql(
/*===========================*/
ibool* same_user_rec, /*!< out: TRUE if we were able to restore
@@ -3608,21 +3560,28 @@ sel_restore_position_for_mysql(
case BTR_PCUR_ON:
if (!success && moves_up) {
next:
- btr_pcur_move_to_next(pcur, mtr);
- return(TRUE);
+ if (btr_pcur_move_to_next(pcur, mtr)
+ && rec_is_metadata(btr_pcur_get_rec(pcur),
+ pcur->btr_cur.index)) {
+ btr_pcur_move_to_next(pcur, mtr);
+ }
+
+ return true;
}
return(!success);
case BTR_PCUR_AFTER_LAST_IN_TREE:
case BTR_PCUR_BEFORE_FIRST_IN_TREE:
- return(TRUE);
+ return true;
case BTR_PCUR_AFTER:
/* positioned to record after pcur->old_rec. */
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
prev:
- if (btr_pcur_is_on_user_rec(pcur) && !moves_up) {
+ if (btr_pcur_is_on_user_rec(pcur) && !moves_up
+ && !rec_is_metadata(btr_pcur_get_rec(pcur),
+ pcur->btr_cur.index)) {
btr_pcur_move_to_prev(pcur, mtr);
}
- return(TRUE);
+ return true;
case BTR_PCUR_BEFORE:
/* For non optimistic restoration:
The position is now set to the record before pcur->old_rec.
@@ -3644,19 +3603,19 @@ prev:
HANDLER READ idx PREV; */
goto prev;
}
- return(TRUE);
+ return true;
case BTR_PCUR_IS_POSITIONED:
if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
goto next;
}
- return(TRUE);
+ return true;
case BTR_PCUR_WAS_POSITIONED:
case BTR_PCUR_NOT_POSITIONED:
break;
}
}
ut_ad(0);
- return(TRUE);
+ return true;
}
/********************************************************************//**
@@ -3890,12 +3849,15 @@ row_sel_try_search_shortcut_for_mysql(
ut_ad(dict_index_is_clust(index));
ut_ad(!prebuilt->templ_contains_blob);
+ rw_lock_t* ahi_latch = btr_get_search_latch(index);
+ rw_lock_s_lock(ahi_latch);
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur, RW_S_LATCH, mtr);
+ BTR_SEARCH_LEAF, pcur, ahi_latch, mtr);
rec = btr_pcur_get_rec(pcur);
- if (!page_rec_is_user_rec(rec)) {
-
+ if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, index)) {
+retry:
+ rw_lock_s_unlock(ahi_latch);
return(SEL_RETRY);
}
@@ -3904,7 +3866,8 @@ row_sel_try_search_shortcut_for_mysql(
fields in the user record matched to the search tuple */
if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
-
+exhausted:
+ rw_lock_s_unlock(ahi_latch);
return(SEL_EXHAUSTED);
}
@@ -3914,22 +3877,21 @@ row_sel_try_search_shortcut_for_mysql(
*offsets = rec_get_offsets(rec, index, *offsets, true,
ULINT_UNDEFINED, heap);
- if (!lock_clust_rec_cons_read_sees(
- rec, index, *offsets, trx_get_read_view(trx))) {
-
- return(SEL_RETRY);
+ if (!lock_clust_rec_cons_read_sees(rec, index, *offsets,
+ &trx->read_view)) {
+ goto retry;
}
if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
ut_ad(row_get_rec_trx_id(rec, index, *offsets));
-
- return(SEL_EXHAUSTED);
+ goto exhausted;
}
*out_rec = rec;
+ rw_lock_s_unlock(ahi_latch);
return(SEL_FOUND);
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -4041,6 +4003,9 @@ row_sel_fill_vrow(
rec_offs_init(offsets_);
ut_ad(!(*vrow));
+ ut_ad(heap);
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(!index->is_instant());
ut_ad(page_rec_is_leaf(rec));
offsets = rec_get_offsets(rec, index, offsets, true,
@@ -4054,18 +4019,18 @@ row_sel_fill_vrow(
for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
const dict_field_t* field;
- const dict_col_t* col;
+ const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(field);
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
const byte* data;
- ulint len;
+ ulint len;
data = rec_get_nth_field(rec, offsets, i, &len);
- const dict_v_col_t* vcol = reinterpret_cast<
+ const dict_v_col_t* vcol = reinterpret_cast<
const dict_v_col_t*>(col);
dfield_t* dfield = dtuple_get_nth_v_field(
@@ -4216,15 +4181,16 @@ row_search_mvcc(
ulint direction)
{
DBUG_ENTER("row_search_mvcc");
+ DBUG_ASSERT(prebuilt->index->table == prebuilt->table);
dict_index_t* index = prebuilt->index;
- ibool comp = dict_table_is_comp(index->table);
+ ibool comp = dict_table_is_comp(prebuilt->table);
const dtuple_t* search_tuple = prebuilt->search_tuple;
btr_pcur_t* pcur = prebuilt->pcur;
trx_t* trx = prebuilt->trx;
dict_index_t* clust_index;
que_thr_t* thr;
- const rec_t* rec;
+ const rec_t* UNINIT_VAR(rec);
const dtuple_t* vrow = NULL;
const rec_t* result_rec = NULL;
const rec_t* clust_rec;
@@ -4265,10 +4231,10 @@ row_search_mvcc(
ut_ad(!sync_check_iterate(sync_check()));
- if (dict_table_is_discarded(prebuilt->table)) {
+ if (!prebuilt->table->space) {
DBUG_RETURN(DB_TABLESPACE_DELETED);
} else if (!prebuilt->table->is_readable()) {
- DBUG_RETURN(fil_space_get(prebuilt->table->space)
+ DBUG_RETURN(prebuilt->table->space
? DB_DECRYPTION_FAILED
: DB_TABLESPACE_NOT_FOUND);
} else if (!prebuilt->index_usable) {
@@ -4417,28 +4383,18 @@ row_search_mvcc(
&& dict_index_is_clust(index)
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
+ && (prebuilt->mysql_row_len < srv_page_size / 8)) {
mode = PAGE_CUR_GE;
- if (trx->mysql_n_tables_locked == 0
- && prebuilt->select_lock_type == LOCK_NONE
+ if (prebuilt->select_lock_type == LOCK_NONE
&& trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && MVCC::is_view_active(trx->read_view)) {
+ && trx->read_view.is_open()) {
/* This is a SELECT query done as a consistent read,
and the read view has already been allocated:
let us try a search shortcut through the hash
- index.
- NOTE that we must also test that
- mysql_n_tables_locked == 0, because this might
- also be INSERT INTO ... SELECT ... or
- CREATE TABLE ... SELECT ... . Our algorithm is
- NOT prepared to inserts interleaved with the SELECT,
- and if we try that, we can deadlock on the adaptive
- hash index semaphore! */
-
- rw_lock_s_lock(btr_get_search_latch(index));
+ index. */
switch (row_sel_try_search_shortcut_for_mysql(
&rec, prebuilt, &offsets, &heap,
@@ -4489,24 +4445,15 @@ row_search_mvcc(
/* NOTE that we do NOT store the cursor
position */
-
err = DB_SUCCESS;
-
- rw_lock_s_unlock(btr_get_search_latch(index));
-
goto func_exit;
case SEL_EXHAUSTED:
shortcut_mismatch:
mtr.commit();
-
- err = DB_RECORD_NOT_FOUND;
-
- rw_lock_s_unlock(btr_get_search_latch(index));
-
/* NOTE that we do NOT store the cursor
position */
-
+ err = DB_RECORD_NOT_FOUND;
goto func_exit;
case SEL_RETRY:
@@ -4518,8 +4465,6 @@ row_search_mvcc(
mtr.commit();
mtr.start();
-
- rw_lock_s_unlock(btr_get_search_latch(index));
}
}
#endif /* BTR_CUR_HASH_ADAPT */
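Read as a whole, the hunks above move the adaptive-hash-index s-latch handling into row_sel_try_search_shortcut_for_mysql() itself, so the caller no longer calls rw_lock_s_unlock() and only dispatches on the result. The following is a simplified, standalone model of that dispatch (the _D names are stand-ins, not the InnoDB definitions):

    #include <cassert>

    // Stand-ins for the result and error codes seen in the hunks above.
    enum sel_result_demo { SEL_FOUND_D, SEL_EXHAUSTED_D, SEL_RETRY_D };
    enum db_err_demo { DB_SUCCESS_D, DB_RECORD_NOT_FOUND_D, DB_FALL_THROUGH_D };

    // SEL_FOUND and SEL_EXHAUSTED exit immediately (without storing the
    // cursor position); SEL_RETRY falls back to the regular index search.
    static db_err_demo handle_shortcut(sel_result_demo r)
    {
        switch (r) {
        case SEL_FOUND_D:     return DB_SUCCESS_D;
        case SEL_EXHAUSTED_D: return DB_RECORD_NOT_FOUND_D;
        case SEL_RETRY_D:     break;
        }
        return DB_FALL_THROUGH_D;  // commit and restart the mtr, then search normally
    }

    int main()
    {
        assert(handle_shortcut(SEL_FOUND_D) == DB_SUCCESS_D);
        assert(handle_shortcut(SEL_RETRY_D) == DB_FALL_THROUGH_D);
        return 0;
    }
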
@@ -4534,17 +4479,19 @@ row_search_mvcc(
thread that is currently serving the transaction. Because we
are that thread, we can read trx->state without holding any
mutex. */
- ut_ad(prebuilt->sql_stat_start || trx->state == TRX_STATE_ACTIVE);
+ ut_ad(prebuilt->sql_stat_start
+ || trx->state == TRX_STATE_ACTIVE
+ || (prebuilt->table->no_rollback()
+ && trx->state == TRX_STATE_NOT_STARTED));
ut_ad(!trx_is_started(trx) || trx->state == TRX_STATE_ACTIVE);
ut_ad(prebuilt->sql_stat_start
|| prebuilt->select_lock_type != LOCK_NONE
- || MVCC::is_view_active(trx->read_view)
+ || trx->read_view.is_open()
+ || prebuilt->table->no_rollback()
|| srv_read_only_mode);
- trx_start_if_not_started(trx, false);
-
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& prebuilt->select_lock_type != LOCK_NONE
&& trx->mysql_thd != NULL
@@ -4572,45 +4519,36 @@ row_search_mvcc(
que_thr_move_to_run_state_for_mysql(thr, trx);
- clust_index = dict_table_get_first_index(index->table);
+ clust_index = dict_table_get_first_index(prebuilt->table);
/* Do some start-of-statement preparations */
- if (!prebuilt->sql_stat_start) {
- /* No need to set an intention lock or assign a read view */
-
- if (!MVCC::is_view_active(trx->read_view)
- && !srv_read_only_mode
- && prebuilt->select_lock_type == LOCK_NONE) {
-
- ib::error() << "MySQL is trying to perform a"
- " consistent read but the read view is not"
- " assigned!";
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- ut_error;
- }
- } else if (prebuilt->select_lock_type == LOCK_NONE) {
- /* This is a consistent read */
- /* Assign a read view for the query */
-
- if (!srv_read_only_mode) {
- trx_assign_read_view(trx);
- }
-
+ if (prebuilt->table->no_rollback()) {
+ /* NO_ROLLBACK tables do not support MVCC or locking. */
+ prebuilt->select_lock_type = LOCK_NONE;
prebuilt->sql_stat_start = FALSE;
+ } else if (!prebuilt->sql_stat_start) {
+ /* No need to set an intention lock or assign a read view */
+ ut_a(prebuilt->select_lock_type != LOCK_NONE
+ || srv_read_only_mode || trx->read_view.is_open());
} else {
+ prebuilt->sql_stat_start = FALSE;
+ trx_start_if_not_started(trx, false);
+
+ if (prebuilt->select_lock_type == LOCK_NONE) {
+ trx->read_view.open(trx);
+ } else {
wait_table_again:
- err = lock_table(0, index->table,
- prebuilt->select_lock_type == LOCK_S
- ? LOCK_IS : LOCK_IX, thr);
+ err = lock_table(0, prebuilt->table,
+ prebuilt->select_lock_type == LOCK_S
+ ? LOCK_IS : LOCK_IX, thr);
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS) {
- table_lock_waited = TRUE;
- goto lock_table_wait;
+ table_lock_waited = TRUE;
+ goto lock_table_wait;
+ }
}
- prebuilt->sql_stat_start = FALSE;
}
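One note on the lock_table() call a few lines above: the ternary maps the statement's row-lock mode to the table-level intention lock in the standard way. A minimal standalone sketch of that mapping (the _D enum values are stand-ins, not InnoDB's lock_mode):

    #include <cassert>

    enum lock_mode_demo { LOCK_IS_D, LOCK_IX_D, LOCK_S_D, LOCK_X_D };

    // A shared row-lock request needs only an intention-shared (IS) table
    // lock; anything stronger needs intention-exclusive (IX).
    static lock_mode_demo table_intention_mode(lock_mode_demo row_mode)
    {
        return row_mode == LOCK_S_D ? LOCK_IS_D : LOCK_IX_D;
    }

    int main()
    {
        assert(table_intention_mode(LOCK_S_D) == LOCK_IS_D);
        assert(table_intention_mode(LOCK_X_D) == LOCK_IX_D);
        return 0;
    }
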
/* Open or restore index cursor position */
@@ -4622,7 +4560,7 @@ wait_table_again:
goto next_rec;
}
- ibool need_to_process = sel_restore_position_for_mysql(
+ bool need_to_process = sel_restore_position_for_mysql(
&same_user_rec, BTR_SEARCH_LEAF,
pcur, moves_up, &mtr);
@@ -4817,12 +4755,24 @@ rec_loop:
corruption */
if (comp) {
+ if (rec_get_info_bits(rec, true) & REC_INFO_MIN_REC_FLAG) {
+ /* Skip the metadata pseudo-record. */
+ ut_ad(index->is_instant());
+ goto next_rec;
+ }
+
next_offs = rec_get_next_offs(rec, TRUE);
if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
goto wrong_offs;
}
} else {
+ if (rec_get_info_bits(rec, false) & REC_INFO_MIN_REC_FLAG) {
+ /* Skip the metadata pseudo-record. */
+ ut_ad(index->is_instant());
+ goto next_rec;
+ }
+
next_offs = rec_get_next_offs(rec, FALSE);
if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
@@ -4830,7 +4780,7 @@ rec_loop:
}
}
- if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
+ if (UNIV_UNLIKELY(next_offs >= srv_page_size - PAGE_DIR)) {
wrong_offs:
if (srv_force_recovery == 0 || moves_up == FALSE) {
@@ -5024,7 +4974,7 @@ wrong_offs:
/* At most one transaction can be active
for temporary table. */
- if (dict_table_is_temporary(clust_index->table)) {
+ if (clust_index->table->is_temporary()) {
goto no_gap_lock;
}
@@ -5034,17 +4984,17 @@ wrong_offs:
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
ut_ad(trx_id);
- if (!trx_rw_is_active(trx_id, NULL, false)) {
+ if (!trx_sys.is_registered(trx, trx_id)) {
/* The clustered index record
was delete-marked in a committed
transaction. Ignore the record. */
goto locks_ok_del_marked;
}
- } else if (trx_t* trx = row_vers_impl_x_locked(
- rec, index, offsets)) {
+ } else if (trx_t* t = row_vers_impl_x_locked(
+ trx, rec, index, offsets)) {
/* The record belongs to an active
transaction. We must acquire a lock. */
- trx_release_reference(trx);
+ t->release_reference();
} else {
/* The secondary index record does not
point to a delete-marked clustered index
@@ -5178,7 +5128,8 @@ no_gap_lock:
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
- if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
+ if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED
+ || prebuilt->table->no_rollback()) {
/* Do nothing: we let a non-locking SELECT read the
latest version of the record */
@@ -5190,16 +5141,15 @@ no_gap_lock:
high force recovery level set, we try to avoid crashes
by skipping this lookup */
- if (srv_force_recovery < 5
- && !lock_clust_rec_cons_read_sees(
- rec, index, offsets,
- trx_get_read_view(trx))) {
-
+ if (!lock_clust_rec_cons_read_sees(
+ rec, index, offsets, &trx->read_view)) {
+ ut_ad(srv_force_recovery
+ < SRV_FORCE_NO_UNDO_LOG_SCAN);
rec_t* old_vers;
/* The following call returns 'offsets'
associated with 'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index,
+ &trx->read_view, clust_index,
prebuilt, rec, &offsets, &heap,
&old_vers, need_vrow ? &vrow : NULL,
&mtr);
@@ -5229,7 +5179,7 @@ no_gap_lock:
if (!srv_read_only_mode
&& !lock_sec_rec_cons_read_sees(
- rec, index, trx->read_view)) {
+ rec, index, &trx->read_view)) {
/* We should look at the clustered index.
However, as this is a non-locking read,
we can skip the clustered index lookup if
@@ -5604,25 +5554,25 @@ next_rec:
For R-tree spatial search, we also commit the mini-transaction
each time */
- if (mtr_has_extra_clust_latch || spatial_search) {
+ if (spatial_search) {
+ /* No need to store and restore the cursor position for R-tree */
+ mtr.commit();
+ mtr.start();
+ mtr_has_extra_clust_latch = FALSE;
+ } else if (mtr_has_extra_clust_latch) {
/* If we have extra cluster latch, we must commit
mtr if we are moving to the next non-clustered
index record, because we could break the latching
order if we would access a different clustered
index page right away without releasing the previous. */
- /* No need to do store restore for R-tree */
- if (!spatial_search) {
- btr_pcur_store_position(pcur, &mtr);
- }
-
+ btr_pcur_store_position(pcur, &mtr);
mtr.commit();
mtr_has_extra_clust_latch = FALSE;
mtr.start();
- if (!spatial_search
- && sel_restore_position_for_mysql(&same_user_rec,
+ if (sel_restore_position_for_mysql(&same_user_rec,
BTR_SEARCH_LEAF,
pcur, moves_up, &mtr)) {
goto rec_loop;
@@ -5744,7 +5694,14 @@ lock_table_wait:
normal_return:
/*-------------------------------------------------------------*/
- que_thr_stop_for_mysql_no_error(thr, trx);
+ {
+ /* handler_index_cond_check() may pull TR_table search
+ which initiates another row_search_mvcc(). */
+ ulint n_active_thrs= trx->lock.n_active_thrs;
+ trx->lock.n_active_thrs= 1;
+ que_thr_stop_for_mysql_no_error(thr, trx);
+ trx->lock.n_active_thrs= n_active_thrs - 1;
+ }
mtr.commit();
@@ -5881,7 +5838,8 @@ row_count_rtree_recs(
prebuilt->search_tuple = entry;
- ulint bufsize = ut_max(UNIV_PAGE_SIZE, prebuilt->mysql_row_len);
+ ulint bufsize = std::max<ulint>(srv_page_size,
+ prebuilt->mysql_row_len);
buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
ulint cnt = 1000;
@@ -6003,6 +5961,9 @@ row_search_get_max_rec(
btr_pcur_close(&pcur);
+ ut_ad(!rec
+ || !(rec_get_info_bits(rec, dict_table_is_comp(index->table))
+ & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)));
return(rec);
}
diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc
index ee90ceaf924..ce98717b3c9 100644
--- a/storage/innobase/row/row0trunc.cc
+++ b/storage/innobase/row/row0trunc.cc
@@ -24,18 +24,16 @@ TRUNCATE implementation
Created 2013-04-12 Sunny Bains
*******************************************************/
-#include "row0mysql.h"
+#include "row0trunc.h"
+#include "btr0sea.h"
#include "pars0pars.h"
#include "btr0pcur.h"
#include "dict0crea.h"
-#include "dict0boot.h"
-#include "dict0load.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
#include "lock0lock.h"
#include "fts0fts.h"
-#include "srv0start.h"
-#include "row0trunc.h"
+#include "ibuf0ibuf.h"
#include "os0file.h"
#include "que0que.h"
#include "trx0undo.h"
@@ -51,8 +49,6 @@ bool truncate_t::s_fix_up_active = false;
truncate_t::tables_t truncate_t::s_tables;
truncate_t::truncated_tables_t truncate_t::s_truncated_tables;
-static const byte magic[] = { 0x01, 0xf3, 0xa1, 0x20 };
-
/**
Iterator over the raw records in an index, doesn't support MVCC. */
class IndexIterator {
@@ -100,7 +96,7 @@ public:
for (;;) {
if (!btr_pcur_is_on_user_rec(&m_pcur)
- || !callback.match(&m_mtr, &m_pcur)) {
+ || !callback.match(&m_pcur)) {
/* The end of the index has been reached. */
err = DB_END_OF_INDEX;
@@ -199,10 +195,9 @@ public:
}
/**
- @param mtr mini-transaction covering the iteration
@param pcur persistent cursor used for iteration
@return true if the table id column matches. */
- bool match(mtr_t* mtr, btr_pcur_t* pcur) const
+ bool match(btr_pcur_t* pcur) const
{
ulint len;
const byte* field;
@@ -244,301 +239,6 @@ protected:
};
/**
-Creates a TRUNCATE log record with space id, table name, data directory path,
-tablespace flags, table format, index ids, index types, number of index fields
-and index field information of the table. */
-class TruncateLogger : public Callback {
-
-public:
- /**
- Constructor
-
- @param table Table to truncate
- @param flags tablespace falgs */
- TruncateLogger(
- dict_table_t* table,
- ulint flags,
- table_id_t new_table_id)
- :
- Callback(table->id, false),
- m_table(table),
- m_flags(flags),
- m_truncate(table->id, new_table_id, table->data_dir_path),
- m_log_file_name()
- {
- /* Do nothing */
- }
-
- /**
- Initialize Truncate Logger by constructing Truncate Log File Name.
-
- @return DB_SUCCESS or error code. */
- dberr_t init()
- {
- /* Construct log file name. */
- ulint log_file_name_buf_sz =
- strlen(srv_log_group_home_dir)
- + (22 + 22 + sizeof "ib_trunc.log");
-
- m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz);
- if (m_log_file_name == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
- memset(m_log_file_name, 0, log_file_name_buf_sz);
-
- strcpy(m_log_file_name, srv_log_group_home_dir);
- ulint log_file_name_len = strlen(m_log_file_name);
- if (m_log_file_name[log_file_name_len - 1]
- != OS_PATH_SEPARATOR) {
-
- m_log_file_name[log_file_name_len]
- = OS_PATH_SEPARATOR;
- log_file_name_len = strlen(m_log_file_name);
- }
-
- snprintf(m_log_file_name + log_file_name_len,
- log_file_name_buf_sz - log_file_name_len,
- "ib_%u_" IB_ID_FMT "_trunc.log",
- m_table->space, m_table->id);
-
- return(DB_SUCCESS);
-
- }
-
- /**
- Destructor */
- ~TruncateLogger()
- {
- if (m_log_file_name != NULL) {
- bool exist;
- os_file_delete_if_exists(
- innodb_log_file_key, m_log_file_name, &exist);
- UT_DELETE_ARRAY(m_log_file_name);
- m_log_file_name = NULL;
- }
- }
-
- /**
- @param mtr mini-transaction covering the read
- @param pcur persistent cursor used for reading
- @return DB_SUCCESS or error code */
- dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
-
- /** Called after iteratoring over the records.
- @return true if invariant satisfied. */
- bool debug() const
- {
- /* We must find all the index entries on disk. */
- return(UT_LIST_GET_LEN(m_table->indexes)
- == m_truncate.indexes());
- }
-
- /**
- Write the TRUNCATE log
- @return DB_SUCCESS or error code */
- dberr_t log() const
- {
- dberr_t err = DB_SUCCESS;
-
- if (m_log_file_name == 0) {
- return(DB_ERROR);
- }
-
- bool ret;
- os_file_t handle = os_file_create(
- innodb_log_file_key, m_log_file_name,
- OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, srv_read_only_mode, &ret);
- if (!ret) {
- return(DB_IO_ERROR);
- }
-
-
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
- if (buf == 0) {
- os_file_close(handle);
- return(DB_OUT_OF_MEMORY);
- }
-
- /* Align the memory for file i/o if we might have O_DIRECT set*/
- byte* log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
-
- lsn_t lsn = log_get_lsn();
-
- /* Generally loop should exit in single go but
- just for those 1% of rare cases we need to assume
- corner case. */
- do {
- /* First 4 bytes are reserved for magic number
- which is currently 0. */
- err = m_truncate.write(
- log_buf + 4, log_buf + sz - 4,
- m_table->space, m_table->name.m_name,
- m_flags, m_table->flags, lsn);
-
- DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
- err = DB_FAIL;);
-
- if (err != DB_SUCCESS) {
- ut_ad(err == DB_FAIL);
- ut_free(buf);
- sz *= 2;
- buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
- DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
- ut_free(buf);
- buf = 0;);
- if (buf == 0) {
- os_file_close(handle);
- return(DB_OUT_OF_MEMORY);
- }
- log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
- }
-
- } while (err != DB_SUCCESS);
-
- dberr_t io_err;
-
- IORequest request(IORequest::WRITE);
-
- io_err = os_file_write(
- request, m_log_file_name, handle, log_buf, 0, sz);
-
- if (io_err != DB_SUCCESS) {
-
- ib::error()
- << "IO: Failed to write the file size to '"
- << m_log_file_name << "'";
-
- /* Preserve the original error code */
- if (err == DB_SUCCESS) {
- err = io_err;
- }
- }
-
- os_file_flush(handle);
- os_file_close(handle);
-
- ut_free(buf);
-
- /* Why we need MLOG_TRUNCATE when we have truncate_log for
- recovery?
- - truncate log can protect us if crash happens while truncate
- is active. Once truncate is done truncate log is removed.
- - If crash happens post truncate and system is yet to
- checkpoint, on recovery we would see REDO records from action
- before truncate (unless we explicitly checkpoint before
- returning from truncate API. Costly alternative so rejected).
- - These REDO records may reference a page that doesn't exist
- post truncate so we need a mechanism to skip all such REDO
- records. MLOG_TRUNCATE records space_id and lsn that exactly
- serve the purpose.
- - If checkpoint happens post truncate and crash happens post
- this point then neither MLOG_TRUNCATE nor REDO record
- from action before truncate are accessible. */
- if (!is_system_tablespace(m_table->space)) {
- mtr_t mtr;
- byte* log_ptr;
-
- mtr_start(&mtr);
-
- log_ptr = mlog_open(&mtr, 11 + 8);
- log_ptr = mlog_write_initial_log_record_low(
- MLOG_TRUNCATE, m_table->space, 0,
- log_ptr, &mtr);
-
- mach_write_to_8(log_ptr, lsn);
- log_ptr += 8;
-
- mlog_close(&mtr, log_ptr);
- mtr_commit(&mtr);
- }
-
- return(err);
- }
-
- /**
- Indicate completion of truncate log by writing magic-number.
- File will be removed from the system but to protect against
- unlink (File-System) anomalies we ensure we write magic-number. */
- void done()
- {
- if (m_log_file_name == 0) {
- return;
- }
-
- bool ret;
- os_file_t handle = os_file_create_simple_no_error_handling(
- innodb_log_file_key, m_log_file_name,
- OS_FILE_OPEN, OS_FILE_READ_WRITE,
- srv_read_only_mode, &ret);
- DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number",
- os_file_close(handle);
- ret = false;);
- if (!ret) {
- ib::error() << "Failed to open truncate log file "
- << m_log_file_name << "."
- " If server crashes before truncate log is"
- " removed make sure it is manually removed"
- " before restarting server";
- os_file_delete(innodb_log_file_key, m_log_file_name);
- return;
- }
-
- if (os_file_write(IORequest(IORequest::WRITE),
- m_log_file_name, handle, magic, 0,
- sizeof magic) != DB_SUCCESS) {
- ib::error()
- << "IO: Failed to write the magic number to '"
- << m_log_file_name << "'";
- }
-
- DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no",
- DBUG_SUICIDE(););
- os_file_flush(handle);
- os_file_close(handle);
- DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete",
- log_buffer_flush_to_disk();
- os_thread_sleep(1000000);
- DBUG_SUICIDE(););
- os_file_delete(innodb_log_file_key, m_log_file_name);
- }
-
-private:
- /** Lookup the index using the index id.
- @return index instance if found else NULL */
- const dict_index_t* find(index_id_t id) const
- {
- for (const dict_index_t* index = UT_LIST_GET_FIRST(
- m_table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- if (index->id == id) {
- return(index);
- }
- }
-
- return(NULL);
- }
-
-private:
- /** Table to be truncated */
- dict_table_t* m_table;
-
- /** Tablespace flags */
- ulint m_flags;
-
- /** Collect table to truncate information */
- truncate_t m_truncate;
-
- /** Truncate log file name. */
- char* m_log_file_name;
-};
-
-/**
Scan to find out truncate log file from the given directory path.
@param dir_path look for log directory in following path.
@@ -633,8 +333,8 @@ TruncateLogParser::parse(
return(DB_IO_ERROR);
}
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ ulint sz = srv_page_size;
+ void* buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == 0) {
os_file_close(handle);
return(DB_OUT_OF_MEMORY);
@@ -643,7 +343,7 @@ TruncateLogParser::parse(
IORequest request(IORequest::READ);
/* Align the memory for file i/o if we might have O_DIRECT set*/
- byte* log_buf = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+ byte* log_buf = static_cast<byte*>(ut_align(buf, srv_page_size));
do {
err = os_file_read(request, handle, log_buf, 0, sz);
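The allocation above — ut_zalloc_nokey(sz + srv_page_size) followed by ut_align(buf, srv_page_size) — is the usual over-allocate-and-align idiom for I/O buffers that may be used with O_DIRECT. A standalone sketch of the same idea without the InnoDB helpers (the page size value is chosen only for illustration):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Round a pointer up to the next multiple of a power-of-two alignment,
    // which is what ut_align() does for the buffer above.
    static void* align_up(void* p, std::size_t alignment)
    {
        std::uintptr_t v = reinterpret_cast<std::uintptr_t>(p);
        return reinterpret_cast<void*>(
            (v + alignment - 1) & ~(std::uintptr_t(alignment) - 1));
    }

    int main()
    {
        const std::size_t page_size = 16384;                // stands in for srv_page_size
        void* raw = std::calloc(1, page_size + page_size);  // sz + srv_page_size, as in the hunk
        assert(raw);
        void* buf = align_up(raw, page_size);
        assert(reinterpret_cast<std::uintptr_t>(buf) % page_size == 0);
        std::free(raw);
        return 0;
    }
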
@@ -653,7 +353,7 @@ TruncateLogParser::parse(
break;
}
- if (!memcmp(log_buf, magic, sizeof magic)) {
+ if (mach_read_from_4(log_buf) == 32743712) {
/* Truncate action completed. Avoid parsing the file. */
os_file_close(handle);
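For reference, the literal 32743712 in the comparison above is the old four-byte magic sequence { 0x01, 0xf3, 0xa1, 0x20 } (whose static definition is removed earlier in this file's diff) read as a big-endian 32-bit value, i.e. 0x01f3a120, which is how mach_read_from_4() interprets the bytes. A standalone check, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Big-endian 4-byte read, mirroring what mach_read_from_4() does.
    static std::uint32_t read_be32(const unsigned char* b)
    {
        return (std::uint32_t(b[0]) << 24) | (std::uint32_t(b[1]) << 16)
             | (std::uint32_t(b[2]) << 8)  |  std::uint32_t(b[3]);
    }

    int main()
    {
        const unsigned char magic[4] = { 0x01, 0xf3, 0xa1, 0x20 };
        assert(read_be32(magic) == 32743712);  // == 0x01f3a120
        return 0;
    }
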
@@ -682,7 +382,7 @@ TruncateLogParser::parse(
sz *= 2;
- buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == 0) {
os_file_close(handle);
@@ -693,7 +393,7 @@ TruncateLogParser::parse(
}
log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
+ ut_align(buf, srv_page_size));
}
} while (err != DB_SUCCESS);
@@ -834,15 +534,13 @@ public:
/**
Look for table-id in SYS_XXXX tables without loading the table.
- @param mtr mini-transaction covering the read
@param pcur persistent cursor used for reading
- @return DB_SUCCESS or error code */
- dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
-
-private:
- // Disably copying
- TableLocator(const TableLocator&);
- TableLocator& operator=(const TableLocator&);
+ @return DB_SUCCESS */
+ dberr_t operator()(mtr_t*, btr_pcur_t*)
+ {
+ m_table_found = true;
+ return(DB_SUCCESS);
+ }
private:
/** Set to true if table is present */
@@ -850,58 +548,6 @@ private:
};
/**
-@param mtr mini-transaction covering the read
-@param pcur persistent cursor used for reading
-@return DB_SUCCESS or error code */
-dberr_t
-TruncateLogger::operator()(mtr_t* mtr, btr_pcur_t* pcur)
-{
- ulint len;
- const byte* field;
- rec_t* rec = btr_pcur_get_rec(pcur);
- truncate_t::index_t index;
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
- ut_ad(len == 4);
- index.m_type = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
- ut_ad(len == 8);
- index.m_id = mach_read_from_8(field);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
- ut_ad(len == 4);
- index.m_root_page_no = mach_read_from_4(field);
-
- /* For compressed tables we need to store extra meta-data
- required during btr_create(). */
- if (FSP_FLAGS_GET_ZIP_SSIZE(m_flags)) {
-
- const dict_index_t* dict_index = find(index.m_id);
-
- if (dict_index != NULL) {
-
- dberr_t err = index.set(dict_index);
-
- if (err != DB_SUCCESS) {
- m_truncate.clear();
- return(err);
- }
-
- } else {
- ib::warn() << "Index id " << index.m_id
- << " not found";
- }
- }
-
- m_truncate.add(index);
-
- return(DB_SUCCESS);
-}
-
-/**
Drop an index in the table.
@param mtr mini-transaction covering the read
@@ -949,8 +595,7 @@ DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
}
#endif /* UNIV_DEBUG */
- DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
- freed = false;);
+ DBUG_EXECUTE_IF("ib_err_trunc_drop_index", return DB_ERROR;);
if (freed) {
@@ -967,16 +612,8 @@ DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
} else {
- /* Check if the .ibd file is missing. */
- bool found;
-
- fil_space_get_page_size(m_table->space, &found);
-
- DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
- found = false;);
-
- if (!found) {
- return(DB_ERROR);
+ if (!m_table->space) {
+ return DB_ERROR;
}
}
@@ -1035,8 +672,7 @@ CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
}
#endif /* UNIV_DEBUG */
- DBUG_EXECUTE_IF("ib_err_trunc_create_index",
- root_page_no = FIL_NULL;);
+ DBUG_EXECUTE_IF("ib_err_trunc_create_index", return DB_ERROR;);
if (root_page_no != FIL_NULL) {
@@ -1058,13 +694,7 @@ CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
} else {
- bool found;
- fil_space_get_page_size(m_table->space, &found);
-
- DBUG_EXECUTE_IF("ib_err_trunc_create_index",
- found = false;);
-
- if (!found) {
+ if (!m_table->space) {
return(DB_ERROR);
}
}
@@ -1073,265 +703,6 @@ CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
}
/**
-Look for table-id in SYS_XXXX tables without loading the table.
-
-@param mtr mini-transaction covering the read
-@param pcur persistent cursor used for reading
-@return DB_SUCCESS */
-dberr_t
-TableLocator::operator()(mtr_t* mtr, btr_pcur_t* pcur)
-{
- m_table_found = true;
-
- return(DB_SUCCESS);
-}
-
-/**
-Rollback the transaction and release the index locks.
-Drop indexes if table is corrupted so that drop/create
-sequence works as expected.
-
-@param table table to truncate
-@param trx transaction covering the TRUNCATE
-@param new_id new table id that was suppose to get assigned
- to the table if truncate executed successfully.
-@param has_internal_doc_id indicate existence of fts index
-@param no_redo if true, turn-off redo logging
-@param corrupted table corrupted status
-@param unlock_index if true then unlock indexes before action */
-static
-void
-row_truncate_rollback(
- dict_table_t* table,
- trx_t* trx,
- table_id_t new_id,
- bool has_internal_doc_id,
- bool no_redo,
- bool corrupted,
- bool unlock_index)
-{
- if (unlock_index) {
- dict_table_x_unlock_indexes(table);
- }
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- trx->error_state = DB_SUCCESS;
-
- if (corrupted && !dict_table_is_temporary(table)) {
-
- /* Cleanup action to ensure we don't left over stale entries
- if we are marking table as corrupted. This will ensure
- it can be recovered using drop/create sequence. */
- dict_table_x_lock_indexes(table);
-
- DropIndex dropIndex(table, no_redo);
-
- SysIndexIterator().for_each(dropIndex);
-
- dict_table_x_unlock_indexes(table);
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- dict_set_corrupted(index, trx, "TRUNCATE TABLE");
- }
-
- if (has_internal_doc_id) {
-
- ut_ad(!trx_is_started(trx));
-
- table_id_t id = table->id;
-
- table->id = new_id;
-
- fts_drop_tables(trx, table);
-
- table->id = id;
-
- ut_ad(trx_is_started(trx));
-
- trx_commit_for_mysql(trx);
- }
-
- } else if (corrupted && dict_table_is_temporary(table)) {
-
- dict_table_x_lock_indexes(table);
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- dict_drop_index_tree_in_mem(index, index->page);
-
- index->page = FIL_NULL;
- }
-
- dict_table_x_unlock_indexes(table);
- }
-
- table->corrupted = corrupted;
-}
-
-/**
-Finish the TRUNCATE operations for both commit and rollback.
-
-@param table table being truncated
-@param trx transaction covering the truncate
-@param fsp_flags tablespace flags
-@param logger table to truncate information logger
-@param err status of truncate operation
-
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-row_truncate_complete(
- dict_table_t* table,
- trx_t* trx,
- ulint fsp_flags,
- TruncateLogger* &logger,
- dberr_t err)
-{
- bool is_file_per_table = dict_table_is_file_per_table(table);
-
- /* Add the table back to FTS optimize background thread. */
- if (table->fts) {
- fts_optimize_add_table(table);
- }
-
- row_mysql_unlock_data_dictionary(trx);
-
- DEBUG_SYNC_C("ib_trunc_table_trunc_completing");
-
- if (!dict_table_is_temporary(table)) {
-
- DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal",
- log_buffer_flush_to_disk();
- os_thread_sleep(500000);
- DBUG_SUICIDE(););
-
- /* Note: We don't log-checkpoint instead we have written
- a special REDO log record MLOG_TRUNCATE that is used to
- avoid applying REDO records before truncate for crash
- that happens post successful truncate completion. */
-
- if (logger != NULL) {
- logger->done();
- UT_DELETE(logger);
- logger = NULL;
- }
- }
-
- /* If non-temp file-per-table tablespace... */
- if (is_file_per_table
- && !dict_table_is_temporary(table)
- && fsp_flags != ULINT_UNDEFINED) {
-
- /* This function will reset back the stop_new_ops
- and is_being_truncated so that fil-ops can re-start. */
- dberr_t err2 = truncate_t::truncate(
- table->space,
- table->data_dir_path,
- table->name.m_name, fsp_flags, false);
-
- if (err2 != DB_SUCCESS) {
- return(err2);
- }
- }
-
- if (err == DB_SUCCESS) {
- dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
- }
-
- trx->op_info = "";
-
- /* For temporary tables or if there was an error, we need to reset
- the dict operation flags. */
- trx->ddl = false;
- trx->dict_operation = TRX_DICT_OP_NONE;
-
- ut_ad(!trx_is_started(trx));
-
- srv_wake_master_thread();
-
- DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done",
- DBUG_SUICIDE(););
-
- return(err);
-}
-
-/**
-Handle FTS truncate issues.
-@param table table being truncated
-@param new_id new id for the table
-@param trx transaction covering the truncate
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-row_truncate_fts(
- dict_table_t* table,
- table_id_t new_id,
- trx_t* trx)
-{
- dict_table_t fts_table;
-
- fts_table.id = new_id;
- fts_table.name = table->name;
- fts_table.flags2 = table->flags2;
- fts_table.flags = table->flags;
- fts_table.space = table->space;
-
- /* table->data_dir_path is used for FTS AUX table
- creation. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)
- && table->data_dir_path == NULL) {
- dict_get_and_save_data_dir_path(table, true);
- ut_ad(table->data_dir_path != NULL);
- }
-
- fts_table.data_dir_path = table->data_dir_path;
-
- dberr_t err;
-
- err = fts_create_common_tables(
- trx, &fts_table, table->name.m_name, TRUE);
-
- for (ulint i = 0;
- i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS;
- i++) {
-
- dict_index_t* fts_index;
-
- fts_index = static_cast<dict_index_t*>(
- ib_vector_getp(table->fts->indexes, i));
-
- err = fts_create_index_tables_low(
- trx, fts_index, table->name.m_name, new_id);
- }
-
- DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc",
- err = DB_ERROR;);
-
- if (err != DB_SUCCESS) {
-
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
-
- ib::error() << "Unable to truncate FTS index for table "
- << table->name;
- } else {
-
- ut_ad(trx_is_started(trx));
- }
-
- return(err);
-}
-
-/**
Update system table to reflect new table id.
@param old_table_id old table id
@param new_table_id new table id
@@ -1409,7 +780,7 @@ row_truncate_update_sys_tables_during_fix_up(
ibool reserve_dict_mutex,
bool mark_index_corrupted)
{
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
@@ -1466,629 +837,228 @@ row_truncate_update_sys_tables_during_fix_up(
}
trx_commit_for_mysql(trx);
- trx_free_for_background(trx);
+ trx_free(trx);
return(err);
}
-/**
-Truncate also results in assignment of new table id, update the system
-SYSTEM TABLES with the new id.
-@param table, table being truncated
-@param new_id, new table id
-@param has_internal_doc_id, has doc col (fts)
-@param no_redo if true, turn-off redo logging
-@param trx transaction handle
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((warn_unused_result))
+/********************************************************//**
+Recreates table indexes by applying the
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+static
dberr_t
-row_truncate_update_system_tables(
- dict_table_t* table,
- table_id_t new_id,
- bool has_internal_doc_id,
- bool no_redo,
- trx_t* trx)
+fil_recreate_table(
+/*===============*/
+ ulint format_flags, /*!< in: page format */
+ const char* name, /*!< in: table name */
+ truncate_t& truncate) /*!< in: The information of
+ TRUNCATE log record */
{
- dberr_t err = DB_SUCCESS;
+ ut_ad(!truncate_t::s_fix_up_active);
+ truncate_t::s_fix_up_active = true;
- ut_a(!dict_table_is_temporary(table));
-
- err = row_truncate_update_table_id(table->id, new_id, FALSE, trx);
-
- DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update",
- err = DB_ERROR;);
+ /* Step-1: Scan for active indexes from REDO logs and drop
+ all the indexes using low-level functions that take root_page_no
+ and space-id. */
+ truncate.drop_indexes(fil_system.sys_space);
+ /* Step-2: Scan for active indexes and re-create them. */
+ dberr_t err = truncate.create_indexes(
+ name, fil_system.sys_space, format_flags);
if (err != DB_SUCCESS) {
-
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, true, false);
-
- ib::error() << "Unable to assign a new identifier to table "
- << table->name << " after truncating it. Marked the"
- " table as corrupted. In-memory representation is now"
- " different from the on-disk representation.";
- err = DB_ERROR;
- } else {
- /* Drop the old FTS index */
- if (has_internal_doc_id) {
-
- ut_ad(trx_is_started(trx));
-
- fts_drop_tables(trx, table);
-
- DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup",
- DBUG_SUICIDE(););
-
- ut_ad(trx_is_started(trx));
- }
-
- DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop",
- log_buffer_flush_to_disk();
- os_thread_sleep(2000000);
- DBUG_SUICIDE(););
-
- dict_table_change_id_in_cache(table, new_id);
-
- /* Reset the Doc ID in cache to 0 */
- if (has_internal_doc_id && table->fts->cache != NULL) {
- DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear",
- os_thread_sleep(10000000););
-
- table->fts->fts_status |= TABLE_DICT_LOCKED;
- fts_update_next_doc_id(trx, table, NULL, 0);
- fts_cache_clear(table->fts->cache);
- fts_cache_init(table->fts->cache);
- table->fts->fts_status &= uint(~TABLE_DICT_LOCKED);
- }
+ ib::info() << "Recovery failed for TRUNCATE TABLE '"
+ << name << "' within the system tablespace";
}
- return(err);
-}
-
-/**
-Prepare for the truncate process. On success all of the table's indexes will
-be locked in X mode.
-@param table table to truncate
-@param flags tablespace flags
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-row_truncate_prepare(dict_table_t* table, ulint* flags)
-{
- ut_ad(!dict_table_is_temporary(table));
- ut_ad(dict_table_is_file_per_table(table));
-
- *flags = fil_space_get_flags(table->space);
-
- ut_ad(!dict_table_is_temporary(table));
-
- dict_get_and_save_data_dir_path(table, true);
-
- if (*flags != ULINT_UNDEFINED) {
-
- dberr_t err = fil_prepare_for_truncate(table->space);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
+ truncate_t::s_fix_up_active = false;
- return(DB_SUCCESS);
+ return(err);
}
-/**
-Do foreign key checks before starting TRUNCATE.
-@param table table being truncated
-@param trx transaction covering the truncate
+/********************************************************//**
+Recreates the tablespace and table indexes by applying the
+TRUNCATE log record during recovery.
@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((warn_unused_result))
+static
dberr_t
-row_truncate_foreign_key_checks(
- const dict_table_t* table,
- const trx_t* trx)
+fil_recreate_tablespace(
+/*====================*/
+ ulint space_id, /*!< in: space id */
+ ulint format_flags, /*!< in: page format */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name, /*!< in: table name */
+ truncate_t& truncate, /*!< in: The information of
+ TRUNCATE log record */
+ lsn_t recv_lsn) /*!< in: the end LSN of
+ the log record */
{
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- dict_foreign_set::iterator it
- = std::find_if(table->referenced_set.begin(),
- table->referenced_set.end(),
- dict_foreign_different_tables());
-
- if (!srv_read_only_mode
- && it != table->referenced_set.end()
- && trx->check_foreigns) {
-
- dict_foreign_t* foreign = *it;
-
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow truncating a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
+ dberr_t err = DB_SUCCESS;
+ mtr_t mtr;
- mutex_enter(&dict_foreign_err_mutex);
+ ut_ad(!truncate_t::s_fix_up_active);
+ truncate_t::s_fix_up_active = true;
- rewind(ef);
+ /* Step-1: Invalidate buffer pool pages belonging to the tablespace
+ that is being re-created. */
+ buf_LRU_flush_or_remove_pages(space_id, NULL);
- ut_print_timestamp(ef);
+ /* Remove all insert buffer entries for the tablespace */
+ ibuf_delete_for_discarded_space(space_id);
- fputs(" Cannot truncate table ", ef);
- ut_print_name(ef, trx, table->name.m_name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because it is referenced by ", ef);
- ut_print_name(ef, trx, foreign->foreign_table_name);
- putc('\n', ef);
+ /* Step-2: Truncate the tablespace (reset its size back to the
+ original or default size). */
+ err = truncate.truncate(
+ space_id, truncate.get_dir_path(), name, flags, true);
- mutex_exit(&dict_foreign_err_mutex);
+ if (err != DB_SUCCESS) {
+ ib::info() << "Cannot access .ibd file for table '"
+ << name << "' with tablespace " << space_id
+ << " while truncating";
return(DB_ERROR);
}
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
- they can cope with the table having been truncated here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
- ib::warn() << "Cannot truncate table " << table->name
- << " because there is a foreign key check running on"
- " it.";
-
+ fil_space_t* space = fil_space_acquire(space_id);
+ if (!space) {
+ ib::info() << "Missing .ibd file for table '" << name
+ << "' with tablespace " << space_id;
return(DB_ERROR);
}
- return(DB_SUCCESS);
-}
+ const page_size_t page_size(space->flags);
-/**
-Do some sanity checks before starting the actual TRUNCATE.
-@param table table being truncated
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-row_truncate_sanity_checks(
- const dict_table_t* table)
-{
- if (dict_table_is_discarded(table)) {
+ /* Step-3: Initialize Header. */
+ if (page_size.is_compressed()) {
+ byte* buf;
+ page_t* page;
- return(DB_TABLESPACE_DELETED);
+ buf = static_cast<byte*>(
+ ut_zalloc_nokey(3U << srv_page_size_shift));
- } else if (!table->is_readable()) {
- if (fil_space_get(table->space) == NULL) {
- return(DB_TABLESPACE_NOT_FOUND);
+ /* Align the memory for file i/o */
+ page = static_cast<byte*>(ut_align(buf, srv_page_size));
- } else {
- return(DB_DECRYPTION_FAILED);
- }
- } else if (dict_table_is_corrupted(table)) {
+ flags |= FSP_FLAGS_PAGE_SSIZE();
- return(DB_TABLE_CORRUPT);
- }
+ fsp_header_init_fields(page, space_id, flags);
- return(DB_SUCCESS);
-}
+ mach_write_to_4(
+ page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
+
+ page_zip_des_t page_zip;
+ page_zip_set_size(&page_zip, page_size.physical());
+ page_zip.data = page + srv_page_size;
-/**
-Truncates a table for MySQL.
-@param table table being truncated
-@param trx transaction covering the truncate
-@return error code or DB_SUCCESS */
-dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx)
-{
- bool is_file_per_table = dict_table_is_file_per_table(table);
- dberr_t err;
#ifdef UNIV_DEBUG
- ulint old_space = table->space;
+ page_zip.m_start =
#endif /* UNIV_DEBUG */
- TruncateLogger* logger = NULL;
-
- /* Understanding the truncate flow.
-
- Step-1: Perform intiial sanity check to ensure table can be truncated.
- This would include check for tablespace discard status, ibd file
- missing, etc ....
-
- Step-2: Start transaction (only for non-temp table as temp-table don't
- modify any data on disk doesn't need transaction object).
-
- Step-3: Validate ownership of needed locks (Exclusive lock).
- Ownership will also ensure there is no active SQL queries, INSERT,
- SELECT, .....
-
- Step-4: Stop all the background process associated with table.
-
- Step-5: There are few foreign key related constraint under which
- we can't truncate table (due to referential integrity unless it is
- turned off). Ensure this condition is satisfied.
-
- Step-6: Truncate operation can be rolled back in case of error
- till some point. Associate rollback segment to record undo log.
-
- Step-7: Generate new table-id.
- Why we need new table-id ?
- Purge and rollback case: we assign a new table id for the table.
- Since purge and rollback look for the table based on the table id,
- they see the table as 'dropped' and discard their operations.
-
- Step-8: Log information about tablespace which includes
- table and index information. If there is a crash in the next step
- then during recovery we will attempt to fixup the operation.
-
- Step-9: Drop all indexes (this include freeing of the pages
- associated with them).
-
- Step-10: Re-create new indexes.
-
- Step-11: Update new table-id to in-memory cache (dictionary),
- on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
- be updated to reflect updated root-page-no of new index created
- and updated table-id.
-
- Step-12: Cleanup Stage. Reset auto-inc value to 1.
- Release all the locks.
- Commit the transaction. Update trx operation state.
-
- Notes:
- - On error, log checkpoint is done followed writing of magic number to
- truncate log file. If servers crashes after truncate, fix-up action
- will not be applied.
-
- - log checkpoint is done before starting truncate table to ensure
- that previous REDO log entries are not applied if current truncate
- crashes. Consider following use-case:
- - create table .... insert/load table .... truncate table (crash)
- - on restart table is restored .... truncate table (crash)
- - on restart (assuming default log checkpoint is not done) will have
- 2 REDO log entries for same table. (Note 2 REDO log entries
- for different table is not an issue).
- For system-tablespace we can't truncate the tablespace so we need
- to initiate a local cleanup that involves dropping of indexes and
- re-creating them. If we apply stale entry we might end-up issuing
- drop on wrong indexes.
-
- - Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
- so we do not have to remove insert buffer records, as the
- insert buffer works at a low level. If a freed page is later
- reallocated, the allocator will remove the ibuf entries for
- it. When we prepare to truncate *.ibd files, we remove all entries
- for the table in the insert buffer tree. This is not strictly
- necessary, but we can free up some space in the system tablespace.
-
- - Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This is only
- relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.)
- Ensure that the table will be dropped by trx_rollback_active() in
- case of a crash.
- */
-
- /*-----------------------------------------------------------------*/
- /* Step-1: Perform intiial sanity check to ensure table can be
- truncated. This would include check for tablespace discard status,
- ibd file missing, etc .... */
- err = row_truncate_sanity_checks(table);
- if (err != DB_SUCCESS) {
- return(err);
+ page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
+ buf_flush_init_for_writing(NULL, page, &page_zip, 0);
- }
+ err = fil_io(IORequestWrite, true, page_id_t(space_id, 0),
+ page_size, 0, page_size.physical(), page_zip.data,
+ NULL);
- /* Step-2: Start transaction (only for non-temp table as temp-table
- don't modify any data on disk doesn't need transaction object). */
- if (!dict_table_is_temporary(table)) {
- if (table->fts) {
- fts_optimize_remove_table(table);
- }
+ ut_free(buf);
- /* Avoid transaction overhead for temporary table DDL. */
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ if (err != DB_SUCCESS) {
+ ib::info() << "Failed to clean header of the"
+ " table '" << name << "' with tablespace "
+ << space_id;
+ goto func_exit;
+ }
}
- DEBUG_SYNC_C("row_trunc_before_dict_lock");
-
- /* Step-3: Validate ownership of needed locks (Exclusive lock).
- Ownership will also ensure there is no active SQL queries, INSERT,
- SELECT, .....*/
- trx->op_info = "truncating table";
- ut_a(trx->dict_operation_lock_mode == 0);
- row_mysql_lock_data_dictionary(trx);
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
-
- /* Step-4: Stop all the background process associated with table. */
- dict_stats_wait_bg_to_stop_using_table(table, trx);
+ mtr_start(&mtr);
+ /* Don't log the operation while fixing up the table truncate
+ operation, as a crash at this point can still be handled by recovery
+ restarting from the last checkpoint. */
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ /* Initialize the first extent descriptor page and
+ the second bitmap page for the new tablespace. */
+ fsp_header_init(space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+ mtr_commit(&mtr);
- /* Step-5: There are few foreign key related constraint under which
- we can't truncate table (due to referential integrity unless it is
- turned off). Ensure this condition is satisfied. */
- ulint fsp_flags = ULINT_UNDEFINED;
- err = row_truncate_foreign_key_checks(table, trx);
+ /* Step-4: Re-create indexes in the newly re-created tablespace.
+ This operation restores the tablespace to the state it was in
+ when it was created by CREATE TABLE. */
+ err = truncate.create_indexes(name, space, format_flags);
if (err != DB_SUCCESS) {
- trx_rollback_to_savepoint(trx, NULL);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
+ goto func_exit;
}
- /* Remove all locks except the table-level X lock. */
- lock_remove_all_on_table(table, FALSE);
- trx->table_id = table->id;
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- /* Step-6: Truncate operation can be rolled back in case of error
- till some point. Associate rollback segment to record undo log. */
- if (!dict_table_is_temporary(table)) {
- mutex_enter(&trx->undo_mutex);
-
- trx_undo_t** pundo = &trx->rsegs.m_redo.update_undo;
- err = trx_undo_assign_undo(
- trx, trx->rsegs.m_redo.rseg, pundo, TRX_UNDO_UPDATE);
-
- mutex_exit(&trx->undo_mutex);
-
- DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
- err = DB_ERROR;);
- if (err != DB_SUCCESS) {
- trx_rollback_to_savepoint(trx, NULL);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
- }
- }
-
- /* Step-7: Generate new table-id.
- Why we need new table-id ?
- Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations. */
- table_id_t new_id;
- dict_hdr_get_new_id(&new_id, NULL, NULL, table, false);
-
- /* Check if table involves FTS index. */
- bool has_internal_doc_id =
- dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
+ /* Step-5: Write the newly created pages through the .ibd file handle
+ and flush them to disk for the tablespace, in case the i/o-handler
+ thread deletes the bitmap page from the buffer pool. */
+ mtr_start(&mtr);
- bool no_redo = is_file_per_table && !has_internal_doc_id;
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- /* Step-8: Log information about tablespace which includes
- table and index information. If there is a crash in the next step
- then during recovery we will attempt to fixup the operation. */
+ for (ulint page_no = 0;
+ page_no < UT_LIST_GET_FIRST(space->chain)->size; ++page_no) {
- /* Lock all index trees for this table, as we will truncate
- the table/index and possibly change their metadata. All
- DML/DDL are blocked by table level X lock, with a few exceptions
- such as queries into information schema about the table,
- MySQL could try to access index stats for this kind of query,
- we need to use index locks to sync up */
- dict_table_x_lock_indexes(table);
+ const page_id_t cur_page_id(space_id, page_no);
- if (!dict_table_is_temporary(table)) {
+ buf_block_t* block = buf_page_get(cur_page_id, page_size,
+ RW_X_LATCH, &mtr);
- if (is_file_per_table) {
+ byte* page = buf_block_get_frame(block);
- err = row_truncate_prepare(table, &fsp_flags);
+ if (!FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+ ut_ad(!page_size.is_compressed());
- DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
- err = DB_ERROR;);
+ buf_flush_init_for_writing(
+ block, page, NULL, recv_lsn);
- if (err != DB_SUCCESS) {
- row_truncate_rollback(
- table, trx, new_id,
- has_internal_doc_id,
- no_redo, false, true);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
- }
+ err = fil_io(IORequestWrite, true, cur_page_id,
+ page_size, 0, srv_page_size, page, NULL);
} else {
- fsp_flags = fil_space_get_flags(table->space);
-
- DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
- fsp_flags = ULINT_UNDEFINED;);
-
- if (fsp_flags == ULINT_UNDEFINED) {
- row_truncate_rollback(
- table, trx, new_id,
- has_internal_doc_id,
- no_redo, false, true);
- return(row_truncate_complete(
- table, trx, fsp_flags,
- logger, DB_ERROR));
- }
- }
+ ut_ad(page_size.is_compressed());
- logger = UT_NEW_NOKEY(TruncateLogger(
- table, fsp_flags, new_id));
+ /* We don't want to rewrite empty pages. */
- err = logger->init();
- if (err != DB_SUCCESS) {
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, false, true);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, DB_ERROR));
+ if (fil_page_get_type(page) != 0) {
+ page_zip_des_t* page_zip =
+ buf_block_get_page_zip(block);
- }
-
- err = SysIndexIterator().for_each(*logger);
- if (err != DB_SUCCESS) {
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, false, true);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, DB_ERROR));
-
- }
-
- ut_ad(logger->debug());
-
- err = logger->log();
-
- if (err != DB_SUCCESS) {
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, false, true);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, DB_ERROR));
- }
- }
-
- DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete",
- log_buffer_flush_to_disk();
- os_thread_sleep(3000000);
- DBUG_SUICIDE(););
-
- /* Step-9: Drop all indexes (free index pages associated with these
- indexes) */
- if (!dict_table_is_temporary(table)) {
+ buf_flush_init_for_writing(
+ block, page, page_zip, recv_lsn);
- DropIndex dropIndex(table, no_redo);
-
- err = SysIndexIterator().for_each(dropIndex);
-
- if (err != DB_SUCCESS) {
-
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, true, true);
-
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
- }
- } else {
- /* For temporary tables we don't have entries in SYSTEM TABLES*/
- ut_ad(fsp_is_system_temporary(table->space));
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
+ err = fil_io(IORequestWrite, true,
+ cur_page_id,
+ page_size, 0,
+ page_size.physical(),
+ page_zip->data, NULL);
+ } else {
+#ifdef UNIV_DEBUG
+ const byte* data = block->page.zip.data;
- err = dict_truncate_index_tree_in_mem(index);
+ /* Make sure that the page is really empty */
+ for (ulint i = 0;
+ i < page_size.physical();
+ ++i) {
- if (err != DB_SUCCESS) {
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, true, true);
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
+ ut_a(data[i] == 0);
+ }
+#endif /* UNIV_DEBUG */
}
-
- DBUG_EXECUTE_IF(
- "ib_trunc_crash_during_drop_index_temp_table",
- log_buffer_flush_to_disk();
- os_thread_sleep(2000000);
- DBUG_SUICIDE(););
- }
- }
-
- if (is_file_per_table && fsp_flags != ULINT_UNDEFINED) {
- /* A single-table tablespace has initially
- FIL_IBD_FILE_INITIAL_SIZE number of pages allocated and an
- extra page is allocated for each of the indexes present. But in
- the case of clust index 2 pages are allocated and as one is
- covered in the calculation as part of table->indexes.count we
- take care of the other page by adding 1. */
- ulint space_size = table->indexes.count +
- FIL_IBD_FILE_INITIAL_SIZE + 1;
-
- if (has_internal_doc_id) {
- /* Since aux tables are created for fts indexes and
- they use seperate tablespaces. */
- space_size -= ib_vector_size(table->fts->indexes);
- }
-
- fil_reinit_space_header_for_table(table, space_size, trx);
- }
-
- DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint",
- log_buffer_flush_to_disk();
- os_thread_sleep(2000000);
- log_checkpoint(TRUE, TRUE);
- os_thread_sleep(1000000);
- DBUG_SUICIDE(););
-
- DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start",
- log_buffer_flush_to_disk();
- os_thread_sleep(2000000);
- DBUG_SUICIDE(););
-
- /* Step-10: Re-create new indexes. */
- if (!dict_table_is_temporary(table)) {
-
- CreateIndex createIndex(table, no_redo);
-
- err = SysIndexIterator().for_each(createIndex);
-
- if (err != DB_SUCCESS) {
-
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, true, true);
-
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
- }
- }
-
- /* Done with index truncation, release index tree locks,
- subsequent work relates to table level metadata change */
- dict_table_x_unlock_indexes(table);
-
- if (has_internal_doc_id) {
-
- err = row_truncate_fts(table, new_id, trx);
-
- if (err != DB_SUCCESS) {
-
- row_truncate_rollback(
- table, trx, new_id, has_internal_doc_id,
- no_redo, true, false);
-
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
}
- }
-
- /* Step-11: Update new table-id to in-memory cache (dictionary),
- on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
- be updated to reflect updated root-page-no of new index created
- and updated table-id. */
- if (dict_table_is_temporary(table)) {
-
- dict_table_change_id_in_cache(table, new_id);
- err = DB_SUCCESS;
-
- } else {
-
- /* If this fails then we are in an inconsistent state and
- the results are undefined. */
- ut_ad(old_space == table->space);
-
- err = row_truncate_update_system_tables(
- table, new_id, has_internal_doc_id, no_redo, trx);
if (err != DB_SUCCESS) {
- return(row_truncate_complete(
- table, trx, fsp_flags, logger, err));
+ ib::info() << "Cannot write page " << page_no
+ << " into a .ibd file for table '"
+ << name << "' with tablespace " << space_id;
}
}
- DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info",
- log_buffer_flush_to_disk();
- os_thread_sleep(2000000);
- DBUG_SUICIDE(););
-
- /* Step-12: Cleanup Stage. Reset auto-inc value to 1.
- Release all the locks.
- Commit the transaction. Update trx operation state. */
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, 1);
- dict_table_autoinc_unlock(table);
-
- if (trx_is_started(trx)) {
-
- trx_commit_for_mysql(trx);
- }
+ mtr_commit(&mtr);
- return(row_truncate_complete(table, trx, fsp_flags, logger, err));
+ truncate_t::s_fix_up_active = false;
+func_exit:
+ space->release();
+ return(err);
}
/**
@@ -2113,9 +1083,7 @@ truncate_t::fixup_tables_in_system_tablespace()
"residing in the system tablespace.";
err = fil_recreate_table(
- (*it)->m_space_id,
(*it)->m_format_flags,
- (*it)->m_tablespace_flags,
(*it)->m_tablename,
**it);
@@ -2174,23 +1142,22 @@ truncate_t::fixup_tables_in_non_system_tablespace()
"residing in file-per-table tablespace with "
"id (" << (*it)->m_space_id << ")";
- if (!fil_space_get((*it)->m_space_id)) {
+ fil_space_t* space = fil_space_get((*it)->m_space_id);
+ if (!space) {
/* Create the database directory for name,
if it does not exist yet */
fil_create_directory_for_tablename(
(*it)->m_tablename);
- err = fil_ibd_create(
- (*it)->m_space_id,
- (*it)->m_tablename,
- (*it)->m_dir_path,
- (*it)->m_tablespace_flags,
- FIL_IBD_FILE_INITIAL_SIZE,
- (*it)->m_encryption,
- (*it)->m_key_id);
-
- if (err != DB_SUCCESS) {
+ space = fil_ibd_create((*it)->m_space_id,
+ (*it)->m_tablename,
+ (*it)->m_dir_path,
+ (*it)->m_tablespace_flags,
+ FIL_IBD_FILE_INITIAL_SIZE,
+ (*it)->m_encryption,
+ (*it)->m_key_id, &err);
+ if (!space) {
/* If checkpoint is not yet done
and table is dropped and then we might
still have REDO entries for this table
@@ -2204,8 +1171,6 @@ truncate_t::fixup_tables_in_non_system_tablespace()
}
}
- ut_ad(fil_space_get((*it)->m_space_id));
-
err = fil_recreate_tablespace(
(*it)->m_space_id,
(*it)->m_format_flags,
@@ -2385,7 +1350,7 @@ truncate_t::update_root_page_no(
pars_info_add_ull_literal(
info, "index_id",
- (mark_index_corrupted ? -1 : it->m_id));
+ (mark_index_corrupted ? IB_ID_MAX : it->m_id));
err = que_eval_sql(
info,
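The switch from -1 to IB_ID_MAX in the hunk above should not change the value that reaches pars_info_add_ull_literal(): index ids are unsigned 64-bit, so the old -1 already wrapped to the all-ones value, and the new spelling just makes that intent explicit. A standalone check (IB_ID_MAX is assumed here to be the all-ones 64-bit value):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        typedef std::uint64_t index_id_demo;                     // stands in for index_id_t
        const index_id_demo from_minus_one = index_id_demo(-1);
        const index_id_demo ib_id_max_demo = ~index_id_demo(0);  // assumed IB_ID_MAX
        assert(from_minus_one == ib_id_max_demo);                // both 0xFFFFFFFFFFFFFFFF
        return 0;
    }
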
@@ -2681,8 +1646,7 @@ truncate_t::index_t::set(
/** Create an index for a table.
@param[in] table_name table name, for which to create
the index
-@param[in] space_id space id where we have to
-create the index
+@param[in] space tablespace
@param[in] page_size page size of the .ibd file
@param[in] index_type type of index to truncate
@param[in] index_id id of index to truncate
@@ -2690,18 +1654,17 @@ create the index
@param[in,out] mtr mini-transaction covering the
create index
@return root page no or FIL_NULL on failure */
-ulint
+inline ulint
truncate_t::create_index(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
+ fil_space_t* space,
ulint index_type,
index_id_t index_id,
const btr_create_t& btr_redo_create_info,
mtr_t* mtr) const
{
ulint root_page_no = btr_create(
- index_type, space_id, page_size, index_id,
+ index_type, space, index_id,
NULL, &btr_redo_create_info, mtr);
if (root_page_no == FIL_NULL) {
@@ -2710,7 +1673,7 @@ truncate_t::create_index(
<< srv_force_recovery << ". Continuing crash recovery"
" even though we failed to create index " << index_id
<< " for compressed table '" << table_name << "' with"
- " tablespace " << space_id << " during recovery";
+ " file " << space->chain.start->name;
}
return(root_page_no);
@@ -2718,30 +1681,27 @@ truncate_t::create_index(
/** Check if index has been modified since TRUNCATE log snapshot
was recorded.
-@param space_id space_id where table/indexes resides.
-@param root_page_no root page of index that needs to be verified.
+@param[in] space tablespace
+@param[in] root_page_no index root page number
@return true if modified else false */
-
+inline
bool
truncate_t::is_index_modified_since_logged(
- ulint space_id,
- ulint root_page_no) const
+ const fil_space_t* space,
+ ulint root_page_no) const
{
- mtr_t mtr;
- bool found;
- const page_size_t& page_size = fil_space_get_page_size(space_id,
- &found);
- dberr_t err = DB_SUCCESS;
-
- ut_ad(found);
+ dberr_t err;
+ mtr_t mtr;
mtr_start(&mtr);
/* Root page could be in free state if truncate crashed after drop_index
and page was not allocated for any other object. */
buf_block_t* block= buf_page_get_gen(
- page_id_t(space_id, root_page_no), page_size, RW_X_LATCH, NULL,
+ page_id_t(space->id, root_page_no), page_size_t(space->flags),
+ RW_X_LATCH, NULL,
BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err);
+ if (!block) return true;
page_t* root = buf_block_get_frame(block);
@@ -2765,31 +1725,21 @@ truncate_t::is_index_modified_since_logged(
}
/** Drop indexes for a table.
-@param space_id space_id where table/indexes resides. */
-
-void
-truncate_t::drop_indexes(
- ulint space_id) const
+@param[in,out] space tablespace */
+void truncate_t::drop_indexes(fil_space_t* space) const
{
mtr_t mtr;
- ulint root_page_no = FIL_NULL;
indexes_t::const_iterator end = m_indexes.end();
+ const page_size_t page_size(space->flags);
for (indexes_t::const_iterator it = m_indexes.begin();
it != end;
++it) {
- root_page_no = it->m_root_page_no;
+ ulint root_page_no = it->m_root_page_no;
- bool found;
- const page_size_t& page_size
- = fil_space_get_page_size(space_id, &found);
-
- ut_ad(found);
-
- if (is_index_modified_since_logged(
- space_id, root_page_no)) {
+ if (is_index_modified_since_logged(space, root_page_no)) {
/* Page has been modified since TRUNCATE log snapshot
was recorded so not safe to drop the index. */
continue;
@@ -2797,14 +1747,14 @@ truncate_t::drop_indexes(
mtr_start(&mtr);
- if (space_id != TRX_SYS_SPACE) {
+ if (space->id != TRX_SYS_SPACE) {
/* Do not log changes for single-table
tablespaces, we are in recovery mode. */
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
}
if (root_page_no != FIL_NULL) {
- const page_id_t root_page_id(space_id, root_page_no);
+ const page_id_t root_page_id(space->id, root_page_no);
btr_free_if_exists(
root_page_id, page_size, it->m_id, &mtr);
@@ -2820,24 +1770,20 @@ truncate_t::drop_indexes(
/** Create the indexes for a table
@param[in] table_name table name, for which to create the indexes
-@param[in] space_id space id where we have to create the indexes
-@param[in] page_size page size of the .ibd file
-@param[in] flags tablespace flags
+@param[in,out] space tablespace
@param[in] format_flags page format flags
@return DB_SUCCESS or error code. */
-dberr_t
+inline dberr_t
truncate_t::create_indexes(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
- ulint flags,
+ fil_space_t* space,
ulint format_flags)
{
mtr_t mtr;
mtr_start(&mtr);
- if (space_id != TRX_SYS_SPACE) {
+ if (space->id != TRX_SYS_SPACE) {
/* Do not log changes for single-table tablespaces, we
are in recovery mode. */
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
@@ -2854,12 +1800,12 @@ truncate_t::create_indexes(
++it) {
btr_create_t btr_redo_create_info(
- FSP_FLAGS_GET_ZIP_SSIZE(flags)
+ FSP_FLAGS_GET_ZIP_SSIZE(space->flags)
? &it->m_fields[0] : NULL);
btr_redo_create_info.format_flags = format_flags;
- if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
+ if (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
btr_redo_create_info.n_fields = it->m_n_fields;
/* Skip the NUL appended field */
@@ -2869,7 +1815,7 @@ truncate_t::create_indexes(
}
root_page_no = create_index(
- table_name, space_id, page_size, it->m_type, it->m_id,
+ table_name, space, it->m_type, it->m_id,
btr_redo_create_info, &mtr);
if (root_page_no == FIL_NULL) {
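/* Editor's note: a minimal standalone sketch (not InnoDB code) of the
   refactoring pattern applied in the hunks above: create_index() and
   create_indexes() now take a single tablespace handle and derive the
   compression/page-size decision from its flags, instead of receiving
   space_id, page_size and flags separately.  The names Space,
   is_compressed() and the flag layout below are hypothetical
   illustrations only, not the real fil_space_t or FSP_FLAGS layout. */
#include <cstdint>
#include <iostream>

struct Space {                 // stands in for fil_space_t
	uint32_t id;
	uint32_t flags;        // packed tablespace attributes
};

// Hypothetical flag layout: bits 1..4 hold a nonzero "zip shift" when the
// tablespace uses a compressed page format (cf. FSP_FLAGS_GET_ZIP_SSIZE).
inline bool is_compressed(uint32_t flags) { return (flags >> 1) & 0xF; }

// Before: create_index(name, space_id, page_size, ...)
// After:  create_index(name, space, ...)  -- one handle carries both.
static int create_index_sketch(const char* name, const Space& space)
{
	std::cout << name << ": space " << space.id
		  << (is_compressed(space.flags) ? " (compressed)\n" : "\n");
	return 0;
}

int main()
{
	Space s{10, 1u << 1};	// hypothetical: zip shift = 1
	return create_index_sketch("t1_idx", s);
}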
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 22edb7faf89..b2679c87dae 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -78,9 +78,10 @@ row_undo_ins_remove_clust_rec(
mtr.start();
if (index->table->is_temporary()) {
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
/* This is similar to row_undo_mod_clust(). The DDL thread may
@@ -120,10 +121,11 @@ row_undo_ins_remove_clust_rec(
mem_heap_free(heap);
}
- if (node->table->id == DICT_INDEXES_ID) {
-
+ switch (node->table->id) {
+ case DICT_INDEXES_ID:
ut_ad(!online);
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
dict_drop_index_tree(
btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
@@ -135,6 +137,54 @@ row_undo_ins_remove_clust_rec(
success = btr_pcur_restore_position(
BTR_MODIFY_LEAF, &node->pcur, &mtr);
ut_a(success);
+ break;
+ case DICT_COLUMNS_ID:
+ /* This is rolling back an INSERT into SYS_COLUMNS.
+ If it was part of an instant ADD COLUMN operation, we
+ must modify the table definition. At this point, any
+	  corresponding operation on the metadata record will have
+ been rolled back. */
+ ut_ad(!online);
+ ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
+ const rec_t* rec = btr_pcur_get_rec(&node->pcur);
+ if (rec_get_n_fields_old(rec)
+ != DICT_NUM_FIELDS__SYS_COLUMNS) {
+ break;
+ }
+ ulint len;
+ const byte* data = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len);
+ if (len != 8) {
+ break;
+ }
+ const table_id_t table_id = mach_read_from_8(data);
+ data = rec_get_nth_field_old(rec, DICT_FLD__SYS_COLUMNS__POS,
+ &len);
+ if (len != 4) {
+ break;
+ }
+ const unsigned pos = mach_read_from_4(data);
+ if (pos == 0 || pos >= (1U << 16)) {
+ break;
+ }
+ dict_table_t* table = dict_table_open_on_id(
+ table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
+ if (!table) {
+ break;
+ }
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ if (index && index->is_instant()
+ && DATA_N_SYS_COLS + 1 + pos == table->n_cols) {
+ /* This is the rollback of an instant ADD COLUMN.
+ Remove the column from the dictionary cache,
+ but keep the system columns. */
+ table->rollback_instant(pos);
+ }
+
+ dict_table_close(table, true, false);
}
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
@@ -149,7 +199,7 @@ retry:
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
success = btr_pcur_restore_position(
@@ -177,6 +227,27 @@ retry:
func_exit:
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+ if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) {
+ /* When rolling back the very first instant ADD COLUMN
+ operation, reset the root page to the basic state. */
+ ut_ad(!index->table->is_temporary());
+ mtr.start();
+ if (page_t* root = btr_root_get(index, &mtr)) {
+ byte* page_type = root + FIL_PAGE_TYPE;
+ ut_ad(mach_read_from_2(page_type)
+ == FIL_PAGE_TYPE_INSTANT
+ || mach_read_from_2(page_type)
+ == FIL_PAGE_INDEX);
+ index->set_modified(mtr);
+ mlog_write_ulint(page_type, FIL_PAGE_INDEX,
+ MLOG_2BYTES, &mtr);
+ byte* instant = PAGE_INSTANT + PAGE_HEADER + root;
+ mlog_write_ulint(instant,
+ page_ptr_get_direction(instant + 1),
+ MLOG_2BYTES, &mtr);
+ }
+ mtr.commit();
+ }
return(err);
}
@@ -351,15 +422,16 @@ row_undo_ins_parse_undo_rec(
default:
ut_ad(!"wrong undo record type");
goto close_table;
+ case TRX_UNDO_INSERT_METADATA:
case TRX_UNDO_INSERT_REC:
break;
case TRX_UNDO_RENAME_TABLE:
dict_table_t* table = node->table;
ut_ad(!table->is_temporary());
ut_ad(dict_table_is_file_per_table(table)
- == (table->space != TRX_SYS_SPACE));
+ == !is_system_tablespace(table->space_id));
size_t len = mach_read_from_2(node->undo_rec)
- + node->undo_rec - ptr - 2;
+ + size_t(node->undo_rec - ptr) - 2;
ptr[len] = 0;
const char* name = reinterpret_cast<char*>(ptr);
if (strcmp(table->name.m_name, name)) {
@@ -387,8 +459,13 @@ close_table:
clust_index = dict_table_get_first_index(node->table);
if (clust_index != NULL) {
- ptr = trx_undo_rec_get_row_ref(
- ptr, clust_index, &node->ref, node->heap);
+ if (node->rec_type == TRX_UNDO_INSERT_REC) {
+ ptr = trx_undo_rec_get_row_ref(
+ ptr, clust_index, &node->ref,
+ node->heap);
+ } else {
+ node->ref = &trx_undo_metadata;
+ }
if (!row_undo_search_clust_to_pcur(node)) {
/* An error probably occurred during
@@ -502,18 +579,28 @@ row_undo_ins(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
- /* Skip the clustered index (the first index) */
- node->index = dict_table_get_next_index(node->index);
- dict_table_skip_corrupt_index(node->index);
+ switch (node->rec_type) {
+ default:
+ ut_ad(!"wrong undo record type");
+ case TRX_UNDO_INSERT_REC:
+ /* Skip the clustered index (the first index) */
+ node->index = dict_table_get_next_index(node->index);
+
+ dict_table_skip_corrupt_index(node->index);
- err = row_undo_ins_remove_sec_rec(node, thr);
+ err = row_undo_ins_remove_sec_rec(node, thr);
- if (err == DB_SUCCESS) {
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ /* fall through */
+ case TRX_UNDO_INSERT_METADATA:
log_free_check();
if (node->table->id == DICT_INDEXES_ID) {
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
if (!dict_locked) {
mutex_enter(&dict_sys->mutex);
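/* Editor's note: the DICT_COLUMNS_ID branch added above reads the
   TABLE_ID (8 bytes) and POS (4 bytes) columns of a SYS_COLUMNS record
   before deciding whether to call rollback_instant().  Below is a
   minimal sketch of the big-endian decoding that mach_read_from_8()
   and mach_read_from_4() perform, using only standard C++; the buffer
   contents are a made-up example, not a real record. */
#include <cassert>
#include <cstddef>
#include <cstdint>

static uint64_t read_be(const unsigned char* p, size_t n)
{
	uint64_t v = 0;
	for (size_t i = 0; i < n; i++) {
		v = (v << 8) | p[i];	// most significant byte first
	}
	return v;
}

int main()
{
	// 8-byte table id = 0x1122334455667788, followed by 4-byte pos = 5
	const unsigned char rec[12] = {
		0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
		0x00, 0x00, 0x00, 0x05};
	assert(read_be(rec, 8)     == 0x1122334455667788ULL);
	assert(read_be(rec + 8, 4) == 5);
	return 0;
}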
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 4ed4e74fce3..41079450159 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@ Created 2/27/1997 Heikki Tuuri
#include "dict0boot.h"
#include "trx0undo.h"
#include "trx0roll.h"
+#include "trx0purge.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "ibuf0ibuf.h"
@@ -121,7 +122,8 @@ row_undo_mod_clust_low(
}
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
+ ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
+ == BTR_MODIFY_LEAF);
err = btr_cur_optimistic_update(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
@@ -146,101 +148,56 @@ row_undo_mod_clust_low(
return(err);
}
-/***********************************************************//**
-Purges a clustered index record after undo if possible.
-This is attempted when the record was inserted by updating a
-delete-marked record and there no longer exist transactions
-that would see the delete-marked record.
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_remove_clust_low(
-/*==========================*/
- undo_node_t* node, /*!< in: row undo node */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+/** Get the byte offset of the DB_TRX_ID column
+@param[in] rec clustered index record
+@param[in] index clustered index
+@return the byte offset of DB_TRX_ID, from the start of rec */
+static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
{
- btr_cur_t* btr_cur;
- dberr_t err;
- ulint trx_id_offset;
-
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
-
- /* Find out if the record has been purged already
- or if we can remove it. */
-
- if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
- || row_vers_must_preserve_del_marked(node->new_trx_id,
- node->table->name,
- mtr)) {
-
- return(DB_SUCCESS);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&node->pcur);
-
- trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
-
+ ut_ad(index->n_uniq <= MAX_REF_PARTS);
+ ulint trx_id_offset = index->trx_id_offset;
if (!trx_id_offset) {
- mem_heap_t* heap = NULL;
- ulint trx_id_col;
- const ulint* offsets;
- ulint len;
-
- trx_id_col = dict_index_get_sys_col_pos(
- btr_cur_get_index(btr_cur), DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- offsets = rec_get_offsets(
- btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
- NULL, true, trx_id_col + 1, &heap);
-
+ /* Reserve enough offsets for the PRIMARY KEY and 2 columns
+ so that we can access DB_TRX_ID, DB_ROLL_PTR. */
+ ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
+ rec_offs_init(offsets_);
+ mem_heap_t* heap = NULL;
+ const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
+ ulint* offsets = rec_get_offsets(rec, index, offsets_, true,
+ trx_id_pos + 1, &heap);
+ ut_ad(!heap);
+ ulint len;
trx_id_offset = rec_get_nth_field_offs(
- offsets, trx_id_col, &len);
+ offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
- mem_heap_free(heap);
}
- if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
- != node->new_trx_id) {
- /* The record must have been purged and then replaced
- with a different one. */
- return(DB_SUCCESS);
- }
+ return trx_id_offset;
+}
- /* We are about to remove an old, delete-marked version of the
- record that may have been delete-marked by a different transaction
- than the rolling-back one. */
- ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
- dict_table_is_comp(node->table)));
- /* In delete-marked records, DB_TRX_ID must
- always refer to an existing update_undo log record. */
- ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index));
-
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
- ? DB_SUCCESS
- : DB_FAIL;
- } else {
- ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
+/** Determine if rollback must execute a purge-like operation.
+@param[in,out] node row undo
+@param[in,out] mtr mini-transaction
+@return whether the record should be purged */
+static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
+{
+ ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
+ ut_ad(!node->table->is_temporary());
- /* This operation is analogous to purge, we can free also
- inherited externally stored fields.
- We can also assume that the record was complete
- (including BLOBs), because it had been delete-marked
- after it had been completely inserted. Therefore, we
- are passing rollback=false, just like purge does. */
+ btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+ ut_ad(btr_cur->index->is_primary());
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- false, mtr);
+ mtr_s_lock(&purge_sys.latch, mtr);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
+ if (!purge_sys.view.changes_visible(node->new_trx_id,
+ node->table->name)) {
+ return false;
}
- return(err);
+ const rec_t* rec = btr_cur_get_rec(btr_cur);
+
+ return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
+ == node->new_trx_id;
}
/***********************************************************//**
@@ -269,12 +226,13 @@ row_undo_mod_clust(
log_free_check();
pcur = &node->pcur;
index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
+ ut_ad(index->is_primary());
mtr.start();
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
online = dict_index_is_online_ddl(index);
@@ -308,7 +266,7 @@ row_undo_mod_clust(
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
}
err = row_undo_mod_clust_low(
@@ -362,44 +320,122 @@ row_undo_mod_clust(
btr_pcur_commit_specify_mtr(pcur, &mtr);
- if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ /* FIXME: Perform the below operations in the above
+ mini-transaction when possible. */
+
+ if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing update_undo log record. */
+ ut_ad(node->new_trx_id);
mtr.start();
+ if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
+ goto mtr_commit_exit;
+ }
+
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ if (!row_undo_mod_must_purge(node, &mtr)) {
+ goto mtr_commit_exit;
+ }
+ index->set_modified(mtr);
}
- /* It is not necessary to call row_log_table,
- because the record is delete-marked and would thus
- be omitted from the rebuilt copy of the table. */
- err = row_undo_mod_remove_clust_low(
- node, &mtr, BTR_MODIFY_LEAF);
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ dict_table_is_comp(node->table)));
+ if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
+ goto mtr_commit_exit;
+ }
- /* We may have to modify tree structure: do a
- pessimistic descent down the index tree */
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
- mtr.start();
- if (index->table->is_temporary()) {
- mtr.set_log_mode(MTR_LOG_NO_REDO);
- } else {
- mtr.set_named_space(index->space);
+ mtr.start();
+ if (!btr_pcur_restore_position(
+ BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ pcur, &mtr)) {
+ goto mtr_commit_exit;
+ }
+
+ if (index->table->is_temporary()) {
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ } else {
+ if (!row_undo_mod_must_purge(node, &mtr)) {
+ goto mtr_commit_exit;
}
+ index->set_modified(mtr);
+ }
- err = row_undo_mod_remove_clust_low(
- node, &mtr,
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ dict_table_is_comp(node->table)));
+
+ /* This operation is analogous to purge, we can free
+ also inherited externally stored fields. We can also
+ assume that the record was complete (including BLOBs),
+ because it had been delete-marked after it had been
+ completely inserted. Therefore, we are passing
+ rollback=false, just like purge does. */
+ btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
+ false, &mtr);
+ ut_ad(err == DB_SUCCESS
+ || err == DB_OUT_OF_FILE_SPACE);
+ } else if (!index->table->is_temporary() && node->new_trx_id) {
+ /* We rolled back a record so that it still exists.
+ We must reset the DB_TRX_ID if the history is no
+ longer accessible by any active read view. */
- ut_ad(err == DB_SUCCESS
- || err == DB_OUT_OF_FILE_SPACE);
+ mtr.start();
+ if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
+ goto mtr_commit_exit;
+ }
+ rec_t* rec = btr_pcur_get_rec(pcur);
+ mtr_s_lock(&purge_sys.latch, &mtr);
+ if (!purge_sys.view.changes_visible(node->new_trx_id,
+ node->table->name)) {
+ goto mtr_commit_exit;
}
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+ ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
+ ut_ad(index->n_uniq <= MAX_REF_PARTS);
+ /* Reserve enough offsets for the PRIMARY KEY and 2 columns
+ so that we can access DB_TRX_ID, DB_ROLL_PTR. */
+ ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
+ rec_offs_init(offsets_);
+ offsets = rec_get_offsets(
+ rec, index, offsets_, true, trx_id_pos + 2, &heap);
+ ulint len;
+ ulint trx_id_offset = rec_get_nth_field_offs(
+ offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
+ ut_ad(!rec_get_deleted_flag(
+ rec, dict_table_is_comp(node->table)));
+ index->set_modified(mtr);
+ if (page_zip_des_t* page_zip = buf_block_get_page_zip(
+ btr_pcur_get_block(&node->pcur))) {
+ page_zip_write_trx_id_and_roll_ptr(
+ page_zip, rec, offsets, trx_id_pos,
+ 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
+ &mtr);
+ } else {
+ mlog_write_string(rec + trx_id_offset,
+ reset_trx_id,
+ sizeof reset_trx_id, &mtr);
+ }
+ }
+ } else {
+ goto func_exit;
}
+mtr_commit_exit:
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+
+func_exit:
node->state = UNDO_NODE_FETCH_NEXT;
if (offsets_heap) {
@@ -502,12 +538,11 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_a(success);
/* For temporary table, we can skip to check older version of
- clustered index entry. Because the purge won't process
- any no-redo rollback segment undo logs. */
- if (dict_table_is_temporary(node->table)
+ clustered index entry, because there is no MVCC or purge. */
+ if (node->table->is_temporary()
|| row_vers_old_has_index_entry(
- false, btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry, 0, 0)) {
+ false, btr_pcur_get_rec(&node->pcur),
+ &mtr_vers, index, entry, 0, 0)) {
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
btr_cur, TRUE, thr, &mtr);
ut_ad(err == DB_SUCCESS);
@@ -526,18 +561,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
}
if (modify_leaf) {
- success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+ ? DB_SUCCESS : DB_FAIL;
} else {
/* Passing rollback=false,
because we are deleting a secondary index record:
the distinction only matters when deleting a
record that contains externally stored columns. */
- ut_ad(!dict_index_is_clust(index));
+ ut_ad(!index->is_primary());
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
false, &mtr);
@@ -861,8 +892,8 @@ row_undo_mod_upd_del_sec(
}
/* During online index creation,
- HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
- guarantee that any active transaction has not modified
+	HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
+ should guarantee that any active transaction has not modified
indexed columns such that col->ord_part was 0 at the
time when the undo log record was written. When we get
to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
@@ -927,8 +958,8 @@ row_undo_mod_del_mark_sec(
}
/* During online index creation,
- HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
- guarantee that any active transaction has not modified
+ HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
+ should guarantee that any active transaction has not modified
indexed columns such that col->ord_part was 0 at the
time when the undo log record was written. When we get
to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
@@ -1030,8 +1061,7 @@ row_undo_mod_upd_exist_sec(
format. REDUNDANT and COMPACT formats
store a local 768-byte prefix of each
externally stored column. */
- ut_a(dict_table_get_format(index->table)
- >= UNIV_FORMAT_B);
+ ut_a(dict_table_has_atomic_blobs(index->table));
/* This is only legitimate when
rolling back an incomplete transaction
@@ -1175,6 +1205,20 @@ close_table:
node->heap, &(node->update));
node->new_trx_id = trx_id;
node->cmpl_info = cmpl_info;
+ ut_ad(!node->ref->info_bits);
+
+ if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) {
+ /* This must be an undo log record for a subsequent
+ instant ALTER TABLE, extending the metadata record. */
+ ut_ad(clust_index->is_instant());
+ if (node->update->info_bits != REC_INFO_MIN_REC_FLAG) {
+ ut_ad(!"wrong info_bits in undo log record");
+ goto close_table;
+ }
+ node->update->info_bits = REC_INFO_METADATA;
+ const_cast<dtuple_t*>(node->ref)->info_bits
+ = REC_INFO_METADATA;
+ }
if (!row_undo_search_clust_to_pcur(node)) {
/* As long as this rolling-back transaction exists,
@@ -1248,6 +1292,12 @@ row_undo_mod(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
+
+ if (node->ref->info_bits) {
+ ut_ad(node->ref->info_bits == REC_INFO_METADATA);
+ goto rollback_clust;
+ }
+
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
@@ -1270,6 +1320,7 @@ row_undo_mod(
}
if (err == DB_SUCCESS) {
+rollback_clust:
err = row_undo_mod_clust(node, thr);
bool update_statistics
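/* Editor's note: a sketch of the decision that the new
   row_undo_mod_must_purge() makes above, reduced to plain data: during
   rollback of TRX_UNDO_UPD_DEL_REC, the old delete-marked version is
   removed only if (a) the purge view already sees the changes of
   new_trx_id (no reader still needs the old version) and (b) the
   record's DB_TRX_ID still equals new_trx_id, i.e. it has not been
   purged and replaced meanwhile.  ReadView here is a hypothetical
   stand-in, not the real purge_sys.view. */
#include <cstdint>

struct ReadView {			// hypothetical stand-in
	uint64_t low_limit_id;
	// changes of transactions older than the low limit are visible
	bool changes_visible(uint64_t trx_id) const
	{ return trx_id < low_limit_id; }
};

static bool must_purge(const ReadView& purge_view,
		       uint64_t rec_trx_id, uint64_t new_trx_id)
{
	if (!purge_view.changes_visible(new_trx_id)) {
		return false;	// some reader may still need the old version
	}
	return rec_trx_id == new_trx_id;  // not already purged and replaced
}

int main()
{
	ReadView v{100};
	return must_purge(v, 42, 42) ? 0 : 1;	// 42 < 100: purge allowed
}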
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 3f960235f3e..9b88f950917 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -40,6 +40,7 @@ Created 1/8/1997 Heikki Tuuri
#include "row0upd.h"
#include "row0mysql.h"
#include "srv0srv.h"
+#include "srv0start.h"
/* How to undo row operations?
(1) For an insert, we have stored a prefix of the clustered index record
@@ -195,11 +196,10 @@ row_undo_search_clust_to_pcur(
ut_ad(row_get_rec_trx_id(rec, clust_index, offsets)
== node->trx->id);
- if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) {
- /* In DYNAMIC or COMPRESSED format, there is
- no prefix of externally stored columns in the
- clustered index record. Build a cache of
- column prefixes. */
+ if (dict_table_has_atomic_blobs(node->table)) {
+ /* There is no prefix of externally stored
+ columns in the clustered index record. Build a
+ cache of column prefixes. */
ext = &node->ext;
} else {
/* REDUNDANT and COMPACT formats store a local
@@ -227,10 +227,14 @@ row_undo_search_clust_to_pcur(
}
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
+ ut_ad(node->row->info_bits == REC_INFO_MIN_REC_FLAG
+ || node->row->info_bits == 0);
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
clust_index, node->update, node->heap);
} else {
+ ut_ad((node->row->info_bits == REC_INFO_MIN_REC_FLAG)
+ == (node->rec_type == TRX_UNDO_INSERT_METADATA));
node->undo_row = NULL;
node->undo_ext = NULL;
}
@@ -340,11 +344,17 @@ row_undo_step(
ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
- if (UNIV_UNLIKELY(trx == trx_roll_crash_recv_trx)
- && trx_roll_must_shutdown()) {
+ if (UNIV_UNLIKELY(trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
+ && !srv_undo_sources
+ && !srv_is_being_started)
+ && (srv_fast_shutdown == 3 || trx == trx_roll_crash_recv_trx)) {
/* Shutdown has been initiated. */
trx->error_state = DB_INTERRUPTED;
- return(NULL);
+ return NULL;
+ }
+
+ if (UNIV_UNLIKELY(trx == trx_roll_crash_recv_trx)) {
+ trx_roll_report_progress();
}
err = row_undo(node, thr);
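/* Editor's note: a sketch of the interrupt condition applied by the
   reworked row_undo_step() above, with the server state reduced to
   plain fields (the real code reads trx_get_dict_operation(),
   srv_undo_sources, srv_is_being_started, srv_fast_shutdown and
   trx_roll_crash_recv_trx); the struct and values are illustrative. */
#include <iostream>

struct State {				// hypothetical snapshot of server state
	bool dict_operation;		// trx is performing DDL
	bool undo_sources;		// undo producers still running
	bool being_started;		// server startup in progress
	int  fast_shutdown;		// value of innodb_fast_shutdown
	bool is_recovered_trx;		// trx == trx_roll_crash_recv_trx
};

static bool must_interrupt_rollback(const State& s)
{
	return !s.dict_operation
		&& !s.undo_sources
		&& !s.being_started
		&& (s.fast_shutdown == 3 || s.is_recovered_trx);
}

int main()
{
	State s{false, false, false, 3, false};
	std::cout << std::boolalpha << must_interrupt_rollback(s) << '\n';
	return 0;	// prints "true": rollback is abandoned on shutdown
}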
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index ccb18cb843b..a60694c6613 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -284,52 +284,20 @@ row_upd_check_references_constraints(
FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
- /* dict_operation_lock is held both here
- (UPDATE or DELETE with FOREIGN KEY) and by TRUNCATE
- TABLE operations.
- If a TRUNCATE TABLE operation is in progress,
- there can be 2 possible conditions:
- 1) row_truncate_table_for_mysql() is not yet called.
- 2) Truncate releases dict_operation_lock
- during eviction of pages from buffer pool
- for a file-per-table tablespace.
-
- In case of (1), truncate will wait for FK operation
- to complete.
- In case of (2), truncate will be rolled forward even
- if it is interrupted. So if the foreign table is
- undergoing a truncate, ignore the FK check. */
-
if (foreign_table) {
- mutex_enter(&fil_system->mutex);
- const fil_space_t* space = fil_space_get_by_id(
- foreign_table->space);
- const bool being_truncated = space
- && space->is_being_truncated;
- mutex_exit(&fil_system->mutex);
- if (being_truncated) {
- continue;
- }
+ foreign_table->inc_fk_checks();
}
/* NOTE that if the thread ends up waiting for a lock
we will release dict_operation_lock temporarily!
- But the counter on the table protects 'foreign' from
+ But the inc_fk_checks() protects foreign_table from
being dropped while the check is running. */
- if (foreign_table) {
- my_atomic_addlint(
- &foreign_table->n_foreign_key_checks_running,
- 1);
- }
-
err = row_ins_check_foreign_constraint(
FALSE, foreign, table, entry, thr);
if (foreign_table) {
- my_atomic_addlint(
- &foreign_table->n_foreign_key_checks_running,
- -1);
+ foreign_table->dec_fk_checks();
}
if (ref_table != NULL) {
dict_table_close(ref_table, FALSE, FALSE);
@@ -521,9 +489,7 @@ row_upd_rec_sys_fields_in_recovery(
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
trx_write_trx_id(field, trx_id);
trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
}
@@ -596,7 +562,11 @@ row_upd_changes_field_size_or_external(
}
new_val = &(upd_field->new_val);
+ if (dfield_is_ext(new_val)) {
+ return(TRUE);
+ }
new_len = dfield_get_len(new_val);
+ ut_ad(new_len != UNIV_SQL_DEFAULT);
if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) {
/* A bug fixed on Dec 31st, 2004: we looked at the
@@ -610,11 +580,14 @@ row_upd_changes_field_size_or_external(
0);
}
- old_len = rec_offs_nth_size(offsets, upd_field->field_no);
+ if (rec_offs_nth_default(offsets, upd_field->field_no)) {
+ /* This is an instantly added column that is
+ at the initial default value. */
+ return(TRUE);
+ }
if (rec_offs_comp(offsets)
- && rec_offs_nth_sql_null(offsets,
- upd_field->field_no)) {
+ && rec_offs_nth_sql_null(offsets, upd_field->field_no)) {
/* Note that in the compact table format, for a
variable length field, an SQL NULL will use zero
bytes in the offset array at the start of the physical
@@ -623,9 +596,12 @@ row_upd_changes_field_size_or_external(
if we update an SQL NULL varchar to an empty string! */
old_len = UNIV_SQL_NULL;
+ } else {
+ old_len = rec_offs_nth_size(offsets,
+ upd_field->field_no);
}
- if (dfield_is_ext(new_val) || old_len != new_len
+ if (old_len != new_len
|| rec_offs_nth_extern(offsets, upd_field->field_no)) {
return(TRUE);
@@ -700,6 +676,30 @@ row_upd_rec_in_place(
ut_ad(!index->table->skip_alter_undo);
if (rec_offs_comp(offsets)) {
+#ifdef UNIV_DEBUG
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_ORDINARY:
+ break;
+ case REC_STATUS_COLUMNS_ADDED:
+ ut_ad(index->is_instant());
+ break;
+ case REC_STATUS_NODE_PTR:
+ if (index->is_dummy
+ && fil_page_get_type(page_align(rec))
+ == FIL_PAGE_RTREE) {
+ /* The function rtr_update_mbr_field_in_place()
+ is generating MLOG_COMP_REC_UPDATE_IN_PLACE
+ and MLOG_REC_UPDATE_IN_PLACE records for
+ node pointer pages. */
+ break;
+ }
+ /* fall through */
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_ad(!"wrong record status in update");
+ }
+#endif /* UNIV_DEBUG */
+
rec_set_info_bits_new(rec, update->info_bits);
} else {
rec_set_info_bits_old(rec, update->info_bits);
@@ -819,10 +819,7 @@ row_upd_index_write_log(
log_ptr += mach_write_compressed(log_ptr, n_fields);
for (i = 0; i < n_fields; i++) {
-
-#if MLOG_BUF_MARGIN <= 30
-# error "MLOG_BUF_MARGIN <= 30"
-#endif
+ compile_time_assert(MLOG_BUF_MARGIN > 30);
if (log_ptr + 30 > buf_end) {
mlog_close(mtr, log_ptr);
@@ -840,8 +837,8 @@ row_upd_index_write_log(
/* If this is a virtual column, mark it using special
field_no */
ulint field_no = upd_fld_is_virtual_col(upd_field)
- ? REC_MAX_N_FIELDS + upd_field->field_no
- : upd_field->field_no;
+ ? REC_MAX_N_FIELDS + unsigned(upd_field->field_no)
+ : unsigned(upd_field->field_no);
log_ptr += mach_write_compressed(log_ptr, field_no);
log_ptr += mach_write_compressed(log_ptr, len);
@@ -981,6 +978,7 @@ row_upd_build_sec_rec_difference_binary(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry));
ut_ad(!rec_offs_any_extern(offsets));
+ ut_ad(!rec_offs_any_default(offsets));
ut_ad(!index->table->skip_alter_undo);
update = upd_create(dtuple_get_n_fields(entry), heap);
@@ -1082,8 +1080,7 @@ row_upd_build_difference_binary(
}
for (i = 0; i < n_fld; i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
+ data = rec_get_nth_cfield(rec, index, offsets, i, &len);
dfield = dtuple_get_nth_field(entry, i);
@@ -1310,7 +1307,7 @@ row_upd_index_replace_new_col_val(
/* Copy the locally stored prefix. */
memcpy(buf, data,
- uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
+ unsigned(uf->orig_len) - BTR_EXTERN_FIELD_REF_SIZE);
/* Copy the BLOB pointer. */
memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
@@ -1323,50 +1320,34 @@ row_upd_index_replace_new_col_val(
}
}
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
+/** Apply an update vector to an index entry.
+@param[in,out] entry index entry to be updated; the clustered index record
+ must be covered by a lock or a page latch to prevent
+ deletion (rollback or purge)
+@param[in] index index of the entry
+@param[in] update update vector built for the entry
+@param[in,out] heap memory heap for copying off-page columns */
void
row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
+ dtuple_t* entry,
+ const dict_index_t* index,
+ const upd_t* update,
+ mem_heap_t* heap)
{
ut_ad(!index->table->skip_alter_undo);
- ulint i;
- ulint n_fields;
const page_size_t& page_size = dict_table_page_size(index->table);
dtuple_set_info_bits(entry, update->info_bits);
- if (order_only) {
- n_fields = dict_index_get_n_unique(index);
- } else {
- n_fields = dict_index_get_n_fields(index);
- }
-
- for (i = 0; i < n_fields; i++) {
+ for (unsigned i = index->n_fields; i--; ) {
const dict_field_t* field;
const dict_col_t* col;
const upd_field_t* uf;
field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(field);
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
const dict_v_col_t* vcol = reinterpret_cast<
const dict_v_col_t*>(
col);
@@ -1420,7 +1401,7 @@ row_upd_index_replace_new_col_vals(
field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(field);
- if (dict_col_is_virtual(col)) {
+ if (col->is_virtual()) {
const dict_v_col_t* vcol = reinterpret_cast<
const dict_v_col_t*>(
col);
@@ -1725,7 +1706,7 @@ row_upd_changes_ord_field_binary_func(
ind_field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(ind_field);
col_no = dict_col_get_no(col);
- is_virtual = dict_col_is_virtual(col);
+ is_virtual = col->is_virtual();
if (is_virtual) {
vcol = reinterpret_cast<const dict_v_col_t*>(col);
@@ -1808,10 +1789,33 @@ row_upd_changes_ord_field_binary_func(
/* Get the new mbr. */
if (dfield_is_ext(new_field)) {
if (flag == ROW_BUILD_FOR_UNDO
- && dict_table_get_format(index->table)
- >= UNIV_FORMAT_B) {
- /* For undo, and the table is Barrcuda,
- we need to skip the prefix data. */
+ && dict_table_has_atomic_blobs(
+ index->table)) {
+ /* For ROW_FORMAT=DYNAMIC
+ or COMPRESSED, a prefix of
+ off-page records is stored
+ in the undo log record
+ (for any column prefix indexes).
+ For SPATIAL INDEX, we must
+ ignore this prefix. The
+ full column value is stored in
+ the BLOB.
+ For non-spatial index, we
+ would have already fetched a
+ necessary prefix of the BLOB,
+ available in the "ext" parameter.
+
+ Here, for SPATIAL INDEX, we are
+ fetching the full column, which is
+ potentially wasting a lot of I/O,
+ memory, and possibly involving a
+ concurrency problem, similar to ones
+ that existed before the introduction
+ of row_ext_t.
+
+ MDEV-11657 FIXME: write the MBR
+ directly to the undo log record,
+ and avoid recomputing it here! */
flen = BTR_EXTERN_FIELD_REF_SIZE;
ut_ad(dfield_get_len(new_field) >=
BTR_EXTERN_FIELD_REF_SIZE);
@@ -2066,16 +2070,19 @@ row_upd_copy_columns(
/*=================*/
rec_t* rec, /*!< in: record in a clustered index */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const dict_index_t* index, /*!< in: index of rec */
sym_node_t* column) /*!< in: first column in a column list, or
NULL */
{
- byte* data;
+ ut_ad(dict_index_is_clust(index));
+
+ const byte* data;
ulint len;
while (column) {
- data = rec_get_nth_field(rec, offsets,
- column->field_nos[SYM_CLUST_FIELD_NO],
- &len);
+ data = rec_get_nth_cfield(
+ rec, index, offsets,
+ column->field_nos[SYM_CLUST_FIELD_NO], &len);
eval_node_copy_and_alloc_val(column, data, len);
column = UT_LIST_GET_NEXT(col_var_list, column);
@@ -2231,10 +2238,10 @@ row_upd_store_row(
offsets = rec_get_offsets(rec, clust_index, offsets_, true,
ULINT_UNDEFINED, &heap);
- if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) {
- /* In DYNAMIC or COMPRESSED format, there is no prefix
- of externally stored columns in the clustered index
- record. Build a cache of column prefixes. */
+ if (dict_table_has_atomic_blobs(node->table)) {
+ /* There is no prefix of externally stored columns in
+ the clustered index record. Build a cache of column
+ prefixes. */
ext = &node->ext;
} else {
/* REDUNDANT and COMPACT formats store a local
@@ -2252,7 +2259,7 @@ row_upd_store_row(
thd, mysql_table);
}
- if (node->is_delete) {
+ if (node->is_delete == PLAIN_DELETE) {
node->upd_row = NULL;
node->upd_ext = NULL;
} else {
@@ -2313,16 +2320,16 @@ row_upd_sec_index_entry(
mtr.start();
- switch (index->space) {
+ switch (index->table->space_id) {
case SRV_TMP_SPACE_ID:
mtr.set_log_mode(MTR_LOG_NO_REDO);
flags = BTR_NO_LOCKING_FLAG;
break;
default:
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
/* fall through */
case IBUF_SPACE_ID:
- flags = 0;
+ flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
break;
}
@@ -2361,7 +2368,7 @@ row_upd_sec_index_entry(
are no foreign key constraints referring to the index.
Change buffering is disabled for temporary tables and
spatial index. */
- mode = (referenced || dict_table_is_temporary(index->table)
+ mode = (referenced || index->table->is_temporary()
|| dict_index_is_spatial(index))
? BTR_MODIFY_LEAF_ALREADY_S_LATCHED
: BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED;
@@ -2375,7 +2382,7 @@ row_upd_sec_index_entry(
are no foreign key constraints referring to the index.
Change buffering is disabled for temporary tables and
spatial index. */
- mode = (referenced || dict_table_is_temporary(index->table)
+ mode = (referenced || index->table->is_temporary()
|| dict_index_is_spatial(index))
? BTR_MODIFY_LEAF
: BTR_DELETE_MARK_LEAF;
@@ -2506,7 +2513,7 @@ row_upd_sec_index_entry(
btr_pcur_close(&pcur);
mtr_commit(&mtr);
- if (node->is_delete || err != DB_SUCCESS) {
+ if (node->is_delete == PLAIN_DELETE || err != DB_SUCCESS) {
goto func_exit;
}
@@ -2599,6 +2606,7 @@ row_upd_clust_rec_by_insert_inherit_func(
#ifdef UNIV_DEBUG
if (UNIV_LIKELY(rec != NULL)) {
+ ut_ad(!rec_offs_nth_default(offsets, i));
const byte* rec_data
= rec_get_nth_field(rec, offsets, i, &len);
ut_ad(len == dfield_get_len(dfield));
@@ -2633,8 +2641,7 @@ row_upd_clust_rec_by_insert_inherit_func(
data[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG;
data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG;
/* The BTR_EXTERN_INHERITED_FLAG only matters in
- rollback of a fresh insert (insert_undo log).
- Purge (operating on update_undo log) will always free
+ rollback of a fresh insert. Purge will always free
the extern fields of a delete-marked row. */
inherit = true;
@@ -2686,6 +2693,7 @@ row_upd_clust_rec_by_insert(
entry = row_build_index_entry_low(node->upd_row, node->upd_ext,
index, heap, ROW_BUILD_FOR_INSERT);
+ if (index->is_instant()) entry->trim(*index);
ut_ad(dtuple_get_info_bits(entry) == 0);
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
@@ -2880,7 +2888,7 @@ row_upd_clust_rec(
flags |= BTR_NO_LOCKING_FLAG;
mtr->set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr->set_named_space(index->space);
+ index->set_modified(*mtr);
}
/* NOTE: this transaction has an s-lock or x-lock on the record and
@@ -2962,7 +2970,7 @@ row_upd_del_mark_clust_rec(
ut_ad(node);
ut_ad(dict_index_is_clust(index));
- ut_ad(node->is_delete);
+ ut_ad(node->is_delete == PLAIN_DELETE);
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
@@ -3042,6 +3050,7 @@ row_upd_clust_step(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets;
ibool referenced;
+ ulint flags;
trx_t* trx = thr_get_trx(thr);
rec_offs_init(offsets_);
@@ -3060,12 +3069,17 @@ row_upd_clust_step(
mtr.start();
- const ulint flags = index->table->is_temporary()
- ? BTR_NO_LOCKING_FLAG : 0;
- if (flags) {
+ if (node->table->is_temporary()) {
+ /* Disable locking, because temporary tables are
+ private to the connection (no concurrent access). */
+ flags = node->table->no_rollback()
+ ? BTR_NO_ROLLBACK
+ : BTR_NO_LOCKING_FLAG;
+ /* Redo logging only matters for persistent tables. */
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- mtr.set_named_space(index->space);
+ flags = node->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
+ index->set_modified(mtr);
}
/* If the restoration does not succeed, then the same
@@ -3080,9 +3094,7 @@ row_upd_clust_step(
ulint mode;
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "innodb_row_upd_clust_step_enter");
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd, "innodb_row_upd_clust_step_enter");
if (dict_index_is_online_ddl(index)) {
ut_ad(node->table->id != DICT_INDEXES_ID);
@@ -3106,7 +3118,8 @@ row_upd_clust_step(
then we have to free the file segments of the index tree associated
with the index */
- if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
+ if (node->is_delete == PLAIN_DELETE
+ && node->table->id == DICT_INDEXES_ID) {
ut_ad(!dict_index_is_online_ddl(index));
@@ -3116,7 +3129,7 @@ row_upd_clust_step(
mtr.commit();
mtr.start();
- mtr.set_named_space(index->space);
+ index->set_modified(mtr);
success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
&mtr);
@@ -3143,13 +3156,15 @@ row_upd_clust_step(
}
}
- ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
- btr_pcur_get_block(pcur),
- page_rec_get_heap_no(rec)));
+ ut_ad(index->table->no_rollback() || index->table->is_temporary()
+ || row_get_rec_trx_id(rec, index, offsets) == trx->id
+ || lock_trx_has_expl_x_lock(trx, index->table,
+ btr_pcur_get_block(pcur),
+ page_rec_get_heap_no(rec)));
/* NOTE: the following function calls will also commit mtr */
- if (node->is_delete) {
+ if (node->is_delete == PLAIN_DELETE) {
err = row_upd_del_mark_clust_rec(
node, index, offsets, thr, referenced,
#ifdef WITH_WSREP
@@ -3171,7 +3186,7 @@ row_upd_clust_step(
if (UNIV_UNLIKELY(!node->in_mysql_interface)) {
/* Copy the necessary columns from clust_rec and calculate the
new values to set */
- row_upd_copy_columns(rec, offsets,
+ row_upd_copy_columns(rec, offsets, index,
UT_LIST_GET_FIRST(node->columns));
row_upd_eval_new_vals(node->update);
}
@@ -3262,7 +3277,7 @@ row_upd(
/* We do not get the cmpl_info value from the MySQL
interpreter: we must calculate it on the fly: */
- if (node->is_delete
+ if (node->is_delete == PLAIN_DELETE
|| row_upd_changes_some_index_ord_field_binary(
node->table, node->update)) {
node->cmpl_info = 0;
@@ -3353,8 +3368,6 @@ row_upd_step(
trx = thr_get_trx(thr);
- trx_start_if_not_started_xa(trx, true);
-
node = static_cast<upd_node_t*>(thr->run_node);
sel_node = node->select;
@@ -3447,3 +3460,57 @@ error_handling:
DBUG_RETURN(thr);
}
+
+/** Write query start time as SQL field data to a buffer. Needed by InnoDB.
+@param thd Thread object
+@param buf Buffer to hold start time data */
+void thd_get_query_start_data(THD *thd, char *buf);
+
+/** Appends row_start or row_end field to update vector and sets a
+CURRENT_TIMESTAMP/trx->id value to it.
+Supposed to be called only by make_versioned_update() and
+make_versioned_delete().
+@param[in] trx transaction
+@param[in]	idx	table->vers_start or table->vers_end */
+void upd_node_t::make_versioned_helper(const trx_t* trx, ulint idx)
+{
+ ut_ad(in_mysql_interface); // otherwise needs to recalculate
+ // node->cmpl_info
+ ut_ad(idx == table->vers_start || idx == table->vers_end);
+
+ dict_index_t* clust_index = dict_table_get_first_index(table);
+
+ update->n_fields++;
+ upd_field_t* ufield =
+ upd_get_nth_field(update, upd_get_n_fields(update) - 1);
+ const dict_col_t* col = dict_table_get_nth_col(table, idx);
+
+ upd_field_set_field_no(ufield, dict_col_get_clust_pos(col, clust_index),
+ clust_index);
+
+ char* where = reinterpret_cast<char*>(update->vers_sys_value);
+ if (col->vers_native()) {
+ mach_write_to_8(where, trx->id);
+ } else {
+ thd_get_query_start_data(trx->mysql_thd, where);
+ }
+
+ dfield_set_data(&ufield->new_val, update->vers_sys_value, col->len);
+}
+
+/** Also set row_start = CURRENT_TIMESTAMP/trx->id
+@param[in] trx transaction */
+void upd_node_t::make_versioned_update(const trx_t* trx)
+{
+ make_versioned_helper(trx, table->vers_start);
+}
+
+/** Only set row_end = CURRENT_TIMESTAMP/trx->id.
+Do not touch other fields at all.
+@param[in] trx transaction */
+void upd_node_t::make_versioned_delete(const trx_t* trx)
+{
+ update->n_fields = 0;
+ is_delete = VERSIONED_DELETE;
+ make_versioned_helper(trx, table->vers_end);
+}
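/* Editor's note: two hunks above replace the old preprocessor pattern
   #if DATA_TRX_ID + 1 != DATA_ROLL_PTR / #error with
   compile_time_assert().  A minimal sketch of the same idea using
   standard C++ static_assert; the constants below are illustrative
   positions, not the real InnoDB definitions. */
#include <cstddef>

constexpr std::size_t DATA_TRX_ID_POS   = 1;	// hypothetical positions
constexpr std::size_t DATA_ROLL_PTR_POS = 2;

// The write path stores DB_ROLL_PTR immediately after DB_TRX_ID, so the
// adjacency is checked once, at compile time, instead of via #if/#error.
static_assert(DATA_TRX_ID_POS + 1 == DATA_ROLL_PTR_POS,
	      "DB_ROLL_PTR must immediately follow DB_TRX_ID");

int main() { return 0; }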
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index 30aa44dcb33..f31ae1573cf 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -39,7 +39,6 @@ Created 2/6/1997 Heikki Tuuri
#include "row0row.h"
#include "row0upd.h"
#include "rem0cmp.h"
-#include "read0read.h"
#include "lock0lock.h"
#include "row0mysql.h"
@@ -59,7 +58,7 @@ row_vers_non_virtual_fields_equal(
for (const dict_field_t* ifield = index->fields; ifield != end;
ifield++) {
- if (!dict_col_is_virtual(ifield->col)
+ if (!ifield->col->is_virtual()
&& cmp_dfield_dfield(a++, b++)) {
return false;
}
@@ -70,17 +69,19 @@ row_vers_non_virtual_fields_equal(
/** Determine if an active transaction has inserted or modified a secondary
index record.
+@param[in,out] caller_trx trx of current thread
@param[in] clust_rec clustered index record
@param[in] clust_index clustered index
@param[in] rec secondary index record
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
@param[in,out] mtr mini-transaction
-@return the active transaction; trx_release_reference() must be invoked
+@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
UNIV_INLINE
trx_t*
row_vers_impl_x_locked_low(
+ trx_t* caller_trx,
const rec_t* clust_rec,
dict_index_t* clust_index,
const rec_t* rec,
@@ -89,7 +90,6 @@ row_vers_impl_x_locked_low(
mtr_t* mtr)
{
trx_id_t trx_id;
- ibool corrupt;
ulint comp;
ulint rec_del;
const rec_t* version;
@@ -104,28 +104,44 @@ row_vers_impl_x_locked_low(
ut_ad(rec_offs_validate(rec, index, offsets));
+ if (ulint trx_id_offset = clust_index->trx_id_offset) {
+ trx_id = mach_read_from_6(clust_rec + trx_id_offset);
+ if (trx_id == 0) {
+ /* The transaction history was already purged. */
+ DBUG_RETURN(0);
+ }
+ }
+
heap = mem_heap_create(1024);
clust_offsets = rec_get_offsets(
clust_rec, clust_index, NULL, true, ULINT_UNDEFINED, &heap);
trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
- corrupt = FALSE;
+ if (trx_id == 0) {
+ /* The transaction history was already purged. */
+ mem_heap_free(heap);
+ DBUG_RETURN(0);
+ }
- ut_ad(!dict_table_is_temporary(clust_index->table));
+ ut_ad(!clust_index->table->is_temporary());
- trx_t* trx = trx_rw_is_active(trx_id, &corrupt, true);
+ trx_t* trx;
- if (trx == 0) {
- /* The transaction that modified or inserted clust_rec is no
- longer active, or it is corrupt: no implicit lock on rec */
- if (corrupt) {
- lock_report_trx_id_insanity(
- trx_id, clust_rec, clust_index, clust_offsets,
- trx_sys_get_max_trx_id());
+ if (trx_id == caller_trx->id) {
+ trx = caller_trx;
+ trx->reference();
+ } else {
+ trx = trx_sys.find(caller_trx, trx_id);
+ if (trx == 0) {
+ /* The transaction that modified or inserted
+ clust_rec is no longer active, or it is
+ corrupt: no implicit lock on rec */
+ lock_check_trx_id_sanity(trx_id, clust_rec,
+ clust_index, clust_offsets);
+ mem_heap_free(heap);
+ DBUG_RETURN(0);
}
- mem_heap_free(heap);
- DBUG_RETURN(0);
}
comp = page_rec_is_comp(rec);
@@ -183,7 +199,7 @@ row_vers_impl_x_locked_low(
inserting a delete-marked record. */
ut_ad(prev_version
|| !rec_get_deleted_flag(version, comp)
- || !trx_rw_is_active(trx_id, NULL, false));
+ || !trx_sys.is_registered(caller_trx, trx_id));
/* Free version and clust_offsets. */
mem_heap_free(old_heap);
@@ -207,7 +223,7 @@ row_vers_impl_x_locked_low(
or updated, the leaf page record always is
created with a clear delete-mark flag.
(We never insert a delete-marked record.) */
- trx_release_reference(trx);
+ trx->release_reference();
trx = 0;
}
@@ -334,7 +350,7 @@ result_check:
/* prev_version was the first version modified by
the trx_id transaction: no implicit x-lock */
- trx_release_reference(trx);
+ trx->release_reference();
trx = 0;
break;
}
@@ -352,13 +368,15 @@ result_check:
/** Determine if an active transaction has inserted or modified a secondary
index record.
+@param[in,out] caller_trx trx of current thread
@param[in] rec secondary index record
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
-@return the active transaction; trx_release_reference() must be invoked
+@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
+ trx_t* caller_trx,
const rec_t* rec,
dict_index_t* index,
const ulint* offsets)
@@ -369,7 +387,7 @@ row_vers_impl_x_locked(
dict_index_t* clust_index;
ut_ad(!lock_mutex_own());
- ut_ad(!trx_sys_mutex_own());
+ ut_ad(!mutex_own(&trx_sys.mutex));
mtr_start(&mtr);
@@ -399,9 +417,10 @@ row_vers_impl_x_locked(
trx = 0;
} else {
trx = row_vers_impl_x_locked_low(
- clust_rec, clust_index, rec, index, offsets, &mtr);
+ caller_trx, clust_rec, clust_index, rec, index,
+ offsets, &mtr);
- ut_ad(trx == 0 || trx_is_referenced(trx));
+ ut_ad(trx == 0 || trx->is_referenced());
}
mtr_commit(&mtr);
@@ -409,29 +428,6 @@ row_vers_impl_x_locked(
return(trx);
}
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@param[in] trx_id transaction id in the version
-@param[in] name table name
-@param[in,out] mtr mini transaction holding the latch on the
- clustered index record; it will also hold
- the latch on purge_view
-@return TRUE if earlier version should be preserved */
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id,
- const table_name_t& name,
- mtr_t* mtr)
-{
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
-
- mtr_s_lock(&purge_sys->latch, mtr);
-
- return(!purge_sys->view.changes_visible(trx_id, name));
-}
-
/** build virtual column value from current cluster index record data
@param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index
@@ -476,7 +472,7 @@ row_vers_build_clust_v_col(
const dict_field_t* ind_field = dict_index_get_nth_field(
index, i);
- if (dict_col_is_virtual(ind_field->col)) {
+ if (ind_field->col->is_virtual()) {
const dict_v_col_t* col;
col = reinterpret_cast<const dict_v_col_t*>(
@@ -579,7 +575,7 @@ row_vers_build_cur_vrow_low(
= dict_index_get_nth_field(index, i);
const dict_col_t* col = ind_field->col;
- if (!dict_col_is_virtual(col)) {
+ if (!col->is_virtual()) {
continue;
}
@@ -663,7 +659,7 @@ row_vers_vc_matches_cluster(
for (const dict_field_t *ifield = index->fields,
*const end = &index->fields[index->n_fields];
ifield != end; ifield++, a++, b++) {
- if (!dict_col_is_virtual(ifield->col)) {
+ if (!ifield->col->is_virtual()) {
if (cmp_dfield_dfield(a, b)) {
return false;
}
@@ -727,7 +723,7 @@ row_vers_vc_matches_cluster(
const dict_col_t* col = ind_field->col;
field1 = dtuple_get_nth_field(ientry, i);
- if (!dict_col_is_virtual(col)) {
+ if (!col->is_virtual()) {
continue;
}
@@ -792,7 +788,6 @@ func_exit:
@param[in] clust_index cluster index
@param[in] clust_offsets cluster rec offset
@param[in] index secondary index
-@param[in] ientry secondary index rec
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@param[in,out] heap heap memory
@@ -808,7 +803,6 @@ row_vers_build_cur_vrow(
dict_index_t* clust_index,
ulint** clust_offsets,
dict_index_t* index,
- const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
mem_heap_t* heap,
@@ -903,7 +897,7 @@ row_vers_old_has_index_entry(
ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+ ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
clust_index = dict_table_get_first_index(index->table);
@@ -926,7 +920,7 @@ row_vers_old_has_index_entry(
/* The top of the stack of versions is locked by the
mtr holding a latch on the page containing the
clustered index record. The bottom of the stack is
- locked by the fact that the purge_sys->view must
+ locked by the fact that the purge_sys.view must
'overtake' any read view of an active transaction.
Thus, it is safe to fetch the prefixes for
externally stored columns. */
@@ -1041,8 +1035,7 @@ safe_to_purge:
cur_vrow = row_vers_build_cur_vrow(
also_curr, rec, clust_index, &clust_offsets,
- index, ientry, roll_ptr, trx_id, heap, v_heap, mtr,
- vcol_info);
+ index, roll_ptr, trx_id, heap, v_heap, mtr, vcol_info);
if (vcol_info && vcol_info->is_first_fetch()) {
goto unsafe_to_purge;
@@ -1174,7 +1167,7 @@ row_vers_build_for_consistent_read(
ut_ad(dict_index_is_clust(index));
ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+ ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
@@ -1235,7 +1228,7 @@ row_vers_build_for_consistent_read(
in_heap, rec_offs_size(*offsets)));
*old_vers = rec_copy(buf, prev_version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
+ rec_offs_make_valid(*old_vers, index, true, *offsets);
if (vrow && *vrow) {
*vrow = dtuple_copy(*vrow, in_heap);
@@ -1258,6 +1251,7 @@ which should be seen by a semi-consistent read. */
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
+ trx_t* caller_trx,/*!<in/out: trx of current thread */
const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
@@ -1286,7 +1280,7 @@ row_vers_build_for_semi_consistent_read(
ut_ad(dict_index_is_clust(index));
ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+ ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
@@ -1294,7 +1288,6 @@ row_vers_build_for_semi_consistent_read(
ut_ad(!vrow || !(*vrow));
for (;;) {
- const trx_t* version_trx;
mem_heap_t* heap2;
rec_t* prev_version;
trx_id_t version_trx_id;
@@ -1304,20 +1297,7 @@ row_vers_build_for_semi_consistent_read(
rec_trx_id = version_trx_id;
}
- trx_sys_mutex_enter();
- version_trx = trx_get_rw_trx_by_id(version_trx_id);
- /* Because version_trx is a read-write transaction,
- its state cannot change from or to NOT_STARTED while
- we are holding the trx_sys->mutex. It may change from
- ACTIVE to PREPARED or COMMITTED. */
- if (version_trx
- && trx_state_eq(version_trx,
- TRX_STATE_COMMITTED_IN_MEMORY)) {
- version_trx = NULL;
- }
- trx_sys_mutex_exit();
-
- if (!version_trx) {
+ if (!trx_sys.is_registered(caller_trx, version_trx_id)) {
committed_version_trx:
/* We found a version that belongs to a
committed transaction: return it. */
@@ -1358,7 +1338,7 @@ committed_version_trx:
in_heap, rec_offs_size(*offsets)));
*old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
+ rec_offs_make_valid(*old_vers, index, true, *offsets);
if (vrow && *vrow) {
*vrow = dtuple_copy(*vrow, in_heap);
dtuple_dup_v_fld(*vrow, in_heap);
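/* Editor's note: a sketch of the early exits added to
   row_vers_impl_x_locked_low() above: a clustered-index DB_TRX_ID of 0
   means the history was already purged (no implicit lock), and when
   the id matches the caller's own transaction no trx_sys lookup is
   needed.  Trx and find_active() are stand-ins, not the real
   trx_sys.find() interface. */
#include <cassert>
#include <cstdint>

struct Trx { uint64_t id; };

// find_active() stands in for the registry lookup; it returns nullptr
// when the transaction is no longer active.
static Trx* impl_lock_holder(Trx& caller, uint64_t rec_trx_id,
			     Trx* (*find_active)(uint64_t))
{
	if (rec_trx_id == 0)	     return nullptr;	// history already purged
	if (rec_trx_id == caller.id) return &caller;	// our own change
	return find_active(rec_trx_id);			// look up someone else
}

static Trx* nobody(uint64_t) { return nullptr; }

int main()
{
	Trx me{7};
	assert(impl_lock_holder(me, 0, nobody) == nullptr);
	assert(impl_lock_holder(me, 7, nobody) == &me);
	assert(impl_lock_holder(me, 9, nobody) == nullptr);
	return 0;
}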
diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc
index 9e2aa7d7af4..e4a3e84df01 100644
--- a/storage/innobase/srv/srv0conc.cc
+++ b/storage/innobase/srv/srv0conc.cc
@@ -56,10 +56,8 @@ ulong srv_thread_sleep_delay = 10000;
/** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-ulint srv_max_n_threads = 0;
+a semaphore inside InnoDB. srv_start() sets the value. */
+ulint srv_max_n_threads;
/** The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
@@ -134,12 +132,9 @@ srv_conc_enter_innodb_with_atomics(
#endif /* WITH_WSREP */
if (srv_thread_concurrency == 0) {
-
if (notified_mysql) {
-
- (void) my_atomic_addlint(
- &srv_conc.n_waiting, -1);
-
+ my_atomic_addlint(&srv_conc.n_waiting,
+ ulint(-1));
thd_wait_end(trx->mysql_thd);
}
@@ -158,10 +153,8 @@ srv_conc_enter_innodb_with_atomics(
srv_enter_innodb_with_tickets(trx);
if (notified_mysql) {
-
- (void) my_atomic_addlint(
- &srv_conc.n_waiting, -1);
-
+ my_atomic_addlint(&srv_conc.n_waiting,
+ ulint(-1));
thd_wait_end(trx->mysql_thd);
}
@@ -183,13 +176,11 @@ srv_conc_enter_innodb_with_atomics(
/* Since there were no free seats, we relinquish
the overbooked ticket. */
- (void) my_atomic_addlint(
- &srv_conc.n_active, -1);
+ my_atomic_addlint(&srv_conc.n_active, ulint(-1));
}
if (!notified_mysql) {
- (void) my_atomic_addlint(
- &srv_conc.n_waiting, 1);
+ my_atomic_addlint(&srv_conc.n_waiting, 1);
thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
@@ -233,7 +224,7 @@ srv_conc_exit_innodb_with_atomics(
trx->n_tickets_to_enter_innodb = 0;
trx->declared_to_be_inside_innodb = FALSE;
- (void) my_atomic_addlint(&srv_conc.n_active, -1);
+ my_atomic_addlint(&srv_conc.n_active, ulint(-1));
}
/*********************************************************************//**
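/* Editor's note: the srv0conc.cc hunks above replace
   my_atomic_addlint(&n, -1) with my_atomic_addlint(&n, ulint(-1)),
   avoiding an implicit signed-to-unsigned conversion; adding the
   wrapped-around unsigned value is exactly an atomic decrement.  A
   standalone demonstration with std::atomic: */
#include <atomic>
#include <cassert>

int main()
{
	std::atomic<unsigned long> n_waiting{3};
	// unsigned arithmetic wraps, so adding ULONG_MAX acts as -1
	n_waiting.fetch_add(static_cast<unsigned long>(-1));
	assert(n_waiting.load() == 2);
	return 0;
}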
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index de1c0d27efe..85d9f0522aa 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -2,7 +2,7 @@
Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1485,8 +1485,8 @@ srv_mon_set_module_control(
mon_option_t set_option) /*!< in: Turn on/off reset the
counter */
{
- ulint ix;
- ulint start_id;
+ lint ix;
+ lint start_id;
ibool set_current_module = FALSE;
ut_a(module_id <= NUM_MONITOR);
@@ -1596,7 +1596,7 @@ srv_mon_get_rseg_size(void)
total rollback segment size and to avoid mutex contention we
don't acquire the rseg->mutex" */
for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- const trx_rseg_t* rseg = trx_sys->rseg_array[i];
+ const trx_rseg_t* rseg = trx_sys.rseg_array[i];
if (rseg != NULL) {
value += rseg->curr_size;
@@ -1838,7 +1838,7 @@ srv_mon_process_existing_counter(
/* innodb_page_size */
case MONITOR_OVLD_SRV_PAGE_SIZE:
- value = UNIV_PAGE_SIZE;
+ value = srv_page_size;
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS:
@@ -1933,7 +1933,7 @@ srv_mon_process_existing_counter(
/* innodb_row_lock_time_max */
case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
- value = lock_sys->n_lock_max_wait_time / 1000;
+ value = lock_sys.n_lock_max_wait_time / 1000;
break;
/* innodb_row_lock_time_avg */
@@ -1952,7 +1952,7 @@ srv_mon_process_existing_counter(
break;
case MONITOR_RSEG_HISTORY_LEN:
- value = trx_sys->rseg_history_len;
+ value = trx_sys.history_size();
break;
case MONITOR_RSEG_CUR_SIZE:
@@ -1960,7 +1960,7 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_N_FILE_OPENED:
- value = fil_system->n_open;
+ value = fil_system.n_open;
break;
case MONITOR_OVLD_IBUF_MERGE_INSERT:
@@ -2000,11 +2000,11 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_LSN_FLUSHDISK:
- value = (mon_type_t) log_sys->flushed_to_disk_lsn;
+ value = (mon_type_t) log_sys.flushed_to_disk_lsn;
break;
case MONITOR_OVLD_LSN_CURRENT:
- value = (mon_type_t) log_sys->lsn;
+ value = (mon_type_t) log_sys.lsn;
break;
case MONITOR_OVLD_BUF_OLDEST_LSN:
@@ -2012,15 +2012,15 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_LSN_CHECKPOINT:
- value = (mon_type_t) log_sys->last_checkpoint_lsn;
+ value = (mon_type_t) log_sys.last_checkpoint_lsn;
break;
case MONITOR_OVLD_MAX_AGE_ASYNC:
- value = log_sys->max_modified_age_async;
+ value = log_sys.max_modified_age_async;
break;
case MONITOR_OVLD_MAX_AGE_SYNC:
- value = log_sys->max_modified_age_sync;
+ value = log_sys.max_modified_age_sync;
break;
#ifdef BTR_CUR_HASH_ADAPT
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 5572521662b..f3cab013437 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -149,23 +149,10 @@ my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
my_bool srv_file_per_table;
-/** whether to use backup-safe TRUNCATE and crash-safe RENAME
-instead of the MySQL 5.7 WL#6501 TRUNCATE TABLE implementation */
-my_bool srv_safe_truncate;
-/** The file format to use on new *.ibd files. */
-ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
/** Set if InnoDB operates in read-only mode or innodb-force-recovery
is greater than SRV_FORCE_NO_TRX_UNDO. */
my_bool high_level_read_only;
-#if UNIV_FORMAT_A
-# error "UNIV_FORMAT_A must be 0!"
-#endif
-
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
ibool srv_locks_unsafe_for_binlog;
@@ -180,17 +167,10 @@ use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
my_bool srv_use_native_aio;
my_bool srv_numa_interleave;
-/** innodb_use_trim; whether to use fallocate(PUNCH_HOLE) with
-page_compression */
-my_bool srv_use_trim;
-/** copy of innodb_use_atomic_writes; @see innobase_init() */
+/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
my_bool srv_use_atomic_writes;
/** innodb_compression_algorithm; used with page compression */
ulong innodb_compression_algorithm;
-/** innodb_mtflush_threads; number of threads used for multi-threaded flush */
-long srv_mtflush_threads;
-/** innodb_use_mtflush; whether to use multi threaded flush. */
-my_bool srv_use_mtflush;
#ifdef UNIV_DEBUG
/** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
@@ -206,15 +186,15 @@ ulong srv_n_log_files;
/** The InnoDB redo log file size, or 0 when changing the redo log format
at startup (while disallowing writes to the redo log). */
ulonglong srv_log_file_size;
-/** copy of innodb_log_buffer_size, but in database pages */
-ulint srv_log_buffer_size;
+/** innodb_log_buffer_size, in bytes */
+ulong srv_log_buffer_size;
/** innodb_flush_log_at_trx_commit */
ulong srv_flush_log_at_trx_commit;
/** innodb_flush_log_at_timeout */
uint srv_flush_log_at_timeout;
/** innodb_page_size */
ulong srv_page_size;
-/** log2 of innodb_page_size; @see innobase_init() */
+/** log2 of innodb_page_size; @see innodb_init_params() */
ulong srv_page_size_shift;
/** innodb_log_write_ahead_size */
ulong srv_log_write_ahead_size;
@@ -269,16 +249,20 @@ ulint srv_buf_pool_base_size;
ulint srv_buf_pool_curr_size;
/** Dump this % of each buffer pool during BP dump */
ulong srv_buf_pool_dump_pct;
+/** Abort load after this amount of pages */
+#ifdef UNIV_DEBUG
+ulong srv_buf_pool_load_pages_abort = LONG_MAX;
+#endif
/** Lock table size in bytes */
ulint srv_lock_table_size = ULINT_MAX;
/** innodb_idle_flush_pct */
ulong srv_idle_flush_pct;
-/** copy of innodb_read_io_threads */
-ulint srv_n_read_io_threads;
-/** copy of innodb_write_io_threads */
-ulint srv_n_write_io_threads;
+/** innodb_read_io_threads */
+ulong srv_n_read_io_threads;
+/** innodb_write_io_threads */
+ulong srv_n_write_io_threads;
/** innodb_random_read_ahead */
my_bool srv_random_read_ahead;
@@ -291,13 +275,10 @@ ulong srv_read_ahead_threshold;
buffer in terms of percentage of the buffer pool. */
uint srv_change_buffer_max_size;
-char* srv_file_flush_method_str;
+ulong srv_file_flush_method;
-enum srv_flush_t srv_file_flush_method = IF_WIN(SRV_ALL_O_DIRECT_FSYNC,SRV_FSYNC);
-
-
-/** copy of innodb_open_files, initialized by innobase_init() */
+/** copy of innodb_open_files; @see innodb_init_params() */
ulint srv_max_n_open_files;
/** innodb_io_capacity */
@@ -394,8 +375,7 @@ unsigned long long srv_stats_modified_counter;
based on number of configured pages */
my_bool srv_stats_sample_traditional;
-/** copy of innodb_doublewrite */
-ibool srv_use_doublewrite_buf;
+my_bool srv_use_doublewrite_buf;
/** innodb_doublewrite_batch_size (a debug parameter) specifies the
number of pages to use in LRU and flush_list batch flushing.
@@ -527,11 +507,6 @@ UNIV_INTERN ulong srv_buf_dump_status_frequency;
mutex_exit(&srv_sys.mutex); \
} while (0)
-#define fetch_lock_wait_timeout(trx) \
- ((trx)->lock.allowed_to_wait \
- ? thd_lock_wait_timeout((trx)->mysql_thd) \
- : 0)
-
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================
@@ -635,6 +610,12 @@ struct srv_sys_t{
static srv_sys_t srv_sys;
+/** @return whether the purge coordinator thread is active */
+bool purge_sys_t::running()
+{
+ return my_atomic_loadlint(&srv_sys.n_threads_active[SRV_PURGE]);
+}
+
/** Event to signal srv_monitor_thread. Not protected by a mutex.
Set after setting srv_print_innodb_monitor. */
os_event_t srv_monitor_event;
@@ -878,7 +859,8 @@ srv_suspend_thread_low(
ut_a(!slot->suspended);
slot->suspended = TRUE;
- if (my_atomic_addlint(&srv_sys.n_threads_active[type], -1) < 0) {
+ if (lint(my_atomic_addlint(&srv_sys.n_threads_active[type], ulint(-1)))
+ < 0) {
ut_error;
}
@@ -1130,40 +1112,15 @@ srv_free(void)
}
/*********************************************************************//**
-Normalizes init parameter values to use units we use inside InnoDB. */
-static
-void
-srv_normalize_init_values(void)
-/*===========================*/
-{
- srv_sys_space.normalize();
-
- srv_tmp_space.normalize();
-
- srv_log_buffer_size /= UNIV_PAGE_SIZE;
-
- srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
-}
-
-/*********************************************************************//**
Boots the InnoDB server. */
void
srv_boot(void)
/*==========*/
{
- /* Transform the init parameter values given by MySQL to
- use units we use inside InnoDB: */
-
- srv_normalize_init_values();
-
sync_check_init();
- /* Reset the system variables in the recovery module. */
recv_sys_var_init();
trx_pool_init();
row_mysql_init();
-
- /* Initialize this module */
-
srv_init();
}
@@ -1228,7 +1185,6 @@ srv_printf_innodb_monitor(
{
double time_elapsed;
time_t current_time;
- ulint n_reserved;
ibool ret;
mutex_enter(&srv_innodb_monitor_mutex);
@@ -1388,12 +1344,10 @@ srv_printf_innodb_monitor(
srv_conc_get_active_threads(),
srv_conc_get_waiting_threads());
- /* This is a dirty read, without holding trx_sys->mutex. */
fprintf(file, ULINTPF " read views open inside InnoDB\n",
- trx_sys->mvcc->size());
+ trx_sys.view_count());
- n_reserved = fil_space_get_n_reserved_extents(0);
- if (n_reserved > 0) {
+ if (ulint n_reserved = fil_system.sys_space->n_reserved_extents) {
fprintf(file,
ULINTPF " tablespace extents now reserved for"
" B-tree split operations\n",
@@ -1558,7 +1512,7 @@ srv_export_innodb_status(void)
export_vars.innodb_have_atomic_builtins = 0;
#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
+ export_vars.innodb_page_size = srv_page_size;
export_vars.innodb_log_waits = srv_stats.log_waits;
@@ -1605,7 +1559,7 @@ srv_export_innodb_status(void)
}
export_vars.innodb_row_lock_time_max =
- lock_sys->n_lock_max_wait_time / 1000;
+ lock_sys.n_lock_max_wait_time / 1000;
export_vars.innodb_rows_read = srv_stats.n_rows_read;
@@ -1626,7 +1580,7 @@ srv_export_innodb_status(void)
export_vars.innodb_system_rows_deleted =
srv_stats.n_system_rows_deleted;
- export_vars.innodb_num_open_files = fil_system->n_open;
+ export_vars.innodb_num_open_files = fil_system.n_open;
export_vars.innodb_truncated_status_writes =
srv_truncated_status_writes;
@@ -1741,7 +1695,7 @@ loop:
if (srv_print_innodb_monitor) {
/* Reset mutex_skipped counter every time
srv_print_innodb_monitor changes. This is to
- ensure we will not be blocked by lock_sys->mutex
+ ensure we will not be blocked by lock_sys.mutex
for short duration information printing,
such as requested by sync_array_print_long_waits() */
if (!last_srv_print_monitor) {
@@ -1945,19 +1899,8 @@ srv_get_active_thread_type(void)
srv_sys_mutex_exit();
- if (ret == SRV_NONE && srv_shutdown_state != SRV_SHUTDOWN_NONE
- && purge_sys != NULL) {
- /* Check only on shutdown. */
- switch (trx_purge_state()) {
- case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
- ret = SRV_PURGE;
- break;
- case PURGE_STATE_INIT:
- case PURGE_STATE_DISABLED:
- case PURGE_STATE_EXIT:
- break;
- }
+ if (ret == SRV_NONE && purge_sys.enabled()) {
+ ret = SRV_PURGE;
}
return(ret);
@@ -1996,9 +1939,9 @@ srv_wake_purge_thread_if_not_active()
{
ut_ad(!srv_sys_mutex_own());
- if (purge_sys->state == PURGE_STATE_RUN
+ if (purge_sys.enabled() && !purge_sys.paused()
&& !my_atomic_loadlint(&srv_sys.n_threads_active[SRV_PURGE])
- && my_atomic_loadlint(&trx_sys->rseg_history_len)) {
+ && trx_sys.history_size()) {
srv_release_threads(SRV_PURGE, 1);
}
@@ -2144,16 +2087,10 @@ srv_master_do_disabled_loop(void)
/** Disables master thread. It's used by:
SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
void
-srv_master_thread_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save)
+srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save)
{
/* This method is protected by mutex, as every SET GLOBAL .. */
ut_ad(srv_master_thread_disabled_event != NULL);
@@ -2458,22 +2395,14 @@ loop:
srv_suspend_thread(slot);
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
srv_main_thread_op_info = "waiting for server activity";
srv_resume_thread(slot);
goto loop;
}
-/** Check if purge should stop.
-@param[in] n_purged pages purged in the last batch
-@return whether purge should exit */
-static
-bool
-srv_purge_should_exit(ulint n_purged)
+/** @return whether purge should exit due to shutdown */
+static bool srv_purge_should_exit()
{
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_NONE
|| srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
@@ -2485,7 +2414,9 @@ srv_purge_should_exit(ulint n_purged)
return(true);
}
/* Slow shutdown was requested. */
- if (n_purged) {
+ ulint history_size = trx_sys.history_size();
+
+ if (history_size) {
#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY
static ib_time_t progress_time;
ib_time_t time = ut_time();
@@ -2494,51 +2425,37 @@ srv_purge_should_exit(ulint n_purged)
service_manager_extend_timeout(
INNODB_EXTEND_TIMEOUT_INTERVAL,
"InnoDB: to purge " ULINTPF " transactions",
- trx_sys->rseg_history_len);
+ history_size);
}
#endif
- /* The previous round still did some work. */
- return(false);
+ return false;
}
- /* Exit if there are no active transactions to roll back. */
- return(trx_sys_any_active_transactions() == 0);
+
+ return !trx_sys.any_active_transactions();
}
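The rewritten srv_purge_should_exit() above no longer takes the number of pages purged in the last batch; it keys off trx_sys.history_size() and trx_sys.any_active_transactions() instead. A simplified sketch of that decision only (the shutdown-state assertions and the systemd timeout extension are omitted, and the parameterization is illustrative):

#include <cassert>

// fast_shutdown stands for innodb_fast_shutdown != 0.
static bool purge_should_exit_sketch(bool fast_shutdown,
                                     unsigned long history_size,
                                     bool any_active_transactions)
{
    if (fast_shutdown)
        return true;                  // exit immediately
    if (history_size)
        return false;                 // undo history left to purge
    return !any_active_transactions;  // history empty: wait out active trx
}

int main()
{
    assert(purge_should_exit_sketch(true, 100, true));
    assert(!purge_should_exit_sketch(false, 100, false));
    assert(purge_should_exit_sketch(false, 0, false));
    return 0;
}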
/*********************************************************************//**
Fetch and execute a task from the work queue.
@return true if a task was executed */
-static
-bool
-srv_task_execute(void)
-/*==================*/
+static bool srv_task_execute()
{
- que_thr_t* thr = NULL;
-
ut_ad(!srv_read_only_mode);
- ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+ ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
mutex_enter(&srv_sys.tasks_mutex);
- if (UT_LIST_GET_LEN(srv_sys.tasks) > 0) {
-
- thr = UT_LIST_GET_FIRST(srv_sys.tasks);
-
+ if (que_thr_t* thr = UT_LIST_GET_FIRST(srv_sys.tasks)) {
ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
-
UT_LIST_REMOVE(srv_sys.tasks, thr);
- }
-
- mutex_exit(&srv_sys.tasks_mutex);
-
- if (thr != NULL) {
-
+ mutex_exit(&srv_sys.tasks_mutex);
que_run_threads(thr);
-
- my_atomic_addlint(
- &purge_sys->n_completed, 1);
+ my_atomic_addlint(&purge_sys.n_completed, 1);
+ return true;
}
- return(thr != NULL);
+ ut_ad(UT_LIST_GET_LEN(srv_sys.tasks) == 0);
+ mutex_exit(&srv_sys.tasks_mutex);
+ return false;
}
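srv_task_execute() now pops at most one queued purge task while holding srv_sys.tasks_mutex, drops the mutex, runs the task, and bumps purge_sys.n_completed. A standalone sketch of that pattern, with std::mutex and std::function standing in for the InnoDB types:

#include <atomic>
#include <functional>
#include <list>
#include <mutex>

static std::mutex tasks_mutex;
static std::list<std::function<void()> > tasks;
static std::atomic<unsigned long> n_completed(0);

static bool task_execute_sketch()
{
    std::function<void()> task;
    {
        std::lock_guard<std::mutex> guard(tasks_mutex);
        if (tasks.empty())
            return false;          // nothing queued
        task = tasks.front();
        tasks.pop_front();
    }                              // mutex released before running the task
    task();
    n_completed.fetch_add(1, std::memory_order_relaxed);
    return true;
}

int main()
{
    tasks.push_back([] { /* pretend to purge one batch */ });
    return task_execute_sketch() && n_completed.load() == 1 ? 0 : 1;
}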
/*********************************************************************//**
@@ -2586,19 +2503,11 @@ DECLARE_THREAD(srv_worker_thread)(
srv_wake_purge_thread_if_not_active();
}
-
- /* Note: we are checking the state without holding the
- purge_sys->latch here. */
- } while (purge_sys->state != PURGE_STATE_EXIT);
+ } while (purge_sys.enabled());
srv_free_slot(slot);
- rw_lock_x_lock(&purge_sys->latch);
-
- ut_a(!purge_sys->running);
- ut_a(purge_sys->state == PURGE_STATE_EXIT);
-
- rw_lock_x_unlock(&purge_sys->latch);
+ ut_ad(!purge_sys.enabled());
#ifdef UNIV_DEBUG_THREAD_CREATION
ib::info() << "Purge worker thread exiting, id "
@@ -2643,7 +2552,7 @@ srv_do_purge(ulint* n_total_purged)
}
do {
- if (trx_sys->rseg_history_len > rseg_history_len
+ if (trx_sys.history_size() > rseg_history_len
|| (srv_max_purge_lag > 0
&& rseg_history_len > srv_max_purge_lag)) {
@@ -2672,25 +2581,24 @@ srv_do_purge(ulint* n_total_purged)
ut_a(n_use_threads <= n_threads);
/* Take a snapshot of the history list before purge. */
- if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
+ if (!(rseg_history_len = trx_sys.history_size())) {
break;
}
ulint undo_trunc_freq =
- purge_sys->undo_trunc.get_rseg_truncate_frequency();
+ purge_sys.undo_trunc.get_rseg_truncate_frequency();
ulint rseg_truncate_frequency = ut_min(
static_cast<ulint>(srv_purge_rseg_truncate_frequency),
undo_trunc_freq);
n_pages_purged = trx_purge(
- n_use_threads, srv_purge_batch_size,
+ n_use_threads,
(++count % rseg_truncate_frequency) == 0);
*n_total_purged += n_pages_purged;
- } while (!srv_purge_should_exit(n_pages_purged)
- && n_pages_purged > 0
- && purge_sys->state == PURGE_STATE_RUN);
+ } while (n_pages_purged > 0 && !purge_sys.paused()
+ && !srv_purge_should_exit());
return(rseg_history_len);
}
@@ -2717,34 +2625,25 @@ srv_purge_coordinator_suspend(
int64_t sig_count = srv_suspend_thread(slot);
do {
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = false;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
/* We don't wait right away on the non-timed wait because
we want to signal the thread that wants to suspend purge. */
const bool wait = stop
- || rseg_history_len <= trx_sys->rseg_history_len;
+ || rseg_history_len <= trx_sys.history_size();
const bool timeout = srv_resume_thread(
slot, sig_count, wait,
stop ? 0 : SRV_PURGE_MAX_TIMEOUT);
sig_count = srv_suspend_thread(slot);
- rw_lock_x_lock(&purge_sys->latch);
+ rw_lock_x_lock(&purge_sys.latch);
- stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && purge_sys->state == PURGE_STATE_STOP);
+ stop = srv_shutdown_state == SRV_SHUTDOWN_NONE
+ && purge_sys.paused_latched();
if (!stop) {
- ut_a(purge_sys->n_stop == 0);
- purge_sys->running = true;
-
if (timeout
- && rseg_history_len == trx_sys->rseg_history_len
- && trx_sys->rseg_history_len < 5000) {
+ && rseg_history_len < 5000
+ && rseg_history_len == trx_sys.history_size()) {
/* No new records were added since the
wait started. Simply wait for new
records. The magic number 5000 is an
@@ -2755,13 +2654,11 @@ srv_purge_coordinator_suspend(
stop = true;
}
} else {
- ut_a(purge_sys->n_stop > 0);
-
/* Signal that we are suspended. */
- os_event_set(purge_sys->event);
+ os_event_set(purge_sys.event);
}
- rw_lock_x_unlock(&purge_sys->latch);
+ rw_lock_x_unlock(&purge_sys.latch);
} while (stop && srv_undo_sources);
srv_resume_thread(slot, 0, false);
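In the rewritten suspend loop above, the coordinator stays suspended only when the timed wait expired without the history list growing and the list is short (the "magic number 5000" referenced in the surrounding comment). A simplified sketch of just that condition:

#include <cassert>
#include <cstddef>

static bool keep_suspended_sketch(bool timed_out,
                                  std::size_t snapshot_len,
                                  std::size_t current_len)
{
    return timed_out
        && snapshot_len < 5000           // history list is short
        && snapshot_len == current_len;  // nothing new arrived while waiting
}

int main()
{
    assert(keep_suspended_sketch(true, 10, 10));      // idle: stay suspended
    assert(!keep_suspended_sketch(true, 10, 250));    // history grew: resume
    assert(!keep_suspended_sketch(true, 9000, 9000)); // long history: resume
    return 0;
}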
@@ -2784,15 +2681,9 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
ut_ad(!srv_read_only_mode);
ut_a(srv_n_purge_threads >= 1);
- ut_a(trx_purge_state() == PURGE_STATE_INIT);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = true;
- purge_sys->state = PURGE_STATE_RUN;
-
- rw_lock_x_unlock(&purge_sys->latch);
+ purge_sys.coordinator_startup();
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_purge_thread_key);
@@ -2805,7 +2696,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
slot = srv_reserve_slot(SRV_PURGE);
- ulint rseg_history_len = trx_sys->rseg_history_len;
+ ulint rseg_history_len = trx_sys.history_size();
do {
/* If there are no records to purge or the last
@@ -2813,22 +2704,21 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
if (srv_shutdown_state == SRV_SHUTDOWN_NONE
&& srv_undo_sources
- && (purge_sys->state == PURGE_STATE_STOP
- || n_total_purged == 0)) {
+ && (n_total_purged == 0 || purge_sys.paused())) {
srv_purge_coordinator_suspend(slot, rseg_history_len);
}
ut_ad(!slot->suspended);
- if (srv_purge_should_exit(n_total_purged)) {
+ if (srv_purge_should_exit()) {
break;
}
n_total_purged = 0;
rseg_history_len = srv_do_purge(&n_total_purged);
- } while (!srv_purge_should_exit(n_total_purged));
+ } while (!srv_purge_should_exit());
/* The task queue should always be empty, independent of fast
shutdown state. */
@@ -2837,20 +2727,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
srv_free_slot(slot);
/* Note that we are shutting down. */
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->state = PURGE_STATE_EXIT;
+ rw_lock_x_lock(&purge_sys.latch);
+ purge_sys.coordinator_shutdown();
/* If there are any pending undo-tablespace truncate then clear
it off as we plan to shutdown the purge thread. */
- purge_sys->undo_trunc.clear();
-
- purge_sys->running = false;
+ purge_sys.undo_trunc.clear();
- /* Ensure that the wait in trx_purge_stop() will terminate. */
- os_event_set(purge_sys->event);
+ /* Ensure that the wait in purge_sys_t::stop() will terminate. */
+ os_event_set(purge_sys.event);
- rw_lock_x_unlock(&purge_sys->latch);
+ rw_lock_x_unlock(&purge_sys.latch);
#ifdef UNIV_DEBUG_THREAD_CREATION
ib::info() << "Purge coordinator exiting, id "
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index ae890750841..a2c9828bfee 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -80,7 +80,6 @@ Created 2/16/1996 Heikki Tuuri
#include "os0proc.h"
#include "buf0flu.h"
#include "buf0rea.h"
-#include "buf0mtflu.h"
#include "dict0boot.h"
#include "dict0load.h"
#include "dict0stats_bg.h"
@@ -130,7 +129,7 @@ bool srv_sys_tablespaces_open;
bool srv_was_started;
/** The original value of srv_log_file_size (innodb_log_file_size) */
static ulonglong srv_log_file_size_requested;
-/** TRUE if innobase_start_or_create_for_mysql() has been called */
+/** whether srv_start() has been called */
static bool srv_start_has_been_called;
/** Whether any undo log records can be generated */
@@ -181,9 +180,7 @@ static ulint n[SRV_MAX_N_IO_THREADS + 6];
/** io_handler_thread identifiers, 32 is the maximum number of purge threads */
/** 6 is the ? */
#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32)
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER];
-/* Thread contex data for multi-threaded flush */
-void *mtflush_ctx=NULL;
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];
/** Thread handles */
static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32];
@@ -194,9 +191,6 @@ static bool thread_started[SRV_MAX_N_IO_THREADS + 6 + 32] = {false};
/** Name of srv_monitor_file */
static char* srv_monitor_file_name;
-/** Minimum expected tablespace size. (10M) */
-static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024;
-
/** */
#define SRV_MAX_N_PENDING_SYNC_IOS 100
@@ -304,7 +298,7 @@ DECLARE_THREAD(io_handler_thread)(
#endif
/* For read only mode, we don't need ibuf and log I/O thread.
- Please see innobase_start_or_create_for_mysql() */
+ Please see srv_start() */
ulint start = (srv_read_only_mode) ? 0 : 2;
if (segment < start) {
@@ -482,7 +476,7 @@ create_log_files(
false, false);
}
- log_init(srv_n_log_files);
+ log_sys.log.create(srv_n_log_files);
if (!log_set_capacity(srv_log_file_size_requested)) {
return(DB_ERROR);
}
@@ -491,7 +485,7 @@ create_log_files(
/* Create a log checkpoint. */
log_mutex_enter();
- if (log_sys->is_encrypted() && !log_crypt_init()) {
+ if (log_sys.is_encrypted() && !log_crypt_init()) {
return(DB_ERROR);
}
ut_d(recv_no_log_write = false);
@@ -607,13 +601,13 @@ srv_undo_tablespace_create(
" be created";
ib::info() << "Setting file " << name << " size to "
- << (size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB";
+ << (size >> (20 - srv_page_size_shift)) << " MB";
ib::info() << "Database physically writes the file full: "
<< "wait...";
ret = os_file_set_size(
- name, fh, os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT);
+ name, fh, os_offset_t(size) << srv_page_size_shift);
if (!ret) {
ib::info() << "Error in creating " << name
@@ -671,7 +665,7 @@ static bool srv_undo_tablespace_open(const char* name, ulint space_id,
fil_node_t* file = space->add(name, fh, 0, false, true);
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
if (create_new_db) {
space->size = file->size = ulint(size >> srv_page_size_shift);
@@ -681,12 +675,12 @@ static bool srv_undo_tablespace_open(const char* name, ulint space_id,
if (!success) {
os_file_close(file->handle);
file->handle = OS_FILE_CLOSED;
- ut_a(fil_system->n_open > 0);
- fil_system->n_open--;
+ ut_a(fil_system.n_open > 0);
+ fil_system.n_open--;
}
}
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
return success;
}
@@ -700,7 +694,7 @@ dberr_t
srv_check_undo_redo_logs_exists()
{
bool ret;
- os_file_t fh;
+ pfs_os_file_t fh;
char name[OS_FILE_MAX_PATH];
/* Check if any undo tablespaces exist */
@@ -985,24 +979,19 @@ srv_undo_tablespaces_init(bool create_new_db)
if (create_new_db) {
mtr_t mtr;
- mtr_start(&mtr);
-
- /* The undo log tablespace */
for (i = 0; i < n_undo_tablespaces; ++i) {
-
- fsp_header_init(
- undo_tablespace_ids[i],
- SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
+ mtr.start();
+ fsp_header_init(fil_space_get(undo_tablespace_ids[i]),
+ SRV_UNDO_TABLESPACE_SIZE_IN_PAGES,
+ &mtr);
+ mtr.commit();
}
-
- mtr_commit(&mtr);
}
if (!undo::Truncate::s_fix_up_spaces.empty()) {
/* Step-1: Initialize the tablespace header and rsegs header. */
mtr_t mtr;
- trx_sysf_t* sys_header;
mtr_start(&mtr);
/* Turn off REDO logging. We are in server start mode and fixing
@@ -1011,7 +1000,11 @@ srv_undo_tablespaces_init(bool create_new_db)
as part of the current recovery process. We surely don't need
that as this is fix-up action parallel to REDO logging. */
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- sys_header = trx_sysf_get(&mtr);
+ buf_block_t* sys_header = trx_sysf_get(&mtr);
+ if (!sys_header) {
+ mtr.commit();
+ return DB_CORRUPTION;
+ }
for (undo::undo_spaces_t::const_iterator it
= undo::Truncate::s_fix_up_spaces.begin();
@@ -1020,19 +1013,17 @@ srv_undo_tablespaces_init(bool create_new_db)
undo::Truncate::add_space_to_trunc_list(*it);
- fsp_header_init(
- *it, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
+ fil_space_t* space = fil_space_get(*it);
- mtr_x_lock(fil_space_get_latch(*it, NULL), &mtr);
+ fsp_header_init(space,
+ SRV_UNDO_TABLESPACE_SIZE_IN_PAGES,
+ &mtr);
for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- ulint space_id = trx_sysf_rseg_get_space(
- sys_header, i, &mtr);
-
- if (space_id == *it) {
+ if (trx_sysf_rseg_get_space(sys_header, i)
+ == *it) {
trx_rseg_header_create(
- *it, ULINT_MAX, i, &mtr);
+ space, i, sys_header, &mtr);
}
}
@@ -1045,9 +1036,9 @@ srv_undo_tablespaces_init(bool create_new_db)
= undo::Truncate::s_fix_up_spaces.begin();
it != undo::Truncate::s_fix_up_spaces.end();
++it) {
- FlushObserver dummy(TRX_SYS_SPACE, NULL, NULL);
+ FlushObserver dummy(fil_system.sys_space, NULL, NULL);
buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy);
- FlushObserver dummy2(*it, NULL, NULL);
+ FlushObserver dummy2(fil_space_get(*it), NULL, NULL);
buf_LRU_flush_or_remove_pages(*it, &dummy2);
/* Remove the truncate redo log file. */
@@ -1058,41 +1049,6 @@ srv_undo_tablespaces_init(bool create_new_db)
return(DB_SUCCESS);
}
-/********************************************************************
-Wait for the purge thread(s) to start up. */
-static
-void
-srv_start_wait_for_purge_to_start()
-/*===============================*/
-{
- /* Wait for the purge coordinator and master thread to startup. */
-
- purge_state_t state = trx_purge_state();
-
- ut_a(state != PURGE_STATE_DISABLED);
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
- && state == PURGE_STATE_INIT) {
-
- switch (state = trx_purge_state()) {
- case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
- break;
-
- case PURGE_STATE_INIT:
- ib::info() << "Waiting for purge to start";
-
- os_thread_sleep(50000);
- break;
-
- case PURGE_STATE_EXIT:
- case PURGE_STATE_DISABLED:
- ut_error;
- }
- }
-}
-
/** Create the temporary file tablespace.
@param[in] create_new_db whether we are creating a new database
@return DB_SUCCESS or error code. */
@@ -1118,47 +1074,30 @@ srv_open_tmp_tablespace(bool create_new_db)
&create_new_temp_space, 12 * 1024 * 1024);
if (err == DB_FAIL) {
-
- ib::error() << "The " << srv_tmp_space.name()
- << " data file must be writable!";
-
+ ib::error() << "The innodb_temporary"
+ " data file must be writable!";
err = DB_ERROR;
-
} else if (err != DB_SUCCESS) {
- ib::error() << "Could not create the shared "
- << srv_tmp_space.name() << ".";
-
+ ib::error() << "Could not create the shared innodb_temporary.";
} else if ((err = srv_tmp_space.open_or_create(
true, create_new_db, &sum_of_new_sizes, NULL))
!= DB_SUCCESS) {
-
- ib::error() << "Unable to create the shared "
- << srv_tmp_space.name();
-
+ ib::error() << "Unable to create the shared innodb_temporary";
+ } else if (fil_system.temp_space->open()) {
+ /* Initialize the header page */
+ mtr_t mtr;
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NO_REDO);
+ fsp_header_init(fil_system.temp_space,
+ srv_tmp_space.get_sum_of_sizes(),
+ &mtr);
+ mtr.commit();
} else {
-
- mtr_t mtr;
- ulint size = srv_tmp_space.get_sum_of_sizes();
-
- /* Open this shared temp tablespace in the fil_system so that
- it stays open until shutdown. */
- if (fil_space_open(srv_tmp_space.name())) {
-
- /* Initialize the header page */
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- fsp_header_init(SRV_TMP_SPACE_ID, size, &mtr);
-
- mtr_commit(&mtr);
- } else {
- /* This file was just opened in the code above! */
- ib::error() << "The " << srv_tmp_space.name()
- << " data file cannot be re-opened"
- " after check_file_spec() succeeded!";
-
- err = DB_ERROR;
- }
+ /* This file was just opened in the code above! */
+ ib::error() << "The innodb_temporary"
+ " data file cannot be re-opened"
+ " after check_file_spec() succeeded!";
+ err = DB_ERROR;
}
return(err);
@@ -1173,7 +1112,7 @@ srv_start_state_set(
srv_start_state_t state) /*!< in: indicate current state of
thread startup */
{
- srv_start_state |= state;
+ srv_start_state |= ulint(state);
}
/****************************************************************//**
@@ -1185,7 +1124,7 @@ srv_start_state_is_set(
/*===================*/
srv_start_state_t state) /*!< in: state to check for */
{
- return(srv_start_state & state);
+ return(srv_start_state & ulint(state));
}
/**
@@ -1206,7 +1145,7 @@ srv_shutdown_all_bg_threads()
if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) {
/* a. Let the lock timeout thread exit */
- os_event_set(lock_sys->timeout_event);
+ os_event_set(lock_sys.timeout_event);
}
if (!srv_read_only_mode) {
@@ -1245,10 +1184,6 @@ srv_shutdown_all_bg_threads()
}
os_event_set(buf_flush_event);
-
- if (srv_use_mtflush) {
- buf_mtflu_io_thread_exit();
- }
}
if (!os_thread_count) {
@@ -1318,6 +1253,7 @@ srv_init_abort_low(
" with error " << ut_strerr(err);
}
+ srv_shutdown_bg_undo_sources();
srv_shutdown_all_bg_threads();
return(err);
}
@@ -1337,17 +1273,10 @@ srv_prepare_to_delete_redo_log_files(
ulint pending_io = 0;
ulint count = 0;
- if (srv_safe_truncate) {
- if ((log_sys->log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
- != LOG_HEADER_FORMAT_10_3
- || log_sys->log.subformat != 1) {
- srv_log_file_size = 0;
- }
- } else {
- if ((log_sys->log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
- != LOG_HEADER_FORMAT_10_2) {
- srv_log_file_size = 0;
- }
+ if ((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
+ != LOG_HEADER_FORMAT_CURRENT
+ || log_sys.log.subformat != 2) {
+ srv_log_file_size = 0;
}
do {
@@ -1359,23 +1288,23 @@ srv_prepare_to_delete_redo_log_files(
log_mutex_enter();
- fil_names_clear(log_sys->lsn, false);
+ fil_names_clear(log_sys.lsn, false);
- flushed_lsn = log_sys->lsn;
+ flushed_lsn = log_sys.lsn;
{
ib::info info;
if (srv_log_file_size == 0) {
- info << ((log_sys->log.format
+ info << ((log_sys.log.format
& ~LOG_HEADER_FORMAT_ENCRYPTED)
- < LOG_HEADER_FORMAT_10_3
+ != LOG_HEADER_FORMAT_10_4
? "Upgrading redo log: "
: "Downgrading redo log: ");
} else if (n_files != srv_n_log_files
|| srv_log_file_size
!= srv_log_file_size_requested) {
if (srv_encrypt_log
- == log_sys->is_encrypted()) {
+ == (my_bool)log_sys.is_encrypted()) {
info << (srv_encrypt_log
? "Resizing encrypted"
: "Resizing");
@@ -1433,14 +1362,11 @@ srv_prepare_to_delete_redo_log_files(
DBUG_RETURN(flushed_lsn);
}
-/********************************************************************
-Starts InnoDB and creates a new database if database files
-are not found and the user wants.
+/** Start InnoDB.
+@param[in] create_new_db whether to create a new database
@return DB_SUCCESS or error code */
-dberr_t
-innobase_start_or_create_for_mysql()
+dberr_t srv_start(bool create_new_db)
{
- bool create_new_db = false;
lsn_t flushed_lsn;
dberr_t err = DB_SUCCESS;
ulint srv_n_log_files_found = srv_n_log_files;
@@ -1454,6 +1380,7 @@ innobase_start_or_create_for_mysql()
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
+
if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
srv_read_only_mode = true;
}
@@ -1465,15 +1392,6 @@ innobase_start_or_create_for_mysql()
/* Reset the start state. */
srv_start_state = SRV_START_STATE_NONE;
- if (srv_read_only_mode) {
- ib::info() << "Started in read only mode";
-
- /* There is no write to InnoDB tablespaces (not even
- temporary ones, because also CREATE TEMPORARY TABLE is
- refused in read-only mode). */
- srv_use_doublewrite_buf = FALSE;
- }
-
compile_time_assert(sizeof(ulint) == sizeof(void*));
#ifdef UNIV_DEBUG
@@ -1529,62 +1447,10 @@ innobase_start_or_create_for_mysql()
srv_is_being_started = true;
-#ifdef _WIN32
- srv_use_native_aio = TRUE;
-
-#elif defined(LINUX_NATIVE_AIO)
-
- if (srv_use_native_aio) {
- ib::info() << "Using Linux native AIO";
- }
-#else
- /* Currently native AIO is supported only on windows and linux
- and that also when the support is compiled in. In all other
- cases, we ignore the setting of innodb_use_native_aio. */
- srv_use_native_aio = FALSE;
-#endif /* _WIN32 */
-
/* Register performance schema stages before any real work has been
started which may need to be instrumented. */
mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));
- if (srv_file_flush_method_str == NULL) {
- /* These are the default options */
- srv_file_flush_method = IF_WIN(SRV_ALL_O_DIRECT_FSYNC,SRV_FSYNC);
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_file_flush_method = SRV_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_file_flush_method = SRV_O_DSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_file_flush_method = SRV_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
- srv_file_flush_method = SRV_O_DIRECT_NO_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_file_flush_method = SRV_LITTLESYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_file_flush_method = SRV_NOSYNC;
-#ifdef _WIN32
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_file_flush_method = SRV_FSYNC;
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- } else if (0 == ut_strcmp(srv_file_flush_method_str,
- "async_unbuffered")) {
-#endif /* _WIN32 */
- } else {
- ib::error() << "Unrecognized value "
- << srv_file_flush_method_str
- << " for innodb_flush_method";
- err = DB_ERROR;
- }
-
- /* Note that the call srv_boot() also changes the values of
- some variables to the units used by InnoDB internally */
-
/* Set the maximum number of threads which can wait for a semaphore
inside InnoDB: this is the 'sync wait array' size, as well as the
maximum number of threads that can wait in the 'srv_conc array' for
@@ -1601,7 +1467,7 @@ innobase_start_or_create_for_mysql()
+ 1 /* dict_stats_thread */
+ 1 /* fts_optimize_thread */
+ 1 /* recv_writer_thread */
- + 1 /* trx_rollback_or_clean_all_recovered */
+ + 1 /* trx_rollback_all_recovered */
+ 128 /* added as margin, for use of
InnoDB Memcached etc. */
+ max_connections
@@ -1613,65 +1479,6 @@ innobase_start_or_create_for_mysql()
+ fts_sort_pll_degree * FTS_NUM_AUX_INDEX
* max_connections;
- if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
-
- if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
-#if defined(_WIN32) && !defined(_WIN64)
- /* Do not allocate too large of a buffer pool on
- Windows 32-bit systems, which can have trouble
- allocating larger single contiguous memory blocks. */
- srv_buf_pool_size = static_cast<ulint>(ut_uint64_align_up(srv_buf_pool_size, srv_buf_pool_chunk_unit));
- srv_buf_pool_instances = ut_min(
- static_cast<ulong>(MAX_BUFFER_POOLS),
- static_cast<ulong>(srv_buf_pool_size / srv_buf_pool_chunk_unit));
-#else /* defined(_WIN32) && !defined(_WIN64) */
- /* Default to 8 instances when size > 1GB. */
- srv_buf_pool_instances = 8;
-#endif /* defined(_WIN32) && !defined(_WIN64) */
- }
- } else {
- /* If buffer pool is less than 1 GiB, assume fewer
- threads. Also use only one buffer pool instance. */
- if (srv_buf_pool_instances != srv_buf_pool_instances_default
- && srv_buf_pool_instances != 1) {
- /* We can't distinguish whether the user has explicitly
- started mysqld with --innodb-buffer-pool-instances=0,
- (srv_buf_pool_instances_default is 0) or has not
- specified that option at all. Thus we have the
- limitation that if the user started with =0, we
- will not emit a warning here, but we should actually
- do so. */
- ib::info()
- << "Adjusting innodb_buffer_pool_instances"
- " from " << srv_buf_pool_instances << " to 1"
- " since innodb_buffer_pool_size is less than "
- << BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
- << " MiB";
- }
-
- srv_buf_pool_instances = 1;
- }
-
- if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
- > srv_buf_pool_size) {
- /* Size unit of buffer pool is larger than srv_buf_pool_size.
- adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
- srv_buf_pool_chunk_unit
- = static_cast<ulong>(srv_buf_pool_size)
- / srv_buf_pool_instances;
- if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
- ++srv_buf_pool_chunk_unit;
- }
- }
-
- srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
-
- if (srv_n_page_cleaners > srv_buf_pool_instances) {
- /* limit of page_cleaner parallelizability
- is number of buffer pool instances. */
- srv_n_page_cleaners = srv_buf_pool_instances;
- }
-
srv_boot();
ib::info() << ut_crc32_implementation;
@@ -1706,7 +1513,7 @@ innobase_start_or_create_for_mysql()
} else {
srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile(NULL);
+ srv_monitor_file = os_file_create_tmpfile();
if (!srv_monitor_file && err == DB_SUCCESS) {
err = DB_ERROR;
@@ -1716,7 +1523,7 @@ innobase_start_or_create_for_mysql()
mutex_create(LATCH_ID_SRV_MISC_TMPFILE,
&srv_misc_tmpfile_mutex);
- srv_misc_tmpfile = os_file_create_tmpfile(NULL);
+ srv_misc_tmpfile = os_file_create_tmpfile();
if (!srv_misc_tmpfile && err == DB_SUCCESS) {
err = DB_ERROR;
@@ -1750,7 +1557,7 @@ innobase_start_or_create_for_mysql()
return(srv_init_abort(DB_ERROR));
}
- fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
+ fil_system.create(srv_file_per_table ? 50000 : 5000);
double size;
char unit;
@@ -1801,11 +1608,9 @@ innobase_start_or_create_for_mysql()
}
#endif /* UNIV_DEBUG */
- fsp_init();
- log_sys_init();
-
+ log_sys.create();
recv_sys_init();
- lock_sys_create(srv_lock_table_size);
+ lock_sys.create(srv_lock_table_size);
/* Create i/o-handler threads: */
@@ -1824,9 +1629,10 @@ innobase_start_or_create_for_mysql()
os_thread_create(buf_flush_page_cleaner_coordinator,
NULL, NULL);
- for (i = 1; i < srv_n_page_cleaners; ++i) {
- os_thread_create(buf_flush_page_cleaner_worker,
- NULL, NULL);
+ /* Create page cleaner workers if needed. For example
+ mariabackup could set srv_n_page_cleaners = 0. */
+ if (srv_n_page_cleaners > 1) {
+ buf_flush_set_page_cleaner_thread_cnt(srv_n_page_cleaners);
}
#ifdef UNIV_LINUX
@@ -1836,27 +1642,6 @@ innobase_start_or_create_for_mysql()
srv_start_state_set(SRV_START_STATE_IO);
}
- if (srv_n_log_files * srv_log_file_size >= 512ULL << 30) {
- /* log_block_convert_lsn_to_no() limits the returned block
- number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
- bytes, then we have a limit of 512 GB. If that limit is to
- be raised, then log_block_convert_lsn_to_no() must be
- modified. */
- ib::error() << "Combined size of log files must be < 512 GB";
-
- return(srv_init_abort(DB_ERROR));
- }
-
- os_normalize_path(srv_data_home);
-
- /* Check if the data files exist or not. */
- err = srv_sys_space.check_file_spec(
- &create_new_db, MIN_EXPECTED_TABLESPACE_SIZE);
-
- if (err != DB_SUCCESS) {
- return(srv_init_abort(DB_ERROR));
- }
-
srv_startup_is_before_trx_rollback_phase = !create_new_db;
/* Check if undo tablespaces and redo log files exist before creating
@@ -2058,7 +1843,7 @@ innobase_start_or_create_for_mysql()
false, false);
}
- log_init(srv_n_log_files_found);
+ log_sys.log.create(srv_n_log_files_found);
if (!log_set_capacity(srv_log_file_size_requested)) {
return(srv_init_abort(DB_ERROR));
@@ -2071,7 +1856,7 @@ files_checked:
shutdown */
fil_open_log_and_system_tablespace_files();
- ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
+ ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
err = srv_undo_tablespaces_init(create_new_db);
@@ -2090,24 +1875,20 @@ files_checked:
dict_stats_thread_init();
}
- trx_sys_file_format_init();
-
- trx_sys_create();
+ trx_sys.create();
if (create_new_db) {
ut_a(!srv_read_only_mode);
mtr_start(&mtr);
-
- fsp_header_init(0, sum_of_new_sizes, &mtr);
-
+ ut_ad(fil_system.sys_space->id == 0);
compile_time_assert(TRX_SYS_SPACE == 0);
compile_time_assert(IBUF_SPACE_ID == 0);
+ fsp_header_init(fil_system.sys_space, sum_of_new_sizes, &mtr);
ulint ibuf_root = btr_create(
- DICT_CLUSTERED | DICT_IBUF,
- 0, univ_page_size, DICT_IBUF_ID_MIN,
- dict_ind_redundant, NULL, &mtr);
+ DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space,
+ DICT_IBUF_ID_MIN, dict_ind_redundant, NULL, &mtr);
mtr_commit(&mtr);
@@ -2122,7 +1903,7 @@ files_checked:
All the remaining rollback segments will be created later,
after the double write buffer has been created. */
trx_sys_create_sys_pages();
- trx_sys_init_at_db_start();
+ trx_lists_init_at_db_start();
err = dict_create();
@@ -2146,26 +1927,6 @@ files_checked:
return(srv_init_abort(err));
}
} else {
-
- /* Check if we support the max format that is stamped
- on the system tablespace.
- Note: We are NOT allowed to make any modifications to
- the TRX_SYS_PAGE_NO page before recovery because this
- page also contains the max_trx_id etc. important system
- variables that are required for recovery. We need to
- ensure that we return the system to a state where normal
- recovery is guaranteed to work. We do this by
- invalidating the buffer cache, this will force the
- reread of the page and restoration to its last known
- consistent state, this is REQUIRED for the recovery
- process to work. */
- err = trx_sys_file_format_max_check(
- srv_max_file_format_at_startup);
-
- if (err != DB_SUCCESS) {
- return(srv_init_abort(err));
- }
-
/* Invalidate the buffer pool to ensure that we reread
the page that we read above, during recovery.
Note that this is not as heavy weight as it seems. At
@@ -2201,19 +1962,15 @@ files_checked:
if (err != DB_SUCCESS) {
return(srv_init_abort(err));
}
+ /* fall through */
+ case SRV_OPERATION_RESTORE:
/* This must precede
recv_apply_hashed_log_recs(true). */
- trx_sys_init_at_db_start();
+ trx_lists_init_at_db_start();
break;
case SRV_OPERATION_RESTORE_DELTA:
case SRV_OPERATION_BACKUP:
ut_ad(!"wrong mariabackup mode");
- /* fall through */
- case SRV_OPERATION_RESTORE:
- /* mariabackup --prepare only deals with
- the redo log and the data files, not with
- transactions or the data dictionary. */
- break;
}
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
@@ -2237,15 +1994,27 @@ files_checked:
if (!srv_read_only_mode) {
const ulint flags = FSP_FLAGS_PAGE_SSIZE();
for (ulint id = 0; id <= srv_undo_tablespaces; id++) {
- if (fil_space_get(id)) {
- fsp_flags_try_adjust(id, flags);
+ if (fil_space_t* space = fil_space_get(id)) {
+ fsp_flags_try_adjust(space, flags);
}
}
if (sum_of_new_sizes > 0) {
/* New data file(s) were added */
mtr.start();
- fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
+ buf_block_t* block = buf_page_get(
+ page_id_t(0, 0), univ_page_size,
+ RW_SX_LATCH, &mtr);
+ ulint size = mach_read_from_4(
+ FSP_HEADER_OFFSET + FSP_SIZE
+ + block->frame);
+ ut_ad(size == fil_system.sys_space
+ ->size_in_header);
+ size += sum_of_new_sizes;
+ mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SIZE
+ + block->frame, size,
+ MLOG_4BYTES, &mtr);
+ fil_system.sys_space->size_in_header = size;
mtr.commit();
/* Immediately write the log record about
increased tablespace size to disk, so that it
@@ -2255,8 +2024,20 @@ files_checked:
}
}
+#ifdef UNIV_DEBUG
+ {
+ mtr.start();
+ buf_block_t* block = buf_page_get(page_id_t(0, 0),
+ univ_page_size,
+ RW_S_LATCH, &mtr);
+ ut_ad(mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET
+ + block->frame)
+ == fil_system.sys_space->size_in_header);
+ mtr.commit();
+ }
+#endif
const ulint tablespace_size_in_header
- = fsp_header_get_tablespace_size();
+ = fil_system.sys_space->size_in_header;
const ulint sum_of_data_file_sizes
= srv_sys_space.get_sum_of_sizes();
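The hunks above replace fsp_header_inc_size()/fsp_header_get_tablespace_size() with an explicit read-modify-write of the 4-byte FSP_SIZE field in page 0 of the system tablespace, mirrored into fil_system.sys_space->size_in_header and re-checked under UNIV_DEBUG. A standalone sketch of the field arithmetic only, with plain big-endian helpers in place of mach_read_from_4()/mlog_write_ulint() and no buffer pool or mini-transaction involved; the page counts are assumed values:

#include <cassert>
#include <cstdint>

static std::uint32_t read_be32(const unsigned char* p)
{
    return (std::uint32_t(p[0]) << 24) | (std::uint32_t(p[1]) << 16)
        | (std::uint32_t(p[2]) << 8) | std::uint32_t(p[3]);
}

static void write_be32(unsigned char* p, std::uint32_t v)
{
    p[0] = (unsigned char)(v >> 24);
    p[1] = (unsigned char)(v >> 16);
    p[2] = (unsigned char)(v >> 8);
    p[3] = (unsigned char)v;
}

int main()
{
    unsigned char fsp_size_field[4];
    write_be32(fsp_size_field, 768);         // current size, in pages
    std::uint32_t size = read_be32(fsp_size_field);
    size += 64;                              // sum_of_new_sizes (assumed)
    write_be32(fsp_size_field, size);
    assert(read_be32(fsp_size_field) == 832);
    return 0;
}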
/* Compare the system tablespace file size to what is
@@ -2294,7 +2075,7 @@ files_checked:
}
/* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
+ are initialized in trx_lists_init_at_db_start(). */
recv_recovery_from_checkpoint_finish();
@@ -2312,13 +2093,12 @@ files_checked:
err = fil_write_flushed_lsn(log_get_lsn());
ut_ad(!buf_pool_check_no_pending_io());
fil_close_log_files(true);
- log_group_close_all();
if (err == DB_SUCCESS) {
bool trunc = srv_operation
== SRV_OPERATION_RESTORE;
/* Delete subsequent log files. */
delete_log_files(logfilename, dirnamelen,
- srv_n_log_files_found, trunc);
+ (uint)srv_n_log_files_found, trunc);
if (trunc) {
/* Truncate the first log file. */
strcpy(logfilename + dirnamelen,
@@ -2340,17 +2120,12 @@ files_checked:
/* Leave the redo log alone. */
} else if (srv_log_file_size_requested == srv_log_file_size
&& srv_n_log_files_found == srv_n_log_files
- && log_sys->log.format
- == (srv_safe_truncate
- ? (srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_3
- | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_3)
- : (srv_encrypt_log
- ? LOG_HEADER_FORMAT_10_2
- | LOG_HEADER_FORMAT_ENCRYPTED
- : LOG_HEADER_FORMAT_10_2))
- && log_sys->log.subformat == !!srv_safe_truncate) {
+ && log_sys.log.format
+ == (srv_encrypt_log
+ ? LOG_HEADER_FORMAT_CURRENT
+ | LOG_HEADER_FORMAT_ENCRYPTED
+ : LOG_HEADER_FORMAT_CURRENT)
+ && log_sys.log.subformat == 2) {
/* No need to add or remove encryption,
upgrade, downgrade, or resize. */
} else {
@@ -2387,9 +2162,6 @@ files_checked:
return(srv_init_abort(DB_ERROR)););
DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));
- /* Free the old log file space. */
- log_group_close_all();
-
ib::info() << "Starting to delete and rewrite log"
" files.";
@@ -2413,10 +2185,8 @@ files_checked:
/* Validate a few system page types that were left
uninitialized by older versions of MySQL. */
if (!high_level_read_only) {
- mtr_t mtr;
buf_block_t* block;
mtr.start();
- mtr.set_sys_modified();
/* Bitmap page types will be reset in
buf_dblwr_check_block() without redo logging. */
block = buf_page_get(
@@ -2449,7 +2219,7 @@ files_checked:
The data dictionary latch should guarantee that there is at
most one data dictionary transaction active at a time. */
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- trx_rollback_or_clean_recovered(FALSE);
+ trx_rollback_recovered(false);
}
/* Fix-up truncate of tables in the system tablespace
@@ -2507,13 +2277,6 @@ files_checked:
recv_recovery_rollback_active();
srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* It is possible that file_format tag has never
- been set. In this case we initialize it to minimum
- value. Important to note that we can do it ONLY after
- we have finished the recovery process so that the
- image of TRX_SYS_PAGE_NO is not stale. */
- trx_sys_file_format_tag_init();
}
ut_ad(err == DB_SUCCESS);
@@ -2554,7 +2317,7 @@ files_checked:
lock_wait_timeout_thread,
NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
thread_started[2 + SRV_MAX_N_IO_THREADS] = true;
- lock_sys->timeout_thread_active = true;
+ lock_sys.timeout_thread_active = true;
/* Create the thread which warns of long semaphore waits */
srv_error_monitor_active = true;
@@ -2571,6 +2334,21 @@ files_checked:
thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
srv_start_state |= SRV_START_STATE_LOCK_SYS
| SRV_START_STATE_MONITOR;
+
+ ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN
+ || !purge_sys.enabled());
+
+ if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ srv_undo_sources = true;
+ /* Create the dict stats gathering thread */
+ srv_dict_stats_thread_active = true;
+ dict_stats_thread_handle = os_thread_create(
+ dict_stats_thread, NULL, NULL);
+
+ /* Create the thread that will optimize the
+ FULLTEXT search index subsystem. */
+ fts_optimize_init();
+ }
}
/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
@@ -2604,32 +2382,19 @@ files_checked:
}
trx_temp_rseg_create();
- }
-
- ut_a(trx_purge_state() == PURGE_STATE_INIT);
- /* Create the master thread which does purge and other utility
- operations */
-
- if (!srv_read_only_mode
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
- thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_master_thread,
- NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
- thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
- srv_start_state_set(SRV_START_STATE_MASTER);
+ if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ thread_handles[1 + SRV_MAX_N_IO_THREADS]
+ = os_thread_create(srv_master_thread, NULL,
+ (1 + SRV_MAX_N_IO_THREADS)
+ + thread_ids);
+ thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
+ srv_start_state_set(SRV_START_STATE_MASTER);
+ }
}
if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL
&& srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
- srv_undo_sources = true;
- /* Create the dict stats gathering thread */
- srv_dict_stats_thread_active = true;
- dict_stats_thread_handle = os_thread_create(
- dict_stats_thread, NULL, NULL);
-
- /* Create the thread that will optimize the FTS sub-system. */
- fts_optimize_init();
thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create(
srv_purge_coordinator_thread,
@@ -2648,11 +2413,14 @@ files_checked:
thread_started[5 + i + SRV_MAX_N_IO_THREADS] = true;
}
- srv_start_wait_for_purge_to_start();
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
+ && !purge_sys.enabled()) {
+ ib::info() << "Waiting for purge to start";
+ os_thread_sleep(50000);
+ }
srv_start_state_set(SRV_START_STATE_PURGE);
- } else {
- purge_sys->state = PURGE_STATE_DISABLED;
}
srv_is_being_started = false;
@@ -2660,25 +2428,13 @@ files_checked:
if (!srv_read_only_mode) {
/* wake main loop of page cleaner up */
os_event_set(buf_flush_event);
-
- if (srv_use_mtflush) {
- /* Start multi-threaded flush threads */
- mtflush_ctx = buf_mtflu_handler_init(
- srv_mtflush_threads,
- srv_buf_pool_instances);
-
- /* Set up the thread ids */
- buf_mtflu_set_thread_ids(
- srv_mtflush_threads,
- mtflush_ctx,
- (thread_ids + 6 + 32));
- }
}
if (srv_print_verbose_log) {
ib::info() << INNODB_VERSION_STR
- << " started; log sequence number "
- << srv_start_lsn;
+ << " started; log sequence number "
+ << srv_start_lsn
+ << "; transaction id " << trx_sys.get_max_trx_id();
}
if (srv_force_recovery > 0) {
@@ -2776,8 +2532,7 @@ srv_fts_close(void)
#endif
/** Shut down background threads that can generate undo log. */
-void
-srv_shutdown_bg_undo_sources()
+void srv_shutdown_bg_undo_sources()
{
if (srv_undo_sources) {
ut_ad(!srv_read_only_mode);
@@ -2792,8 +2547,7 @@ srv_shutdown_bg_undo_sources()
}
/** Shut down InnoDB. */
-void
-innodb_shutdown()
+void innodb_shutdown()
{
ut_ad(!my_atomic_loadptr_explicit(reinterpret_cast<void**>
(&srv_running),
@@ -2837,15 +2591,15 @@ innodb_shutdown()
ut_ad(dict_stats_event || !srv_was_started || srv_read_only_mode);
ut_ad(dict_sys || !srv_was_started);
- ut_ad(trx_sys || !srv_was_started);
+ ut_ad(trx_sys.is_initialised() || !srv_was_started);
ut_ad(buf_dblwr || !srv_was_started || srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- ut_ad(lock_sys || !srv_was_started);
+ ut_ad(lock_sys.is_initialised() || !srv_was_started);
+ ut_ad(log_sys.is_initialised() || !srv_was_started);
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(btr_search_sys || !srv_was_started);
#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(ibuf || !srv_was_started);
- ut_ad(log_sys || !srv_was_started);
if (dict_stats_event) {
dict_stats_thread_deinit();
@@ -2872,47 +2626,29 @@ innodb_shutdown()
if (ibuf) {
ibuf_close();
}
- if (log_sys) {
- log_shutdown();
- }
- if (trx_sys) {
- trx_sys_file_format_close();
- trx_sys_close();
- }
- UT_DELETE(purge_sys);
- purge_sys = NULL;
+ log_sys.close();
+ purge_sys.close();
+ trx_sys.close();
if (buf_dblwr) {
buf_dblwr_free();
}
- if (lock_sys) {
- lock_sys_close();
- }
-
+ lock_sys.close();
trx_pool_close();
- /* We don't create these mutexes in RO mode because we don't create
- the temp files that the cover. */
if (!srv_read_only_mode) {
mutex_free(&srv_monitor_file_mutex);
mutex_free(&srv_misc_tmpfile_mutex);
}
- if (dict_sys) {
- dict_close();
- }
-
-#ifdef BTR_CUR_HASH_ADAPT
- if (btr_search_sys) {
- btr_search_sys_free();
- }
-#endif /* BTR_CUR_HASH_ADAPT */
+ dict_close();
+ btr_search_sys_free();
/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
them */
os_aio_free();
row_mysql_close();
srv_free();
- fil_close();
+ fil_system.close();
/* 4. Free all allocated memory */
@@ -2926,13 +2662,10 @@ innodb_shutdown()
sync_check_close();
- if (dict_foreign_err_file) {
- fclose(dict_foreign_err_file);
- }
-
if (srv_was_started && srv_print_verbose_log) {
ib::info() << "Shutdown completed; log sequence number "
- << srv_shutdown_lsn;
+ << srv_shutdown_lsn
+ << "; transaction id " << trx_sys.get_max_trx_id();
}
srv_start_state = SRV_START_STATE_NONE;
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 183aa09dc2b..b126a2f4ba0 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -582,9 +582,8 @@ sync_array_cell_print(
fprintf(file,
"number of readers " ULINTPF
- ", waiters flag %u, "
- "lock_word: " ULINTPFx "\n"
- "Last time read locked in file %s line %u\n"
+ ", waiters flag %d, "
+ "lock_word: %x\n"
"Last time write locked in file %s line %u"
#if 0 /* JAN: TODO: FIX LATER */
"\nHolder thread " ULINTPF
@@ -592,10 +591,8 @@ sync_array_cell_print(
#endif
"\n",
rw_lock_get_reader_count(rwlock),
- rwlock->waiters,
- rwlock->lock_word,
- innobase_basename(rwlock->last_s_file_name),
- rwlock->last_s_line,
+ my_atomic_load32_explicit(&rwlock->waiters, MY_MEMORY_ORDER_RELAXED),
+ my_atomic_load32_explicit(&rwlock->lock_word, MY_MEMORY_ORDER_RELAXED),
innobase_basename(rwlock->last_x_file_name),
rwlock->last_x_line
#if 0 /* JAN: TODO: FIX LATER */
@@ -1077,13 +1074,11 @@ sync_array_print_long_waits(
}
if (noticed) {
- ibool old_val;
-
fprintf(stderr,
"InnoDB: ###### Starts InnoDB Monitor"
" for 30 secs to print diagnostic info:\n");
- old_val = srv_print_innodb_monitor;
+ my_bool old_val = srv_print_innodb_monitor;
/* If some crucial semaphore is reserved, then also the InnoDB
Monitor can hang, and we do not get diagnostics. Since in
@@ -1156,23 +1151,18 @@ sync_array_print_info(
sync_array_exit(arr);
}
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-void
-sync_array_init(
-/*============*/
- ulint n_threads) /*!< in: Number of slots to
- create in all arrays */
+/** Create the primary system wait arrays */
+void sync_array_init()
{
ut_a(sync_wait_array == NULL);
ut_a(srv_sync_array_size > 0);
- ut_a(n_threads > 0);
+ ut_a(srv_max_n_threads > 0);
sync_array_size = srv_sync_array_size;
sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
- ulint n_slots = 1 + (n_threads - 1) / sync_array_size;
+ ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
for (ulint i = 0; i < sync_array_size; ++i) {
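The new sync_array_init() sizes each wait array as 1 + (srv_max_n_threads - 1) / sync_array_size, a ceiling division so that the arrays together provide at least srv_max_n_threads slots. A small worked example with assumed values:

#include <cassert>

int main()
{
    unsigned long srv_max_n_threads = 10000; // assumed, set by srv_start()
    unsigned long sync_array_size = 32;      // assumed innodb_sync_array_size
    unsigned long n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
    assert(n_slots == 313);                  // 313 * 32 = 10016 >= 10000
    assert(n_slots * sync_array_size >= srv_max_n_threads);
    return 0;
}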
@@ -1180,11 +1170,8 @@ sync_array_init(
}
}
-/**********************************************************************//**
-Close sync array wait sub-system. */
-void
-sync_array_close(void)
-/*==================*/
+/** Destroy the sync array wait sub-system. */
+void sync_array_close()
{
for (ulint i = 0; i < sync_array_size; ++i) {
sync_array_free(sync_wait_array[i]);
@@ -1304,7 +1291,7 @@ sync_arr_fill_sys_semphore_waits_table(
ulint n_items;
DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
@@ -1391,11 +1378,10 @@ sync_arr_fill_sys_semphore_waits_table(
//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)rwlock->waiters));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)rwlock->lock_word));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_READER_FILE], innobase_basename(rwlock->last_s_file_name)));
- OK(fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE]->store(rwlock->last_s_line, true));
- fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE]->set_notnull();
+ OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG],
+ my_atomic_load32_explicit(&rwlock->waiters, MY_MEMORY_ORDER_RELAXED)));
+ OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD],
+ my_atomic_load32_explicit(&rwlock->lock_word, MY_MEMORY_ORDER_RELAXED)));
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
index deb081a6819..8fa4e6c5bd2 100644
--- a/storage/innobase/sync/sync0debug.cc
+++ b/storage/innobase/sync/sync0debug.cc
@@ -33,6 +33,7 @@ Created 2012-08-21 Sunny Bains
#include "sync0sync.h"
#include "sync0debug.h"
#include "srv0start.h"
+#include "fil0fil.h"
#include <vector>
#include <string>
@@ -188,10 +189,10 @@ struct LatchDebug {
latch that the thread is trying
to acquire
@return true if passes, else crash with error message. */
- bool basic_check(
+ inline bool basic_check(
const Latches* latches,
latch_level_t requested_level,
- ulint level) const
+ lint level) const
UNIV_NOTHROW;
/** Adds a latch and its level in the thread level array. Allocates
@@ -477,6 +478,7 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_REC_LOCK);
LEVEL_MAP_INSERT(SYNC_THREADS);
LEVEL_MAP_INSERT(SYNC_TRX);
+ LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT);
LEVEL_MAP_INSERT(SYNC_TRX_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS);
@@ -495,7 +497,6 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_RSEG_HEADER_NEW);
LEVEL_MAP_INSERT(SYNC_NOREDO_RSEG);
LEVEL_MAP_INSERT(SYNC_REDO_RSEG);
- LEVEL_MAP_INSERT(SYNC_TRX_UNDO);
LEVEL_MAP_INSERT(SYNC_PURGE_LATCH);
LEVEL_MAP_INSERT(SYNC_TREE_NODE);
LEVEL_MAP_INSERT(SYNC_TREE_NODE_FROM_HASH);
@@ -509,7 +510,6 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_DICT);
LEVEL_MAP_INSERT(SYNC_FTS_CACHE);
LEVEL_MAP_INSERT(SYNC_DICT_OPERATION);
- LEVEL_MAP_INSERT(SYNC_FILE_FORMAT_TAG);
LEVEL_MAP_INSERT(SYNC_TRX_I_S_LAST_READ);
LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK);
LEVEL_MAP_INSERT(SYNC_RECV_WRITER);
@@ -604,11 +604,11 @@ LatchDebug::less(
The level of the latch that the thread is
trying to acquire
@return true if passes, else crash with error message. */
-bool
+inline bool
LatchDebug::basic_check(
const Latches* latches,
latch_level_t requested_level,
- ulint in_level) const
+ lint in_level) const
UNIV_NOTHROW
{
latch_level_t level = latch_level_t(in_level);
@@ -736,7 +736,7 @@ LatchDebug::check_order(
if (srv_is_being_started) {
/* This is violated during trx_sys_create_rsegs()
when creating additional rollback segments when
- upgrading in innobase_start_or_create_for_mysql(). */
+ upgrading in srv_start(). */
break;
}
@@ -754,17 +754,16 @@ LatchDebug::check_order(
case SYNC_LOG:
case SYNC_LOG_WRITE:
case SYNC_LOG_FLUSH_ORDER:
- case SYNC_FILE_FORMAT_TAG:
case SYNC_DOUBLEWRITE:
case SYNC_SEARCH_SYS:
case SYNC_THREADS:
case SYNC_LOCK_SYS:
case SYNC_LOCK_WAIT_SYS:
+ case SYNC_RW_TRX_HASH_ELEMENT:
case SYNC_TRX_SYS:
case SYNC_IBUF_BITMAP_MUTEX:
case SYNC_REDO_RSEG:
case SYNC_NOREDO_RSEG:
- case SYNC_TRX_UNDO:
case SYNC_PURGE_LATCH:
case SYNC_PURGE_QUEUE:
case SYNC_DICT_AUTOINC_MUTEX:
@@ -808,7 +807,7 @@ LatchDebug::check_order(
case SYNC_TRX:
- /* Either the thread must own the lock_sys->mutex, or
+ /* Either the thread must own the lock_sys.mutex, or
it is allowed to own only ONE trx_t::mutex. */
if (less(latches, level) != NULL) {
@@ -891,8 +890,7 @@ LatchDebug::check_order(
The purge thread can read the UNDO pages without any covering
mutex. */
- ut_a(find(latches, SYNC_TRX_UNDO) != 0
- || find(latches, SYNC_REDO_RSEG) != 0
+ ut_a(find(latches, SYNC_REDO_RSEG) != 0
|| find(latches, SYNC_NOREDO_RSEG) != 0
|| basic_check(latches, level, level - 1));
break;
@@ -910,19 +908,10 @@ LatchDebug::check_order(
case SYNC_TREE_NODE:
- {
- const latch_t* fsp_latch;
-
- fsp_latch = find(latches, SYNC_FSP);
-
- ut_a((fsp_latch != NULL
- && fsp_latch->is_temp_fsp())
- || find(latches, SYNC_INDEX_TREE) != 0
- || find(latches, SYNC_DICT_OPERATION)
- || basic_check(latches,
- level, SYNC_TREE_NODE - 1));
- }
-
+ ut_a(find(latches, SYNC_FSP) == &fil_system.temp_space->latch
+ || find(latches, SYNC_INDEX_TREE)
+ || find(latches, SYNC_DICT_OPERATION)
+ || basic_check(latches, level, SYNC_TREE_NODE - 1));
break;
case SYNC_TREE_NODE_NEW:
@@ -1309,9 +1298,6 @@ sync_latch_meta_init()
LATCH_ADD_MUTEX(DICT_SYS, SYNC_DICT, dict_sys_mutex_key);
- LATCH_ADD_MUTEX(FILE_FORMAT_MAX, SYNC_FILE_FORMAT_TAG,
- file_format_max_mutex_key);
-
LATCH_ADD_MUTEX(FIL_SYSTEM, SYNC_ANY_LATCH, fil_system_mutex_key);
LATCH_ADD_MUTEX(FLUSH_LIST, SYNC_BUF_FLUSH_LIST, flush_list_mutex_key);
@@ -1400,8 +1386,6 @@ sync_latch_meta_init()
LATCH_ADD_MUTEX(BUF_DBLWR, SYNC_DOUBLEWRITE, buf_dblwr_mutex_key);
- LATCH_ADD_MUTEX(TRX_UNDO, SYNC_TRX_UNDO, trx_undo_mutex_key);
-
LATCH_ADD_MUTEX(TRX_POOL, SYNC_POOL, trx_pool_mutex_key);
LATCH_ADD_MUTEX(TRX_POOL_MANAGER, SYNC_POOL_MANAGER,
@@ -1510,10 +1494,6 @@ sync_latch_meta_init()
PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(BTR_DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK,
PFS_NOT_INSTRUMENTED);
- LATCH_ADD_MUTEX(MTFLUSH_THREAD_MUTEX, SYNC_NO_ORDER_CHECK,
- PFS_NOT_INSTRUMENTED);
- LATCH_ADD_MUTEX(MTFLUSH_MUTEX, SYNC_NO_ORDER_CHECK,
- PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(FIL_CRYPT_MUTEX, SYNC_NO_ORDER_CHECK,
PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(FIL_CRYPT_STAT_MUTEX, SYNC_NO_ORDER_CHECK,
@@ -1522,6 +1502,8 @@ sync_latch_meta_init()
PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(FIL_CRYPT_THREADS_MUTEX, SYNC_NO_ORDER_CHECK,
PFS_NOT_INSTRUMENTED);
+ LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT,
+ rw_trx_hash_element_mutex_key);
latch_id_t id = LATCH_ID_NONE;
@@ -1746,7 +1728,7 @@ sync_check_init()
ut_d(LatchDebug::init());
- sync_array_init(OS_THREAD_MAX_N);
+ sync_array_init();
}
/** Free the InnoDB synchronization data structures. */
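
The LatchDebug changes above adjust the per-level rules that debug builds enforce when a thread acquires a latch (dropping SYNC_TRX_UNDO and SYNC_FILE_FORMAT_TAG, adding SYNC_RW_TRX_HASH_ELEMENT). As a rough sketch of the underlying idea only, not InnoDB's actual implementation, which carries many documented exceptions such as the SYNC_TREE_NODE case above, a latch-order checker can be reduced to a per-thread list of held levels plus an assertion on every new acquisition. The simplified rule assumed here is that a new latch is allowed only if its level is strictly lower than every level already held:

#include <cassert>
#include <iterator>
#include <vector>

/* Per-thread record of the levels currently held. */
static thread_local std::vector<int> held_levels;

static void latch_acquired(int level)
{
	for (int held : held_levels) {
		/* A violation aborts immediately, like ut_a() in debug builds. */
		assert(level < held && "latching order violation");
	}
	held_levels.push_back(level);
}

static void latch_released(int level)
{
	for (auto it = held_levels.rbegin(); it != held_levels.rend(); ++it) {
		if (*it == level) {
			held_levels.erase(std::next(it).base());
			return;
		}
	}
	assert(!"releasing a latch that was not registered");
}
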
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index 31a8234dea7..9304fa66900 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -230,9 +230,7 @@ rw_lock_create_func(
ut_ad(cline <= 8192);
lock->cline = cline;
lock->count_os_wait = 0;
- lock->last_s_file_name = "not yet reserved";
lock->last_x_file_name = "not yet reserved";
- lock->last_s_line = 0;
lock->last_x_line = 0;
lock->event = os_event_create(0);
lock->wait_ex_event = os_event_create(0);
@@ -259,7 +257,8 @@ rw_lock_free_func(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
ut_ad(rw_lock_validate(lock));
- ut_a(lock->lock_word == X_LOCK_DECR);
+ ut_a(my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED) == X_LOCK_DECR);
mutex_enter(&rw_lock_list_mutex);
@@ -294,8 +293,8 @@ rw_lock_s_lock_spin(
{
ulint i = 0; /* spin round count */
sync_array_t* sync_arr;
- ulint spin_count = 0;
- uint64_t count_os_wait = 0;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
/* We reuse the thread id to index into the counter, cache
it here for efficiency. */
@@ -306,11 +305,10 @@ lock_loop:
/* Spin waiting for the writer field to become free */
HMT_low();
- while (i < srv_n_spin_wait_rounds && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
+ while (i < srv_n_spin_wait_rounds &&
+ my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED) <= 0) {
+ ut_delay(srv_spin_wait_delay);
i++;
}
@@ -349,7 +347,7 @@ lock_loop:
/* Set waiters before checking lock_word to ensure wake-up
signal is sent. This may lead to some unnecessary signals. */
- my_atomic_fas32((int32*) &lock->waiters, 1);
+ my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
@@ -423,25 +421,20 @@ rw_lock_x_lock_wait_func(
unsigned line) /*!< in: line where requested */
{
ulint i = 0;
- ulint n_spins = 0;
+ lint n_spins = 0;
sync_array_t* sync_arr;
- uint64_t count_os_wait = 0;
-
- ut_ad(lock->lock_word <= threshold);
+ int64_t count_os_wait = 0;
- while (lock->lock_word < threshold) {
+ ut_ad(my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED) <= threshold);
-
- HMT_low();
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
+ HMT_low();
+ while (my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED) < threshold) {
+ ut_delay(srv_spin_wait_delay);
if (i < srv_n_spin_wait_rounds) {
i++;
continue;
}
- HMT_medium();
/* If there is still a reader, then go to sleep.*/
++n_spins;
@@ -454,7 +447,7 @@ rw_lock_x_lock_wait_func(
i = 0;
/* Check lock_word to ensure wake-up isn't missed.*/
- if (lock->lock_word < threshold) {
+ if (my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED) < threshold) {
++count_os_wait;
@@ -477,7 +470,6 @@ rw_lock_x_lock_wait_func(
sync_array_free_cell(sync_arr, cell);
break;
}
- HMT_low();
}
HMT_medium();
rw_lock_stats.rw_x_spin_round_count.add(n_spins);
@@ -545,14 +537,18 @@ rw_lock_x_lock_low(
file_name, line);
} else {
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
/* At least one X lock by this thread already
exists. Add another. */
- if (lock->lock_word == 0
- || lock->lock_word == -X_LOCK_HALF_DECR) {
- lock->lock_word -= X_LOCK_DECR;
+ if (lock_word == 0
+ || lock_word == -X_LOCK_HALF_DECR) {
+ my_atomic_add32_explicit(&lock->lock_word, -X_LOCK_DECR,
+ MY_MEMORY_ORDER_RELAXED);
} else {
- ut_ad(lock->lock_word <= -X_LOCK_DECR);
- --lock->lock_word;
+ ut_ad(lock_word <= -X_LOCK_DECR);
+ my_atomic_add32_explicit(&lock->lock_word, -1,
+ MY_MEMORY_ORDER_RELAXED);
}
}
@@ -623,12 +619,17 @@ rw_lock_sx_lock_low(
thread working on this lock and it is safe to
read and write to the lock_word. */
- ut_ad((lock->lock_word == 0)
- || ((lock->lock_word <= -X_LOCK_DECR)
- && (lock->lock_word
+#ifdef UNIV_DEBUG
+ int32_t lock_word =
+#endif
+ my_atomic_add32_explicit(&lock->lock_word, -X_LOCK_HALF_DECR,
+ MY_MEMORY_ORDER_RELAXED);
+
+ ut_ad((lock_word == 0)
+ || ((lock_word <= -X_LOCK_DECR)
+ && (lock_word
> -(X_LOCK_DECR
+ X_LOCK_HALF_DECR))));
- lock->lock_word -= X_LOCK_HALF_DECR;
}
} else {
/* Another thread locked before us */
@@ -664,8 +665,8 @@ rw_lock_x_lock_func(
{
ulint i = 0;
sync_array_t* sync_arr;
- ulint spin_count = 0;
- uint64_t count_os_wait = 0;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
ut_ad(rw_lock_validate(lock));
ut_ad(!rw_lock_own(lock, RW_LOCK_S));
@@ -690,18 +691,13 @@ lock_loop:
/* Spin waiting for the lock_word to become free */
HMT_low();
while (i < srv_n_spin_wait_rounds
- && lock->lock_word <= X_LOCK_HALF_DECR) {
-
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(
- 0, srv_spin_wait_delay));
- }
-
+ && my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED) <= X_LOCK_HALF_DECR) {
+ ut_delay(srv_spin_wait_delay);
i++;
}
HMT_medium();
- spin_count += i;
+ spin_count += lint(i);
if (i >= srv_n_spin_wait_rounds) {
@@ -720,7 +716,7 @@ lock_loop:
/* Waiters must be set before checking lock_word, to ensure signal
is sent. This could lead to a few unnecessary wake-up signals. */
- my_atomic_fas32((int32*) &lock->waiters, 1);
+ my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
sync_array_free_cell(sync_arr, cell);
@@ -767,9 +763,9 @@ rw_lock_sx_lock_func(
{
ulint i = 0;
sync_array_t* sync_arr;
- ulint spin_count = 0;
- uint64_t count_os_wait = 0;
- ulint spin_wait_count = 0;
+ lint spin_count = 0;
+ int64_t count_os_wait = 0;
+ lint spin_wait_count = 0;
ut_ad(rw_lock_validate(lock));
ut_ad(!rw_lock_own(lock, RW_LOCK_S));
@@ -796,17 +792,12 @@ lock_loop:
/* Spin waiting for the lock_word to become free */
while (i < srv_n_spin_wait_rounds
- && lock->lock_word <= X_LOCK_HALF_DECR) {
-
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(
- 0, srv_spin_wait_delay));
- }
-
+ && my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED) <= X_LOCK_HALF_DECR) {
+ ut_delay(srv_spin_wait_delay);
i++;
}
- spin_count += i;
+ spin_count += lint(i);
if (i >= srv_n_spin_wait_rounds) {
@@ -825,7 +816,7 @@ lock_loop:
/* Waiters must be set before checking lock_word, to ensure signal
is sent. This could lead to a few unnecessary wake-up signals. */
- my_atomic_fas32((int32*) &lock->waiters, 1);
+ my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
@@ -864,14 +855,16 @@ rw_lock_validate(
/*=============*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_word;
+ int32_t lock_word;
ut_ad(lock);
- lock_word = lock->lock_word;
+ lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_ad(lock->waiters < 2);
+ ut_ad(my_atomic_load32_explicit(const_cast<int32_t*>(&lock->waiters),
+ MY_MEMORY_ORDER_RELAXED) < 2);
ut_ad(lock_word > -(2 * X_LOCK_DECR));
ut_ad(lock_word <= X_LOCK_DECR);
@@ -934,15 +927,17 @@ rw_lock_add_debug_info(
rw_lock_debug_mutex_exit();
if (pass == 0 && lock_type != RW_LOCK_X_WAIT) {
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
/* Recursive x while holding SX
(lock_type == RW_LOCK_X && lock_word == -X_LOCK_HALF_DECR)
is treated as not-relock (new lock). */
if ((lock_type == RW_LOCK_X
- && lock->lock_word < -X_LOCK_HALF_DECR)
+ && lock_word < -X_LOCK_HALF_DECR)
|| (lock_type == RW_LOCK_SX
- && (lock->lock_word < 0 || lock->sx_recursive == 1))) {
+ && (lock_word < 0 || lock->sx_recursive == 1))) {
sync_check_lock_validate(lock);
sync_check_lock_granted(lock);
@@ -999,7 +994,7 @@ rw_lock_remove_debug_info(
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0.
@return TRUE if locked */
-ibool
+bool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
@@ -1022,12 +1017,12 @@ rw_lock_own(
rw_lock_debug_mutex_exit();
/* Found! */
- return(TRUE);
+ return(true);
}
}
rw_lock_debug_mutex_exit();
- return(FALSE);
+ return(false);
}
/** Checks if the thread has locked the rw-lock in the specified mode, with
@@ -1099,12 +1094,12 @@ rw_lock_list_print_info(
count++;
- if (lock->lock_word != X_LOCK_DECR) {
+ if (my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word), MY_MEMORY_ORDER_RELAXED) != X_LOCK_DECR) {
fprintf(file, "RW-LOCK: %p ", (void*) lock);
- if (lock->waiters) {
- fputs(" Waiters for the lock exist\n", file);
+ if (int32_t waiters= my_atomic_load32_explicit(const_cast<int32_t*>(&lock->waiters), MY_MEMORY_ORDER_RELAXED)) {
+ fprintf(file, " (%d waiters)\n", waiters);
} else {
putc('\n', file);
}
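
The sync0rw.cc hunks keep the long-standing protocol — spin on lock_word for srv_n_spin_wait_rounds, then set the waiters flag before re-checking lock_word and sleeping — but make every lock_word access an explicit atomic operation, give the waiters flip to acquire semantics, and replace the randomized delay with a plain ut_delay(srv_spin_wait_delay). A stripped-down sketch of that protocol, with std::atomic and a condition variable standing in for my_atomic and os_event; the unlock path (not shown) would clear waiters and signal the event, and all names here are invented:

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

struct mini_rw_lock {
	std::atomic<int32_t>	lock_word{0x20000000};	/* > 0 means S-lockable */
	std::atomic<int32_t>	waiters{0};
	std::mutex		mtx;
	std::condition_variable	event;	/* stand-in for os_event */
};

/* One shared-lock attempt: decrement lock_word if it is still positive. */
static bool try_s_lock(mini_rw_lock& lock)
{
	int32_t word = lock.lock_word.load(std::memory_order_relaxed);
	return word > 0
		&& lock.lock_word.compare_exchange_strong(word, word - 1);
}

static void s_lock(mini_rw_lock& lock, unsigned spin_rounds)
{
	for (;;) {
		/* Spin on relaxed loads first, like the loops above. */
		for (unsigned i = 0; i < spin_rounds
		     && lock.lock_word.load(std::memory_order_relaxed) <= 0;
		     i++) {
			/* ut_delay(srv_spin_wait_delay) would go here */
		}
		if (try_s_lock(lock)) {
			return;
		}
		/* Publish the waiters flag BEFORE re-checking lock_word, so a
		concurrent unlocker cannot clear it and skip the wake-up. */
		lock.waiters.exchange(1, std::memory_order_acquire);
		if (try_s_lock(lock)) {
			return;
		}
		std::unique_lock<std::mutex> guard(lock.mtx);
		lock.event.wait_for(guard, std::chrono::milliseconds(1));
	}
}
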
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index cccbf13dbf5..3873e8532e7 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -42,7 +42,6 @@ mysql_pfs_key_t buf_pool_zip_mutex_key;
mysql_pfs_key_t cache_last_read_mutex_key;
mysql_pfs_key_t dict_foreign_err_mutex_key;
mysql_pfs_key_t dict_sys_mutex_key;
-mysql_pfs_key_t file_format_max_mutex_key;
mysql_pfs_key_t fil_system_mutex_key;
mysql_pfs_key_t flush_list_mutex_key;
mysql_pfs_key_t fts_bg_threads_mutex_key;
@@ -80,7 +79,6 @@ mysql_pfs_key_t srv_innodb_monitor_mutex_key;
mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
mysql_pfs_key_t srv_monitor_file_mutex_key;
mysql_pfs_key_t buf_dblwr_mutex_key;
-mysql_pfs_key_t trx_undo_mutex_key;
mysql_pfs_key_t trx_mutex_key;
mysql_pfs_key_t trx_pool_mutex_key;
mysql_pfs_key_t trx_pool_manager_mutex_key;
@@ -95,6 +93,7 @@ mysql_pfs_key_t sync_array_mutex_key;
mysql_pfs_key_t thread_mutex_key;
mysql_pfs_key_t zip_pad_mutex_key;
mysql_pfs_key_t row_drop_list_mutex_key;
+mysql_pfs_key_t rw_trx_hash_element_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
mysql_pfs_key_t btr_search_latch_key;
@@ -147,13 +146,13 @@ sync_print_wait_info(FILE* file)
" %.2f RW-excl, %.2f RW-sx\n",
(double) rw_lock_stats.rw_s_spin_round_count /
(rw_lock_stats.rw_s_spin_wait_count
- ? rw_lock_stats.rw_s_spin_wait_count : 1),
+ ? rw_lock_stats.rw_s_spin_wait_count : 1LL),
(double) rw_lock_stats.rw_x_spin_round_count /
(rw_lock_stats.rw_x_spin_wait_count
- ? rw_lock_stats.rw_x_spin_wait_count : 1),
+ ? rw_lock_stats.rw_x_spin_wait_count : 1LL),
(double) rw_lock_stats.rw_sx_spin_round_count /
(rw_lock_stats.rw_sx_spin_wait_count
- ? rw_lock_stats.rw_sx_spin_wait_count : 1));
+ ? rw_lock_stats.rw_sx_spin_wait_count : 1LL));
}
/**
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index 4a1598a77a3..965faaba478 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -44,8 +44,8 @@ Created July 17, 2007 Vasil Dimov
#include "sync0rw.h"
#include "sync0sync.h"
#include "trx0sys.h"
-
-#include <sql_class.h>
+#include "que0que.h"
+#include "trx0purge.h"
/** Initial number of rows in the table cache */
#define TABLE_CACHE_INITIAL_ROWSNUM 1024
@@ -162,10 +162,10 @@ struct trx_i_s_cache_t {
ha_storage_t* storage; /*!< storage for external volatile
data that may become unavailable
when we release
- lock_sys->mutex or trx_sys->mutex */
+ lock_sys.mutex or trx_sys.mutex */
ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*() */
- ibool is_truncated; /*!< this is TRUE if the memory
+ bool is_truncated; /*!< this is true if the memory
limit was hit and thus the data
in the cache is truncated */
};
@@ -527,9 +527,9 @@ thd_done:
row->trx_tables_locked = lock_number_of_tables_locked(&trx->lock);
- /* These are protected by both trx->mutex or lock_sys->mutex,
- or just lock_sys->mutex. For reading, it suffices to hold
- lock_sys->mutex. */
+ /* These are protected by both trx->mutex or lock_sys.mutex,
+ or just lock_sys.mutex. For reading, it suffices to hold
+ lock_sys.mutex. */
row->trx_lock_structs = UT_LIST_GET_LEN(trx->lock.trx_locks);
@@ -1235,102 +1235,64 @@ trx_i_s_cache_clear(
ha_storage_empty(&cache->storage);
}
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache_low(
-/*======================*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- bool read_write, /*!< in: only read-write
- transactions */
- trx_ut_list_t* trx_list) /*!< in: trx list */
-{
- const trx_t* trx;
- bool rw_trx_list = trx_list == &trx_sys->rw_trx_list;
-
- ut_ad(rw_trx_list || trx_list == &trx_sys->mysql_trx_list);
-
- /* Iterate over the transaction list and add each one
- to innodb_trx's cache. We also add all locks that are relevant
- to each transaction into innodb_locks' and innodb_lock_waits'
- caches. */
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- trx =
- (rw_trx_list
- ? UT_LIST_GET_NEXT(trx_list, trx)
- : UT_LIST_GET_NEXT(mysql_trx_list, trx))) {
-
- i_s_trx_row_t* trx_row;
- i_s_locks_row_t* requested_lock_row;
- /* Note: Read only transactions that modify temporary
- tables an have a transaction ID */
- if (!trx_is_started(trx)
- || (!rw_trx_list && trx->id != 0 && !trx->read_only)) {
+/**
+ Add transactions to innodb_trx's cache.
- continue;
- }
-
- assert_trx_nonlocking_or_in_list(trx);
-
- ut_ad(trx->in_rw_trx_list == rw_trx_list);
-
- if (!add_trx_relevant_locks_to_cache(cache, trx,
- &requested_lock_row)) {
-
- cache->is_truncated = TRUE;
- return;
- }
-
- trx_row = reinterpret_cast<i_s_trx_row_t*>(
- table_cache_create_empty_row(
- &cache->innodb_trx, cache));
-
- /* memory could not be allocated */
- if (trx_row == NULL) {
-
- cache->is_truncated = TRUE;
- return;
- }
+ We also add all locks that are relevant to each transaction into
+ innodb_locks' and innodb_lock_waits' caches.
+*/
- if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) {
-
- /* memory could not be allocated */
- --cache->innodb_trx.rows_used;
- cache->is_truncated = TRUE;
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache(
-/*==================*/
- trx_i_s_cache_t* cache) /*!< in/out: cache */
+static void fetch_data_into_cache_low(trx_i_s_cache_t *cache, const trx_t *trx)
{
- ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
-
- trx_i_s_cache_clear(cache);
+ i_s_locks_row_t *requested_lock_row;
+
+ assert_trx_nonlocking_or_in_list(trx);
+
+ if (add_trx_relevant_locks_to_cache(cache, trx, &requested_lock_row))
+ {
+ if (i_s_trx_row_t *trx_row= reinterpret_cast<i_s_trx_row_t*>(
+ table_cache_create_empty_row(&cache->innodb_trx, cache)))
+ {
+ if (fill_trx_row(trx_row, trx, requested_lock_row, cache))
+ return;
+ --cache->innodb_trx.rows_used;
+ }
+ }
+
+ /* memory could not be allocated */
+ cache->is_truncated= true;
+}
- /* Capture the state of the read-write transactions. This includes
- internal transactions too. They are not on mysql_trx_list */
- fetch_data_into_cache_low(cache, true, &trx_sys->rw_trx_list);
- /* Capture the state of the read-only active transactions */
- fetch_data_into_cache_low(cache, false, &trx_sys->mysql_trx_list);
+/**
+ Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
+ table cache buffer. Cache must be locked for write.
+*/
- cache->is_truncated = FALSE;
+static void fetch_data_into_cache(trx_i_s_cache_t *cache)
+{
+ ut_ad(lock_mutex_own());
+ trx_i_s_cache_clear(cache);
+
+ /* Capture the state of transactions */
+ mutex_enter(&trx_sys.mutex);
+ for (const trx_t *trx= UT_LIST_GET_FIRST(trx_sys.trx_list);
+ trx != NULL;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx_is_started(trx) && trx != purge_sys.query->trx)
+ {
+ fetch_data_into_cache_low(cache, trx);
+ if (cache->is_truncated)
+ break;
+ }
+ }
+ mutex_exit(&trx_sys.mutex);
+ cache->is_truncated= false;
}
+
/*******************************************************************//**
Update the transactions cache if it has not been read for some time.
Called from handler/i_s.cc.
@@ -1348,13 +1310,7 @@ trx_i_s_possibly_fetch_data_into_cache(
/* We need to read trx_sys and record/table lock queues */
lock_mutex_enter();
-
- trx_sys_mutex_enter();
-
fetch_data_into_cache(cache);
-
- trx_sys_mutex_exit();
-
lock_mutex_exit();
/* update cache last read time */
@@ -1368,7 +1324,7 @@ trx_i_s_possibly_fetch_data_into_cache(
Returns TRUE if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
@return TRUE if truncated */
-ibool
+bool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache) /*!< in: cache */
@@ -1415,7 +1371,7 @@ trx_i_s_cache_init(
cache->mem_allocd = 0;
- cache->is_truncated = FALSE;
+ cache->is_truncated = false;
}
/*******************************************************************//**
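
The rewritten fetch_data_into_cache() above replaces the two-list walk (rw_trx_list plus mysql_trx_list under both lock_sys and trx_sys mutexes) with a single pass over trx_sys.trx_list under trx_sys.mutex, skipping the purge system's own transaction and stopping as soon as the cache reports truncation. A hedged sketch of that shape using generic containers; the trx and cache types below are invented placeholders, not the real InnoDB structures:

#include <cstdint>
#include <list>
#include <mutex>

struct fake_trx {
	uint64_t	id;
	bool		started;
};

struct fake_cache {
	bool	is_truncated = false;
	/* would return false once some memory limit is hit */
	bool add_row(const fake_trx&) { return true; }
};

static std::mutex		trx_list_mutex;	/* plays the role of trx_sys.mutex */
static std::list<fake_trx>	trx_list;	/* plays the role of trx_sys.trx_list */
static const fake_trx*		purge_trx = nullptr;	/* purge_sys.query->trx analogue */

static void fetch_into_cache(fake_cache& cache)
{
	std::lock_guard<std::mutex> guard(trx_list_mutex);
	for (const fake_trx& trx : trx_list) {
		if (!trx.started || &trx == purge_trx) {
			continue;	/* idle slots and the purge trx are skipped */
		}
		if (!cache.add_row(trx)) {
			cache.is_truncated = true;	/* out of cache memory */
			return;
		}
	}
	cache.is_truncated = false;
}
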
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index c4f272acf07..a4fa12708ac 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -31,7 +31,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0log.h"
#include "os0thread.h"
#include "que0que.h"
-#include "read0read.h"
#include "row0purge.h"
#include "row0upd.h"
#include "srv0mon.h"
@@ -42,6 +41,7 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0roll.h"
#include "trx0rseg.h"
#include "trx0trx.h"
+#include <mysql/service_wsrep.h>
/** Maximum allowable purge history length. <=0 means 'infinite'. */
ulong srv_max_purge_lag = 0;
@@ -50,7 +50,7 @@ ulong srv_max_purge_lag = 0;
ulong srv_max_purge_lag_delay = 0;
/** The global data structure coordinating a purge */
-purge_sys_t* purge_sys;
+purge_sys_t purge_sys;
/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
@@ -61,101 +61,67 @@ my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
/** Sentinel value */
-const TrxUndoRsegs TrxUndoRsegsIterator::NullElement(UINT64_UNDEFINED);
+static const TrxUndoRsegs NullElement;
-/** Constructor */
+/** Default constructor */
TrxUndoRsegsIterator::TrxUndoRsegsIterator()
- :
- m_trx_undo_rsegs(NullElement),
- m_iter(m_trx_undo_rsegs.end())
+ : m_rsegs(NullElement), m_iter(m_rsegs.begin())
{
}
/** Sets the next rseg to purge in purge_sys.
+Executed in the purge coordinator thread.
@return whether anything is to be purged */
-inline
-bool
-TrxUndoRsegsIterator::set_next()
+inline bool TrxUndoRsegsIterator::set_next()
{
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
/* Only purge consumes events from the priority queue, user
threads only produce the events. */
/* Check if there are more rsegs to process in the
current element. */
- if (m_iter != m_trx_undo_rsegs.end()) {
-
+ if (m_iter != m_rsegs.end()) {
/* We are still processing rollback segment from
the same transaction and so expected transaction
- number shouldn't increase. Undo increment of
- expected trx_no done by caller assuming rollback
+ number shouldn't increase. Undo the increment of
+ expected commit done by caller assuming rollback
segments from given transaction are done. */
- purge_sys->iter.trx_no = (*m_iter)->last_trx_no;
-
- } else if (!purge_sys->purge_queue.empty()) {
-
- /* Read the next element from the queue.
- Combine elements if they have same transaction number.
- This can happen if a transaction shares redo rollback segment
- with another transaction that has already added it to purge
- queue and former transaction also needs to schedule non-redo
- rollback segment for purge. */
- m_trx_undo_rsegs = NullElement;
-
- purge_pq_t& purge_queue = purge_sys->purge_queue;
-
- while (!purge_queue.empty()) {
-
- if (m_trx_undo_rsegs.get_trx_no() == UINT64_UNDEFINED) {
- m_trx_undo_rsegs = purge_queue.top();
- } else if (purge_queue.top().get_trx_no() ==
- m_trx_undo_rsegs.get_trx_no()) {
- m_trx_undo_rsegs.append(
- purge_queue.top());
- } else {
- break;
- }
-
- purge_queue.pop();
- }
-
- m_iter = m_trx_undo_rsegs.begin();
-
+ purge_sys.tail.commit = (*m_iter)->last_commit;
+ } else if (!purge_sys.purge_queue.empty()) {
+ m_rsegs = purge_sys.purge_queue.top();
+ purge_sys.purge_queue.pop();
+ ut_ad(purge_sys.purge_queue.empty()
+ || purge_sys.purge_queue.top() != m_rsegs);
+ m_iter = m_rsegs.begin();
} else {
/* Queue is empty, reset iterator. */
- m_trx_undo_rsegs = NullElement;
- m_iter = m_trx_undo_rsegs.end();
-
- mutex_exit(&purge_sys->pq_mutex);
-
- purge_sys->rseg = NULL;
+ purge_sys.rseg = NULL;
+ mutex_exit(&purge_sys.pq_mutex);
+ m_rsegs = NullElement;
+ m_iter = m_rsegs.begin();
return false;
}
- purge_sys->rseg = *m_iter++;
-
- mutex_exit(&purge_sys->pq_mutex);
-
- ut_a(purge_sys->rseg != NULL);
-
- mutex_enter(&purge_sys->rseg->mutex);
+ purge_sys.rseg = *m_iter++;
+ mutex_exit(&purge_sys.pq_mutex);
+ mutex_enter(&purge_sys.rseg->mutex);
- ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
- ut_ad(purge_sys->rseg->last_trx_no == m_trx_undo_rsegs.get_trx_no());
+ ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
+ ut_ad(purge_sys.rseg->last_trx_no() == m_rsegs.trx_no());
/* We assume in purge of externally stored fields that space id is
in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space == TRX_SYS_SPACE
- || srv_is_undo_tablespace(purge_sys->rseg->space));
+ ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(purge_sys.rseg->space->id));
- ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
+ ut_a(purge_sys.tail.commit <= purge_sys.rseg->last_commit);
- purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+ purge_sys.tail.commit = purge_sys.rseg->last_commit;
+ purge_sys.hdr_offset = purge_sys.rseg->last_offset;
+ purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
- mutex_exit(&purge_sys->rseg->mutex);
+ mutex_exit(&purge_sys.rseg->mutex);
return(true);
}
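
TrxUndoRsegsIterator::set_next() above now simply pops the top of purge_sys.purge_queue, a priority queue that keeps the rollback segment with the oldest committed transaction on top (the removed code additionally merged queue entries with equal transaction numbers). A minimal sketch of that selection step with std::priority_queue; the element layout and names are invented for illustration:

#include <cstdint>
#include <queue>
#include <vector>

/* Hypothetical queue element: a rollback segment id tagged with the commit
number of its oldest not-yet-purged transaction. */
struct rseg_elem {
	uint64_t	trx_no;
	unsigned	rseg_id;
};

/* Order the queue so the smallest trx_no (oldest commit) is on top. */
struct older_first {
	bool operator()(const rseg_elem& a, const rseg_elem& b) const {
		return a.trx_no > b.trx_no;
	}
};

typedef std::priority_queue<rseg_elem, std::vector<rseg_elem>, older_first>
	purge_queue_t;

/* Pick the next rollback segment to purge; false means the queue is empty
and the caller should reset its iterator, as set_next() does above. */
static bool pick_next(purge_queue_t& queue, rseg_elem& out)
{
	if (queue.empty()) {
		return false;
	}
	out = queue.top();
	queue.pop();
	return true;
}
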
@@ -169,7 +135,7 @@ purge_graph_build()
{
ut_a(srv_n_purge_threads > 0);
- trx_t* trx = trx_allocate_for_background();
+ trx_t* trx = trx_create();
ut_ad(!trx->id);
trx->start_time = ut_time();
trx->state = TRX_STATE_ACTIVE;
@@ -188,65 +154,81 @@ purge_graph_build()
return(fork);
}
-/** Construct the purge system. */
-purge_sys_t::purge_sys_t()
- : latch(), event(os_event_create(0)),
- n_stop(0), running(false), state(PURGE_STATE_INIT),
- query(purge_graph_build()),
- view(), n_submitted(0), n_completed(0),
- iter(), limit(),
-#ifdef UNIV_DEBUG
- done(),
-#endif /* UNIV_DEBUG */
- next_stored(false), rseg(NULL),
- page_no(0), offset(0), hdr_page_no(0), hdr_offset(0),
- rseg_iter(), purge_queue(), pq_mutex(), undo_trunc()
+/** Initialise the purge system. */
+void purge_sys_t::create()
{
- ut_ad(!purge_sys);
- rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
- mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
+ ut_ad(this == &purge_sys);
+ ut_ad(!enabled());
+ ut_ad(!event);
+ event= os_event_create(0);
+ ut_ad(event);
+ m_paused= 0;
+ query= purge_graph_build();
+ n_submitted= 0;
+ n_completed= 0;
+ next_stored= false;
+ rseg= NULL;
+ page_no= 0;
+ offset= 0;
+ hdr_page_no= 0;
+ hdr_offset= 0;
+ rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
+ mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
+ undo_trunc.create();
}
-/** Destruct the purge system. */
-purge_sys_t::~purge_sys_t()
+/** Close the purge subsystem on shutdown. */
+void purge_sys_t::close()
{
- ut_ad(this == purge_sys);
-
- trx_t* trx = query->trx;
- que_graph_free(query);
- ut_ad(!trx->id);
- ut_ad(trx->state == TRX_STATE_ACTIVE);
- trx->state = TRX_STATE_NOT_STARTED;
- trx_free_for_background(trx);
- view.close();
- rw_lock_free(&latch);
- /* rw_lock_free() already called latch.~rw_lock_t(); tame the
- debug assertions when the destructor will be called once more. */
- ut_ad(latch.magic_n == 0);
- ut_d(latch.magic_n = RW_LOCK_MAGIC_N);
- mutex_free(&pq_mutex);
- os_event_destroy(event);
+ ut_ad(this == &purge_sys);
+ if (!event) return;
+
+ m_enabled= false;
+ trx_t* trx = query->trx;
+ que_graph_free(query);
+ ut_ad(!trx->id);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ trx->state= TRX_STATE_NOT_STARTED;
+ trx_free(trx);
+ rw_lock_free(&latch);
+ /* rw_lock_free() already called latch.~rw_lock_t(); tame the
+ debug assertions when the destructor will be called once more. */
+ ut_ad(latch.magic_n == 0);
+ ut_d(latch.magic_n= RW_LOCK_MAGIC_N);
+ mutex_free(&pq_mutex);
+ os_event_destroy(event);
}
/*================ UNDO LOG HISTORY LIST =============================*/
-/********************************************************************//**
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
+/** Prepend the history list with an undo log.
+Remove the undo log segment from the rseg slot if it is too big for reuse.
+@param[in] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
+trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
{
- trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
- trx_rseg_t* rseg = undo->rseg;
+ DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")",
+ trx->id, trx->no));
+ ut_ad(undo == trx->rsegs.m_redo.undo
+ || undo == trx->rsegs.m_redo.old_insert);
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(undo->rseg == rseg);
trx_rsegf_t* rseg_header = trx_rsegf_get(
rseg->space, rseg->page_no, mtr);
+ page_t* undo_page = trx_undo_set_state_at_finish(
+ undo, mtr);
trx_ulogf_t* undo_header = undo_page + undo->hdr_offset;
+ ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
+
+ if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG_FORMAT + rseg_header))) {
+ /* This database must have been upgraded from
+ before MariaDB 10.3.5. */
+ trx_rseg_format_upgrade(rseg_header, mtr);
+ }
+
if (undo->state != TRX_UNDO_CACHED) {
ulint hist_size;
#ifdef UNIV_DEBUG
@@ -254,11 +236,7 @@ trx_purge_add_update_undo_to_history(
#endif /* UNIV_DEBUG */
/* The undo log segment will not be reused */
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- ib::fatal() << "undo->id is " << undo->id;
- }
-
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
@@ -272,6 +250,9 @@ trx_purge_add_update_undo_to_history(
mlog_write_ulint(
rseg_header + TRX_RSEG_HISTORY_SIZE,
hist_size + undo->size, MLOG_4BYTES, mtr);
+
+ mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
+ trx_sys.get_max_trx_id(), mtr);
}
/* After the purge thread has been given permission to exit,
@@ -293,36 +274,57 @@ trx_purge_add_update_undo_to_history(
user transactions. */
ut_ad(srv_undo_sources
|| trx->undo_no == 0
- || ((srv_is_being_started
- || trx_rollback_or_clean_is_active)
- && purge_sys->state == PURGE_STATE_INIT)
- || (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
- && purge_sys->state == PURGE_STATE_DISABLED)
- || ((trx->in_mysql_trx_list || trx->internal)
+ || (!purge_sys.enabled()
+ && (srv_is_being_started
+ || trx_rollback_is_active
+ || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND))
+ || ((trx->mysql_thd || trx->internal)
&& srv_fast_shutdown));
+#ifdef WITH_WSREP
+ if (wsrep_is_wsrep_xid(trx->xid)) {
+ trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr);
+ }
+#endif
+
+ if (trx->mysql_log_file_name && *trx->mysql_log_file_name) {
+ /* Update the latest MySQL binlog name and offset info
+ in rollback segment header if MySQL binlogging is on
+ or the database server is a MySQL replication slave. */
+ trx_rseg_update_binlog_offset(rseg_header, trx, mtr);
+ }
+
/* Add the log as the first in the history list */
flst_add_first(rseg_header + TRX_RSEG_HISTORY,
undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- my_atomic_addlint(&trx_sys->rseg_history_len, 1);
-
- /* Write the trx number to the undo log header */
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
-
- /* Write information about delete markings to the undo log header */
-
- if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ /* This is needed for upgrading old undo log pages from
+ before MariaDB 10.3.1. */
+ if (UNIV_UNLIKELY(!mach_read_from_2(undo_header
+ + TRX_UNDO_NEEDS_PURGE))) {
+ mlog_write_ulint(undo_header + TRX_UNDO_NEEDS_PURGE, 1,
MLOG_2BYTES, mtr);
}
if (rseg->last_page_no == FIL_NULL) {
rseg->last_page_no = undo->hdr_page_no;
rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
+ rseg->set_last_trx_no(trx->no, undo == trx->rsegs.m_redo.undo);
+ rseg->needs_purge = true;
+ }
+
+ trx_sys.history_insert();
+
+ if (undo->state == TRX_UNDO_CACHED) {
+ UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ } else {
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+ ut_free(undo);
}
+
+ undo = NULL;
}
/** Remove undo log header from the history list.
@@ -338,7 +340,7 @@ trx_purge_remove_log_hdr(
{
flst_remove(rseg_hdr + TRX_RSEG_HISTORY,
log_hdr + TRX_UNDO_HISTORY_NODE, mtr);
- my_atomic_addlint(&trx_sys->rseg_history_len, -1);
+ trx_sys.history_remove();
}
/** Free an undo log segment, and remove the header from the history list.
@@ -350,63 +352,52 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
{
mtr_t mtr;
trx_rsegf_t* rseg_hdr;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- ulint seg_size;
- ulint hist_size;
- bool marked = false;
+ page_t* undo_page;
- for (;;) {
- page_t* undo_page;
+ mtr.start();
+ mutex_enter(&rseg->mutex);
- mtr_start(&mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ undo_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, hdr_addr.page), &mtr);
+
+ /* Mark the last undo log totally purged, so that if the
+ system crashes, the tail of the undo log will not get accessed
+ again. The list of pages in the undo log tail gets
+ inconsistent during the freeing of the segment, and therefore
+ purge should not try to access them again. */
+ mlog_write_ulint(undo_page + hdr_addr.boffset + TRX_UNDO_NEEDS_PURGE,
+ 0, MLOG_2BYTES, &mtr);
+
+ while (!fseg_free_step_not_header(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ + undo_page, false, &mtr)) {
+ mutex_exit(&rseg->mutex);
+
+ mtr.commit();
+ mtr.start();
mutex_enter(&rseg->mutex);
rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
undo_page = trx_undo_page_get(
- page_id_t(rseg->space, hdr_addr.page), &mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
-
- /* Mark the last undo log totally purged, so that if the
- system crashes, the tail of the undo log will not get accessed
- again. The list of pages in the undo log tail gets inconsistent
- during the freeing of the segment, and therefore purge should
- not try to access them again. */
-
- if (!marked) {
- marked = true;
- mlog_write_ulint(
- log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
- }
-
- if (fseg_free_step_not_header(
- seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr)) {
-
- break;
- }
-
- mutex_exit(&rseg->mutex);
-
- mtr_commit(&mtr);
+ page_id_t(rseg->space->id, hdr_addr.page), &mtr);
}
/* The page list may now be inconsistent, but the length field
stored in the list base node tells us how big it was before we
started the freeing. */
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST);
+ const ulint seg_size = flst_get_len(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + undo_page);
/* We may free the undo log segment header page; it must be freed
within the same mtr as the undo log header is removed from the
history list: otherwise, in case of a database crash, the segment
could become inaccessible garbage in the file space. */
- trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
+ trx_purge_remove_log_hdr(rseg_hdr, undo_page + hdr_addr.boffset, &mtr);
do {
@@ -415,10 +406,11 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
is not flooded with bufferfixed pages: see the note in
fsp0fsp.cc. */
- } while (!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, false, &mtr));
+ } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ + undo_page, false, &mtr));
- hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, &mtr);
+ const ulint hist_size = mach_read_from_4(rseg_hdr
+ + TRX_RSEG_HISTORY_SIZE);
ut_ad(hist_size >= seg_size);
mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
@@ -435,10 +427,12 @@ trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
/** Remove unnecessary history data from a rollback segment.
@param[in,out] rseg rollback segment
-@param[in] limit truncate offset */
+@param[in] limit truncate anything before this */
static
void
-trx_purge_truncate_rseg_history(trx_rseg_t* rseg, const purge_iter_t* limit)
+trx_purge_truncate_rseg_history(
+ trx_rseg_t& rseg,
+ const purge_sys_t::iterator& limit)
{
fil_addr_t hdr_addr;
fil_addr_t prev_hdr_addr;
@@ -449,48 +443,37 @@ trx_purge_truncate_rseg_history(trx_rseg_t* rseg, const purge_iter_t* limit)
mtr_t mtr;
trx_id_t undo_trx_no;
- mtr_start(&mtr);
- ut_ad(rseg->is_persistent());
- mutex_enter(&(rseg->mutex));
+ mtr.start();
+ ut_ad(rseg.is_persistent());
+ mutex_enter(&rseg.mutex);
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
hdr_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
loop:
if (hdr_addr.page == FIL_NULL) {
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-
+func_exit:
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
return;
}
- undo_page = trx_undo_page_get(page_id_t(rseg->space, hdr_addr.page),
+ undo_page = trx_undo_page_get(page_id_t(rseg.space->id, hdr_addr.page),
&mtr);
log_hdr = undo_page + hdr_addr.boffset;
undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
- if (undo_trx_no >= limit->trx_no) {
-
- /* limit space_id should match the rollback segment
- space id to avoid freeing of the page belongs to
- different rollback segment for the same trx_no. */
- if (undo_trx_no == limit->trx_no
- && rseg->space == limit->undo_rseg_space) {
-
+ if (undo_trx_no >= limit.trx_no()) {
+ if (undo_trx_no == limit.trx_no()) {
trx_undo_truncate_start(
- rseg, hdr_addr.page,
- hdr_addr.boffset, limit->undo_no);
+ &rseg, hdr_addr.page,
+ hdr_addr.boffset, limit.undo_no);
}
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return;
+ goto func_exit;
}
prev_hdr_addr = trx_purge_get_log_from_hist(
@@ -503,24 +486,24 @@ loop:
/* We can free the whole log segment */
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
/* calls the trx_purge_remove_log_hdr()
inside trx_purge_free_segment(). */
- trx_purge_free_segment(rseg, hdr_addr);
+ trx_purge_free_segment(&rseg, hdr_addr);
} else {
/* Remove the log hdr from the rseg history. */
trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ mutex_exit(&rseg.mutex);
+ mtr.commit();
}
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
+ mtr.start();
+ mutex_enter(&rseg.mutex);
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
hdr_addr = prev_hdr_addr;
@@ -611,8 +594,8 @@ namespace undo {
return;
}
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ ulint sz = srv_page_size;
+ void* buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == NULL) {
os_file_close(handle);
os_file_delete(innodb_log_file_key, log_file_name);
@@ -621,7 +604,7 @@ namespace undo {
}
byte* log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
+ ut_align(buf, srv_page_size));
mach_write_to_4(log_buf, undo::s_magic);
@@ -679,8 +662,8 @@ namespace undo {
return(false);
}
- ulint sz = UNIV_PAGE_SIZE;
- void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
+ ulint sz = srv_page_size;
+ void* buf = ut_zalloc_nokey(sz + srv_page_size);
if (buf == NULL) {
os_file_close(handle);
os_file_delete(innodb_log_file_key,
@@ -690,7 +673,7 @@ namespace undo {
}
byte* log_buf = static_cast<byte*>(
- ut_align(buf, UNIV_PAGE_SIZE));
+ ut_align(buf, srv_page_size));
IORequest request(IORequest::READ);
@@ -768,7 +751,7 @@ trx_purge_mark_undo_for_truncate(
for (ulint i = 1; i <= srv_undo_tablespaces_active; i++) {
if (fil_space_get_size(space_id)
- > (srv_max_undo_log_size / srv_page_size)) {
+ > (srv_max_undo_log_size >> srv_page_size_shift)) {
/* Tablespace qualifies for truncate. */
undo_trunc->mark(space_id);
undo::Truncate::add_space_to_trunc_list(space_id);
@@ -794,9 +777,10 @@ trx_purge_mark_undo_for_truncate(
/* Step-3: Iterate over all the rsegs of selected UNDO tablespace
and mark them temporarily unavailable for allocation.*/
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
ut_ad(rseg->is_persistent());
- if (rseg->space == undo_trunc->get_marked_space_id()) {
+ if (rseg->space->id
+ == undo_trunc->get_marked_space_id()) {
/* Once set this rseg will not be allocated
to new booting transaction but we will wait
@@ -818,17 +802,17 @@ void
trx_purge_cleanse_purge_queue(
undo::Truncate* undo_trunc)
{
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
typedef std::vector<TrxUndoRsegs> purge_elem_list_t;
purge_elem_list_t purge_elem_list;
/* Remove rseg instances that are in the purge queue before we start
truncate of corresponding UNDO truncate. */
- while (!purge_sys->purge_queue.empty()) {
- purge_elem_list.push_back(purge_sys->purge_queue.top());
- purge_sys->purge_queue.pop();
+ while (!purge_sys.purge_queue.empty()) {
+ purge_elem_list.push_back(purge_sys.purge_queue.top());
+ purge_sys.purge_queue.pop();
}
- ut_ad(purge_sys->purge_queue.empty());
+ ut_ad(purge_sys.purge_queue.empty());
for (purge_elem_list_t::iterator it = purge_elem_list.begin();
it != purge_elem_list.end();
@@ -838,21 +822,18 @@ trx_purge_cleanse_purge_queue(
it2 != it->end();
++it2) {
- if ((*it2)->space
+ if ((*it2)->space->id
== undo_trunc->get_marked_space_id()) {
it->erase(it2);
break;
}
}
- if (it->size()) {
- /* size != 0 suggest that there exist other rsegs that
- needs processing so add this element to purge queue.
- Note: Other rseg could be non-redo rsegs. */
- purge_sys->purge_queue.push(*it);
+ if (!it->empty()) {
+ purge_sys.purge_queue.push(*it);
}
}
- mutex_exit(&purge_sys->pq_mutex);
+ mutex_exit(&purge_sys.pq_mutex);
}
/** Iterate over selected UNDO tablespace and check if all the rsegs
@@ -862,7 +843,7 @@ that resides in the tablespace are free.
static
void
trx_purge_initiate_truncate(
- purge_iter_t* limit,
+ const purge_sys_t::iterator& limit,
undo::Truncate* undo_trunc)
{
/* Step-1: Early check to find out if any of the UNDO tablespace
@@ -906,23 +887,11 @@ trx_purge_initiate_truncate(
ulint cached_undo_size = 0;
for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL && all_free;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
- if (limit->trx_no < undo->trx_id) {
- all_free = false;
- } else {
- cached_undo_size += undo->size;
- }
- }
-
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- undo != NULL && all_free;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- if (limit->trx_no < undo->trx_id) {
+ if (limit.trx_no() < undo->trx_id) {
all_free = false;
} else {
cached_undo_size += undo->size;
@@ -960,6 +929,14 @@ trx_purge_initiate_truncate(
ut_a(srv_is_undo_tablespace(space_id));
+ fil_space_t* space = fil_space_get(space_id);
+
+ if (!space) {
+not_found:
+ ib::error() << "Failed to find UNDO tablespace " << space_id;
+ return;
+ }
+
/* Flush all to-be-discarded pages of the tablespace.
During truncation, we do not want any writes to the
@@ -972,8 +949,8 @@ trx_purge_initiate_truncate(
break crash recovery. So, we cannot avoid the write. */
{
FlushObserver observer(
- space_id,
- UT_LIST_GET_FIRST(purge_sys->query->thrs)->graph->trx,
+ space,
+ UT_LIST_GET_FIRST(purge_sys.query->thrs)->graph->trx,
NULL);
buf_LRU_flush_or_remove_pages(space_id, &observer);
}
@@ -981,11 +958,10 @@ trx_purge_initiate_truncate(
log_free_check();
/* Adjust the tablespace metadata. */
- fil_space_t* space = fil_truncate_prepare(space_id);
+ space = fil_truncate_prepare(space_id);
if (!space) {
- ib::error() << "Failed to find UNDO tablespace " << space_id;
- return;
+ goto not_found;
}
/* Undo tablespace always are a single file. */
@@ -1000,10 +976,12 @@ trx_purge_initiate_truncate(
mtr.start();
mtr_x_lock(&space->latch, &mtr);
fil_truncate_log(space, size, &mtr);
- fsp_header_init(space_id, size, &mtr);
- mutex_enter(&fil_system->mutex);
+ fsp_header_init(space, size, &mtr);
+ mutex_enter(&fil_system.mutex);
space->size = file->size = size;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
+
+ buf_block_t* sys_header = trx_sysf_get(&mtr);
for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) {
trx_rsegf_t* rseg_header;
@@ -1011,58 +989,43 @@ trx_purge_initiate_truncate(
trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i);
rseg->page_no = trx_rseg_header_create(
- space_id, ULINT_MAX, rseg->id, &mtr);
+ space, rseg->id, sys_header, &mtr);
- rseg_header = trx_rsegf_get_new(space_id, rseg->page_no, &mtr);
+ rseg_header = trx_rsegf_get_new(
+ space_id, rseg->page_no, &mtr);
/* Before re-initialization ensure that we free the existing
structure. There can't be any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
- ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0);
trx_undo_t* next_undo;
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->update_undo_cached);
- undo != NULL;
- undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
- }
-
- for (trx_undo_t* undo =
- UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL;
undo = next_undo) {
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
- UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list);
/* These were written by trx_rseg_header_create(). */
- ut_ad(mach_read_from_4(rseg_header + TRX_RSEG_MAX_SIZE)
- == uint32_t(rseg->max_size));
+ ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_FORMAT));
ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE));
- rseg->max_size = ULINT_MAX;
-
/* Initialize the undo log lists according to the rseg header */
rseg->curr_size = 1;
rseg->trx_ref_count = 0;
rseg->last_page_no = FIL_NULL;
rseg->last_offset = 0;
- rseg->last_trx_no = 0;
- rseg->last_del_marks = FALSE;
+ rseg->last_commit = 0;
+ rseg->needs_purge = false;
}
mtr.commit();
@@ -1078,24 +1041,24 @@ trx_purge_initiate_truncate(
/* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */
- mutex_enter(&fil_system->mutex);
+ mutex_enter(&fil_system.mutex);
ut_ad(space->stop_new_ops);
ut_ad(space->is_being_truncated);
space->stop_new_ops = false;
space->is_being_truncated = false;
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system.mutex);
- if (purge_sys->rseg != NULL
- && purge_sys->rseg->last_page_no == FIL_NULL) {
- /* If purge_sys->rseg is pointing to rseg that was recently
+ if (purge_sys.rseg != NULL
+ && purge_sys.rseg->last_page_no == FIL_NULL) {
+ /* If purge_sys.rseg is pointing to rseg that was recently
truncated then move to next rseg element.
- Note: Ideally purge_sys->rseg should be NULL because purge
+ Note: Ideally purge_sys.rseg should be NULL because purge
should complete processing of all the records but there is
purge_batch_size that can force the purge loop to exit before
- all the records are purged and in this case purge_sys->rseg
+ all the records are purged and in this case purge_sys.rseg
could point to a valid rseg waiting for next purge cycle. */
- purge_sys->next_stored = false;
- purge_sys->rseg = NULL;
+ purge_sys.next_stored = false;
+ purge_sys.rseg = NULL;
}
DBUG_EXECUTE_IF("ib_undo_trunc",
@@ -1115,35 +1078,26 @@ trx_purge_initiate_truncate(
undo::Truncate::clear_trunc_list();
}
-/********************************************************************//**
+/**
Removes unnecessary history data from rollback segments. NOTE that when this
-function is called, the caller must not have any latches on undo log pages! */
-static
-void
-trx_purge_truncate_history(
-/*========================*/
- purge_iter_t* limit, /*!< in: truncate limit */
- const ReadView* view) /*!< in: purge view */
+function is called, the caller must not have any latches on undo log pages!
+*/
+static void trx_purge_truncate_history()
{
- ut_ad(trx_purge_check_limit());
-
- /* We play safe and set the truncate limit at most to the purge view
- low_limit number, though this is not necessary */
-
- if (limit->trx_no >= view->low_limit_no()) {
- limit->trx_no = view->low_limit_no();
- limit->undo_no = 0;
- limit->undo_rseg_space = ULINT_UNDEFINED;
+ ut_ad(purge_sys.head <= purge_sys.tail);
+ purge_sys_t::iterator& head = purge_sys.head.commit
+ ? purge_sys.head : purge_sys.tail;
+
+ if (head.trx_no() >= purge_sys.view.low_limit_no()) {
+ /* This is sometimes necessary. TODO: find out why. */
+ head.reset_trx_no(purge_sys.view.low_limit_no());
+ head.undo_no = 0;
}
- ut_ad(limit->trx_no <= purge_sys->view.low_limit_no());
-
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys->rseg_array[i];
-
- if (rseg != NULL) {
- ut_a(rseg->id == i);
- trx_purge_truncate_rseg_history(rseg, limit);
+ if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
+ ut_ad(rseg->id == i);
+ trx_purge_truncate_rseg_history(*rseg, head);
}
}
@@ -1151,14 +1105,14 @@ trx_purge_truncate_history(
can (greedy approach). This will ensure when the server is idle we
try and truncate all the UNDO tablespaces. */
for (ulint i = srv_undo_tablespaces_active; i--; ) {
- trx_purge_mark_undo_for_truncate(&purge_sys->undo_trunc);
- trx_purge_initiate_truncate(limit, &purge_sys->undo_trunc);
+ trx_purge_mark_undo_for_truncate(&purge_sys.undo_trunc);
+ trx_purge_initiate_truncate(head, &purge_sys.undo_trunc);
}
}
/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
-a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
+a whole undo log. Advances also purge_sys.purge_trx_no past the purged log. */
static
void
trx_purge_rseg_get_next_history_log(
@@ -1171,22 +1125,20 @@ trx_purge_rseg_get_next_history_log(
trx_ulogf_t* log_hdr;
fil_addr_t prev_log_addr;
trx_id_t trx_no;
- ibool del_marks;
mtr_t mtr;
mutex_enter(&(rseg->mutex));
ut_a(rseg->last_page_no != FIL_NULL);
- purge_sys->iter.trx_no = rseg->last_trx_no + 1;
- purge_sys->iter.undo_no = 0;
- purge_sys->iter.undo_rseg_space = ULINT_UNDEFINED;
- purge_sys->next_stored = false;
+ purge_sys.tail.commit = rseg->last_commit + 1;
+ purge_sys.tail.undo_no = 0;
+ purge_sys.next_stored = false;
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(
- page_id_t(rseg->space, rseg->last_page_no), &mtr);
+ page_id_t(rseg->space->id, rseg->last_page_no), &mtr);
log_hdr = undo_page + rseg->last_offset;
@@ -1211,17 +1163,17 @@ trx_purge_rseg_get_next_history_log(
mtr_commit(&mtr);
- /* Read the trx number and del marks from the previous log header */
+ /* Read the previous log header. */
mtr_start(&mtr);
- log_hdr = trx_undo_page_get_s_latched(page_id_t(rseg->space,
+ log_hdr = trx_undo_page_get_s_latched(page_id_t(rseg->space->id,
prev_log_addr.page),
&mtr)
+ prev_log_addr.boffset;
trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
+ unsigned purge = mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE);
+ ut_ad(purge <= 1);
mtr_commit(&mtr);
@@ -1229,22 +1181,19 @@ trx_purge_rseg_get_next_history_log(
rseg->last_page_no = prev_log_addr.page;
rseg->last_offset = prev_log_addr.boffset;
- rseg->last_trx_no = trx_no;
- rseg->last_del_marks = del_marks;
-
- TrxUndoRsegs elem(rseg->last_trx_no);
- elem.push_back(rseg);
+ rseg->set_last_trx_no(trx_no, purge != 0);
+ rseg->needs_purge = purge != 0;
/* Purge can also produce events, however these are already ordered
in the rollback segment and any user generated event will be greater
than the events that Purge produces. ie. Purge can never produce
events from an empty rollback segment. */
- mutex_enter(&purge_sys->pq_mutex);
+ mutex_enter(&purge_sys.pq_mutex);
- purge_sys->purge_queue.push(elem);
+ purge_sys.purge_queue.push(*rseg);
- mutex_exit(&purge_sys->pq_mutex);
+ mutex_exit(&purge_sys.pq_mutex);
mutex_exit(&rseg->mutex);
}
@@ -1257,46 +1206,36 @@ trx_purge_read_undo_rec()
ulint offset;
ulint page_no;
ib_uint64_t undo_no;
- ulint undo_rseg_space;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+ purge_sys.hdr_offset = purge_sys.rseg->last_offset;
+ page_no = purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
- if (purge_sys->rseg->last_del_marks) {
+ if (purge_sys.rseg->needs_purge) {
mtr_t mtr;
- trx_undo_rec_t* undo_rec = NULL;
+ mtr.start();
+ if (trx_undo_rec_t* undo_rec = trx_undo_get_first_rec(
+ purge_sys.rseg->space, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset, RW_S_LATCH, &mtr)) {
- mtr_start(&mtr);
-
- undo_rec = trx_undo_get_first_rec(
- purge_sys->rseg->space,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset, RW_S_LATCH, &mtr);
-
- if (undo_rec != NULL) {
offset = page_offset(undo_rec);
undo_no = trx_undo_rec_get_undo_no(undo_rec);
- undo_rseg_space = purge_sys->rseg->space;
page_no = page_get_page_no(page_align(undo_rec));
} else {
offset = 0;
undo_no = 0;
- undo_rseg_space = ULINT_UNDEFINED;
}
- mtr_commit(&mtr);
+ mtr.commit();
} else {
offset = 0;
undo_no = 0;
- undo_rseg_space = ULINT_UNDEFINED;
}
- purge_sys->offset = offset;
- purge_sys->page_no = page_no;
- purge_sys->iter.undo_no = undo_no;
- purge_sys->iter.undo_rseg_space = undo_rseg_space;
+ purge_sys.offset = offset;
+ purge_sys.page_no = page_no;
+ purge_sys.tail.undo_no = undo_no;
- purge_sys->next_stored = true;
+ purge_sys.next_stored = true;
}
/***********************************************************************//**
@@ -1309,9 +1248,9 @@ void
trx_purge_choose_next_log(void)
/*===========================*/
{
- ut_ad(!purge_sys->next_stored);
+ ut_ad(!purge_sys.next_stored);
- if (purge_sys->rseg_iter.set_next()) {
+ if (purge_sys.rseg_iter.set_next()) {
trx_purge_read_undo_rec();
} else {
/* There is nothing to do yet. */
@@ -1340,19 +1279,19 @@ trx_purge_get_next_rec(
ulint space;
mtr_t mtr;
- ut_ad(purge_sys->next_stored);
- ut_ad(purge_sys->iter.trx_no < purge_sys->view.low_limit_no());
+ ut_ad(purge_sys.next_stored);
+ ut_ad(purge_sys.tail.trx_no() < purge_sys.view.low_limit_no());
- space = purge_sys->rseg->space;
- page_no = purge_sys->page_no;
- offset = purge_sys->offset;
+ space = purge_sys.rseg->space->id;
+ page_no = purge_sys.page_no;
+ offset = purge_sys.offset;
if (offset == 0) {
/* It is the dummy undo log record, which means that there is
no need to purge this undo log */
trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
+ purge_sys.rseg, n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -1368,52 +1307,19 @@ trx_purge_get_next_rec(
rec = undo_page + offset;
- rec2 = rec;
-
- for (;;) {
- ulint type;
- trx_undo_rec_t* next_rec;
- ulint cmpl_info;
-
- /* Try first to find the next record which requires a purge
- operation from the same page of the same undo log */
-
- next_rec = trx_undo_page_get_next_rec(
- rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
-
- if (next_rec == NULL) {
- rec2 = trx_undo_get_next_rec(
- rec2, purge_sys->hdr_page_no,
- purge_sys->hdr_offset, &mtr);
- break;
- }
-
- rec2 = next_rec;
+ rec2 = trx_undo_page_get_next_rec(rec, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset);
- type = trx_undo_rec_get_type(rec2);
-
- if (type == TRX_UNDO_DEL_MARK_REC) {
-
- break;
- }
-
- cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
-
- if (trx_undo_rec_get_extern_storage(rec2)) {
- break;
- }
-
- if ((type == TRX_UNDO_UPD_EXIST_REC)
- && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- break;
- }
+ if (rec2 == NULL) {
+ rec2 = trx_undo_get_next_rec(rec, purge_sys.hdr_page_no,
+ purge_sys.hdr_offset, &mtr);
}
if (rec2 == NULL) {
mtr_commit(&mtr);
trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
+ purge_sys.rseg, n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -1428,10 +1334,9 @@ trx_purge_get_next_rec(
} else {
page = page_align(rec2);
- purge_sys->offset = rec2 - page;
- purge_sys->page_no = page_get_page_no(page);
- purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->iter.undo_rseg_space = space;
+ purge_sys.offset = ulint(rec2 - page);
+ purge_sys.page_no = page_get_page_no(page);
+ purge_sys.tail.undo_no = trx_undo_rec_get_undo_no(rec2);
if (undo_page != page) {
/* We advance to a new page of the undo log: */
@@ -1460,17 +1365,17 @@ trx_purge_fetch_next_rec(
handled */
mem_heap_t* heap) /*!< in: memory heap where copied */
{
- if (!purge_sys->next_stored) {
+ if (!purge_sys.next_stored) {
trx_purge_choose_next_log();
- if (!purge_sys->next_stored) {
+ if (!purge_sys.next_stored) {
DBUG_PRINT("ib_purge",
("no logs left in the history list"));
return(NULL);
}
}
- if (purge_sys->iter.trx_no >= purge_sys->view.low_limit_no()) {
+ if (purge_sys.tail.trx_no() >= purge_sys.view.low_limit_no()) {
return(NULL);
}
@@ -1479,8 +1384,11 @@ trx_purge_fetch_next_rec(
os_thread_get_curr_id(), iter->trx_no, iter->undo_no); */
*roll_ptr = trx_undo_build_roll_ptr(
- FALSE, purge_sys->rseg->id,
- purge_sys->page_no, purge_sys->offset);
+ /* row_purge_record_func() will later set
+ ROLL_PTR_INSERT_FLAG for TRX_UNDO_INSERT_REC */
+ false,
+ purge_sys.rseg->id,
+ purge_sys.page_no, purge_sys.offset);
/* The following call will advance the stored values of the
purge iterator. */
@@ -1488,28 +1396,24 @@ trx_purge_fetch_next_rec(
return(trx_purge_get_next_rec(n_pages_handled, heap));
}
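
trx_undo_build_roll_ptr() packs the insert flag, rollback segment id, page number and page offset into a single roll pointer that the purged record points back to. The sketch below shows the general shape of such packing; the exact bit widths are assumptions made for illustration, not a statement about the on-disk format.

#include <cstdint>
#include <cassert>
#include <iostream>

// Illustrative bit layout for a 56-bit roll pointer:
// [ 1 bit insert flag | 7 bits rseg id | 32 bits page no | 16 bits offset ]
static uint64_t build_roll_ptr(bool is_insert, unsigned rseg_id,
                               uint32_t page_no, uint16_t offset) {
  assert(rseg_id < 128);
  return (uint64_t(is_insert) << 55)
       | (uint64_t(rseg_id) << 48)
       | (uint64_t(page_no) << 16)
       | offset;
}

static void decode_roll_ptr(uint64_t roll_ptr, bool* is_insert,
                            unsigned* rseg_id, uint32_t* page_no,
                            uint16_t* offset) {
  *is_insert = (roll_ptr >> 55) & 1;
  *rseg_id   = unsigned((roll_ptr >> 48) & 0x7f);
  *page_no   = uint32_t(roll_ptr >> 16);
  *offset    = uint16_t(roll_ptr);
}

int main() {
  uint64_t p = build_roll_ptr(false, 5, 1234, 120);
  bool ins; unsigned rseg; uint32_t page; uint16_t off;
  decode_roll_ptr(p, &ins, &rseg, &page, &off);
  assert(!ins && rseg == 5 && page == 1234 && off == 120);
  std::cout << std::hex << p << '\n';
}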
-/*******************************************************************//**
-This function runs a purge batch.
+/** Run a purge batch.
+@param n_purge_threads number of purge threads
@return number of undo log pages handled in the batch */
static
ulint
-trx_purge_attach_undo_recs(
-/*=======================*/
- ulint n_purge_threads,/*!< in: number of purge threads */
- purge_sys_t* purge_sys, /*!< in/out: purge instance */
- ulint batch_size) /*!< in: no. of pages to purge */
+trx_purge_attach_undo_recs(ulint n_purge_threads)
{
que_thr_t* thr;
ulint i = 0;
ulint n_pages_handled = 0;
- ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
+ ulint n_thrs = UT_LIST_GET_LEN(purge_sys.query->thrs);
ut_a(n_purge_threads > 0);
- purge_sys->limit = purge_sys->iter;
+ purge_sys.head = purge_sys.tail;
/* Debug code to validate some pre-requisites and reset done flag. */
- for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
thr != NULL && i < n_purge_threads;
thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
@@ -1531,13 +1435,15 @@ trx_purge_attach_undo_recs(
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
ut_a(n_thrs > 0 && thr != NULL);
- ut_ad(trx_purge_check_limit());
+ ut_ad(purge_sys.head <= purge_sys.tail);
i = 0;
+ const ulint batch_size = srv_purge_batch_size;
+
for (;;) {
purge_node_t* node;
trx_purge_rec_t* purge_rec;
@@ -1554,11 +1460,11 @@ trx_purge_attach_undo_recs(
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
- if (trx_purge_check_limit()) {
- purge_sys->limit = purge_sys->iter;
+ if (purge_sys.head <= purge_sys.tail) {
+ purge_sys.head = purge_sys.tail;
}
- /* Fetch the next record, and advance the purge_sys->iter. */
+ /* Fetch the next record, and advance the purge_sys.tail. */
purge_rec->undo_rec = trx_purge_fetch_next_rec(
&purge_rec->roll_ptr, &n_pages_handled, node->heap);
@@ -1586,13 +1492,13 @@ trx_purge_attach_undo_recs(
thr = UT_LIST_GET_NEXT(thrs, thr);
if (!(++i % n_purge_threads)) {
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
}
ut_a(thr != NULL);
}
- ut_ad(trx_purge_check_limit());
+ ut_ad(purge_sys.head <= purge_sys.tail);
return(n_pages_handled);
}
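
trx_purge_attach_undo_recs() hands the fetched undo records to the purge worker threads in round-robin order until the page budget (srv_purge_batch_size) is exhausted. A self-contained C++ sketch of that distribution loop, with invented types standing in for the purge nodes:

#include <cstddef>
#include <vector>
#include <deque>
#include <iostream>

// Hypothetical work item and per-thread queue; the point is only the
// round-robin assignment and the page budget that bounds one batch.
struct UndoRec { std::size_t pages; };

int main() {
  const std::size_t n_threads = 4;
  const std::size_t batch_size = 10;        // page budget per batch
  std::vector<std::deque<UndoRec>> per_thread(n_threads);

  std::deque<UndoRec> incoming(25, UndoRec{1});
  std::size_t n_pages_handled = 0;
  std::size_t i = 0;

  while (!incoming.empty() && n_pages_handled < batch_size) {
    UndoRec rec = incoming.front();
    incoming.pop_front();
    n_pages_handled += rec.pages;
    // Hand the record to the next thread's queue, wrapping around.
    per_thread[i++ % n_threads].push_back(rec);
  }

  for (std::size_t t = 0; t < n_threads; t++)
    std::cout << "thread " << t << ": " << per_thread[t].size()
              << " records\n";
  std::cout << "pages in batch: " << n_pages_handled << '\n';
}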
@@ -1612,12 +1518,12 @@ trx_purge_dml_delay(void)
/* If purge lag is set (ie. > 0) then calculate the new DML delay.
Note: we do a dirty read of the trx_sys_t data structure here,
- without holding trx_sys->mutex. */
+ without holding trx_sys.mutex. */
if (srv_max_purge_lag > 0) {
float ratio;
- ratio = float(trx_sys->rseg_history_len) / srv_max_purge_lag;
+ ratio = float(trx_sys.history_size()) / srv_max_purge_lag;
if (ratio > 1.0) {
/* If the history list length exceeds the
@@ -1637,18 +1543,14 @@ trx_purge_dml_delay(void)
return(delay);
}
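
The delay computed here throttles DML once the history list grows past srv_max_purge_lag. A hedged sketch of the idea follows; the scaling constant and the cap below are illustrative, since the hunk does not show the server's actual values.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Sketch of a purge-lag throttle: once the history list grows past
// the configured limit, DML gets delayed in proportion to the overshoot.
static uint64_t dml_delay_us(uint64_t history_len, uint64_t max_purge_lag,
                             uint64_t max_delay_us) {
  if (max_purge_lag == 0)          // throttle disabled
    return 0;
  const double ratio = double(history_len) / double(max_purge_lag);
  if (ratio <= 1.0)                // purge is keeping up
    return 0;
  const uint64_t delay = uint64_t((ratio - 1.0) * 10000.0);  // illustrative scale
  return std::min(delay, max_delay_us);
}

int main() {
  std::cout << dml_delay_us(500, 1000, 100000) << '\n';   // 0: below the lag limit
  std::cout << dml_delay_us(3000, 1000, 100000) << '\n';  // throttled
  std::cout << dml_delay_us(3000, 0, 100000) << '\n';     // 0: disabled
}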
-/*******************************************************************//**
-Wait for pending purge jobs to complete. */
+/** Wait for pending purge jobs to complete. */
static
void
-trx_purge_wait_for_workers_to_complete(
-/*===================================*/
- purge_sys_t* purge_sys) /*!< in: purge instance */
+trx_purge_wait_for_workers_to_complete()
{
- ulint n_submitted = purge_sys->n_submitted;
-
/* Ensure that the work queue empties out. */
- while ((ulint) my_atomic_loadlint(&purge_sys->n_completed) != n_submitted) {
+ while (my_atomic_loadlint(&purge_sys.n_completed)
+ != purge_sys.n_submitted) {
if (srv_get_task_queue_length() > 0) {
srv_release_threads(SRV_WORKER, 1);
@@ -1657,9 +1559,6 @@ trx_purge_wait_for_workers_to_complete(
os_thread_yield();
}
- /* None of the worker threads should be doing any work. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
/* There should be no outstanding tasks as long
as the worker threads are active. */
ut_a(srv_get_task_queue_length() == 0);
@@ -1673,8 +1572,6 @@ trx_purge(
/*======*/
ulint n_purge_threads, /*!< in: number of purge tasks
to submit to the work queue */
- ulint batch_size, /*!< in: the maximum number of records
- to purge in one batch */
bool truncate) /*!< in: truncate history if true */
{
que_thr_t* thr = NULL;
@@ -1685,11 +1582,12 @@ trx_purge(
srv_dml_needed_delay = trx_purge_dml_delay();
/* The number of tasks submitted should be completed. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+ ut_a(purge_sys.n_submitted
+ == my_atomic_loadlint(&purge_sys.n_completed));
- rw_lock_x_lock(&purge_sys->latch);
- trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
- rw_lock_x_unlock(&purge_sys->latch);
+ rw_lock_x_lock(&purge_sys.latch);
+ trx_sys.clone_oldest_view();
+ rw_lock_x_unlock(&purge_sys.latch);
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
@@ -1698,66 +1596,31 @@ trx_purge(
#endif /* UNIV_DEBUG */
/* Fetch the UNDO recs that need to be purged. */
- n_pages_handled = trx_purge_attach_undo_recs(
- n_purge_threads, purge_sys, batch_size);
-
- /* Do we do an asynchronous purge or not ? */
- if (n_purge_threads > 1) {
- ulint i = 0;
-
- /* Submit the tasks to the work queue. */
- for (i = 0; i < n_purge_threads - 1; ++i) {
- thr = que_fork_scheduler_round_robin(
- purge_sys->query, thr);
-
- ut_a(thr != NULL);
-
- srv_que_task_enqueue_low(thr);
- }
-
- thr = que_fork_scheduler_round_robin(purge_sys->query, thr);
- ut_a(thr != NULL);
-
- purge_sys->n_submitted += n_purge_threads - 1;
-
- goto run_synchronously;
-
- /* Do it synchronously. */
- } else {
- thr = que_fork_scheduler_round_robin(purge_sys->query, NULL);
- ut_ad(thr);
+ n_pages_handled = trx_purge_attach_undo_recs(n_purge_threads);
+ purge_sys.n_submitted += n_purge_threads;
+
+ /* Submit tasks to workers queue if using multi-threaded purge. */
+ for (ulint i = n_purge_threads; --i; ) {
+ thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
+ ut_a(thr);
+ srv_que_task_enqueue_low(thr);
+ }
-run_synchronously:
- ++purge_sys->n_submitted;
+ thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
- que_run_threads(thr);
+ que_run_threads(thr);
- my_atomic_addlint(
- &purge_sys->n_completed, 1);
+ my_atomic_addlint(&purge_sys.n_completed, 1);
- if (n_purge_threads > 1) {
- trx_purge_wait_for_workers_to_complete(purge_sys);
- }
+ if (n_purge_threads > 1) {
+ trx_purge_wait_for_workers_to_complete();
}
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
-#ifdef UNIV_DEBUG
- rw_lock_x_lock(&purge_sys->latch);
- if (purge_sys->limit.trx_no == 0) {
- purge_sys->done = purge_sys->iter;
- } else {
- purge_sys->done = purge_sys->limit;
- }
- rw_lock_x_unlock(&purge_sys->latch);
-#endif /* UNIV_DEBUG */
+ ut_a(purge_sys.n_submitted
+ == my_atomic_loadlint(&purge_sys.n_completed));
if (truncate) {
- trx_purge_truncate_history(
- purge_sys->limit.trx_no
- ? &purge_sys->limit
- : &purge_sys->iter,
- &purge_sys->view);
+ trx_purge_truncate_history();
}
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
@@ -1766,111 +1629,63 @@ run_synchronously:
return(n_pages_handled);
}
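
The restructured trx_purge() always enqueues n_purge_threads - 1 tasks for the workers, runs one share itself, and then waits until the completion counter catches up with the number of submitted tasks. A standalone sketch of that coordinator pattern using standard C++ threads (names are placeholders):

#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>
#include <iostream>

static std::atomic<std::size_t> n_completed{0};

static void purge_task(std::size_t id) {
  // ... process the undo records attached to this task ...
  (void)id;
  n_completed.fetch_add(1, std::memory_order_release);
}

int main() {
  const std::size_t n_threads = 4;
  std::size_t n_submitted = 0;
  std::vector<std::thread> workers;

  for (std::size_t i = 0; i + 1 < n_threads; i++) {   // submit to workers
    workers.emplace_back(purge_task, i);
    ++n_submitted;
  }
  purge_task(n_threads - 1);                          // coordinator's own share
  ++n_submitted;

  while (n_completed.load(std::memory_order_acquire) != n_submitted)
    std::this_thread::yield();                        // wait for the workers

  for (auto& t : workers) t.join();
  std::cout << "completed " << n_completed.load() << '\n';
}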
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-purge_state_t
-trx_purge_state(void)
-/*=================*/
+/** Stop purge during FLUSH TABLES FOR EXPORT */
+void purge_sys_t::stop()
{
- purge_state_t state;
-
- rw_lock_x_lock(&purge_sys->latch);
-
- state = purge_sys->state;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- return(state);
+ rw_lock_x_lock(&latch);
+
+ if (!enabled_latched())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ rw_lock_x_unlock(&latch);
+ return;
+ }
+
+ ut_ad(srv_n_purge_threads > 0);
+
+ if (0 == my_atomic_add32_explicit(&m_paused, 1, MY_MEMORY_ORDER_RELAXED))
+ {
+ /* We need to wakeup the purge thread in case it is suspended, so
+ that it can acknowledge the state change. */
+ const int64_t sig_count = os_event_reset(event);
+ rw_lock_x_unlock(&latch);
+ ib::info() << "Stopping purge";
+ srv_purge_wakeup();
+ /* Wait for purge coordinator to signal that it is suspended. */
+ os_event_wait_low(event, sig_count);
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
+ return;
+ }
+
+ rw_lock_x_unlock(&latch);
+
+ if (running())
+ {
+ ib::info() << "Waiting for purge to stop";
+ while (running())
+ os_thread_sleep(10000);
+ }
}
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-void
-trx_purge_stop(void)
-/*================*/
+/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
+void purge_sys_t::resume()
{
- rw_lock_x_lock(&purge_sys->latch);
-
- switch (purge_sys->state) {
- case PURGE_STATE_INIT:
- case PURGE_STATE_DISABLED:
- ut_error;
- case PURGE_STATE_EXIT:
- /* Shutdown must have been initiated during
- FLUSH TABLES FOR EXPORT. */
- ut_ad(!srv_undo_sources);
-unlock:
- rw_lock_x_unlock(&purge_sys->latch);
- break;
- case PURGE_STATE_STOP:
- ut_ad(srv_n_purge_threads > 0);
- ++purge_sys->n_stop;
- purge_sys->state = PURGE_STATE_STOP;
- if (!purge_sys->running) {
- goto unlock;
- }
- ib::info() << "Waiting for purge to stop";
- do {
- rw_lock_x_unlock(&purge_sys->latch);
- os_thread_sleep(10000);
- rw_lock_x_lock(&purge_sys->latch);
- } while (purge_sys->running);
- goto unlock;
- case PURGE_STATE_RUN:
- ut_ad(srv_n_purge_threads > 0);
- ++purge_sys->n_stop;
- ib::info() << "Stopping purge";
-
- /* We need to wakeup the purge thread in case it is suspended,
- so that it can acknowledge the state change. */
-
- const int64_t sig_count = os_event_reset(purge_sys->event);
- purge_sys->state = PURGE_STATE_STOP;
- rw_lock_x_unlock(&purge_sys->latch);
- srv_purge_wakeup();
- /* Wait for purge coordinator to signal that it
- is suspended. */
- os_event_wait_low(purge_sys->event, sig_count);
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
-}
-
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-void
-trx_purge_run(void)
-/*===============*/
-{
- rw_lock_x_lock(&purge_sys->latch);
-
- switch (purge_sys->state) {
- case PURGE_STATE_EXIT:
- /* Shutdown must have been initiated during
- FLUSH TABLES FOR EXPORT. */
- ut_ad(!srv_undo_sources);
- break;
- case PURGE_STATE_INIT:
- case PURGE_STATE_DISABLED:
- ut_error;
-
- case PURGE_STATE_RUN:
- ut_a(!purge_sys->n_stop);
- break;
- case PURGE_STATE_STOP:
- ut_a(purge_sys->n_stop);
- if (--purge_sys->n_stop == 0) {
-
- ib::info() << "Resuming purge";
-
- purge_sys->state = PURGE_STATE_RUN;
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- srv_purge_wakeup();
+ if (!enabled())
+ {
+ /* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ return;
+ }
+
+ int32_t paused= my_atomic_add32_explicit(&m_paused, -1,
+ MY_MEMORY_ORDER_RELAXED);
+ ut_a(paused);
+
+ if (paused == 1)
+ {
+ ib::info() << "Resuming purge";
+ srv_purge_wakeup();
+ MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
+ }
}
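
stop() and resume() now implement a reference-counted pause: only the transition from 0 to 1 actually halts purge and only the transition back to 0 restarts it, so nested FLUSH TABLES FOR EXPORT calls pair up naturally. A minimal sketch of that counter, with the event and wakeup machinery elided:

#include <atomic>
#include <cassert>
#include <iostream>

class PauseCounter {
  std::atomic<int32_t> m_paused{0};
public:
  void stop() {
    if (m_paused.fetch_add(1, std::memory_order_relaxed) == 0)
      std::cout << "Stopping purge\n";      // first stopper does the work
  }
  void resume() {
    int32_t before = m_paused.fetch_sub(1, std::memory_order_relaxed);
    assert(before > 0);                      // resume() must pair with stop()
    if (before == 1)
      std::cout << "Resuming purge\n";       // last resumer restarts purge
  }
  bool paused() const { return m_paused.load(std::memory_order_relaxed) > 0; }
};

int main() {
  PauseCounter purge;
  purge.stop();     // "Stopping purge"
  purge.stop();     // nested stop: no message
  purge.resume();   // still paused
  purge.resume();   // "Resuming purge"
  assert(!purge.paused());
}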
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index e700d5e0f58..56b8db7ec37 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -31,7 +31,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0log.h"
#include "dict0dict.h"
#include "ut0mem.h"
-#include "read0read.h"
#include "row0ext.h"
#include "row0upd.h"
#include "que0que.h"
@@ -40,61 +39,78 @@ Created 3/26/1996 Heikki Tuuri
#include "row0row.h"
#include "row0mysql.h"
+/** The search tuple corresponding to TRX_UNDO_INSERT_METADATA */
+const dtuple_t trx_undo_metadata = {
+ REC_INFO_METADATA, 0, 0,
+ NULL, 0, NULL,
+ UT_LIST_NODE_T(dtuple_t)()
+#ifdef UNIV_DEBUG
+ , DATA_TUPLE_MAGIC_N
+#endif /* UNIV_DEBUG */
+};
+
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-/**********************************************************************//**
-Writes the mtr log entry of the inserted undo log record on the undo log
-page. */
-UNIV_INLINE
-void
-trx_undof_page_add_undo_rec_log(
-/*============================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint old_free, /*!< in: start offset of the inserted entry */
- ulint new_free, /*!< in: end offset of the entry */
- mtr_t* mtr) /*!< in: mtr */
+/** Write redo log of writing an undo log record.
+@param[in] undo_block undo log page
+@param[in] old_free start offset of the undo log record
+@param[in] new_free end offset of the undo log record
+@param[in,out] mtr mini-transaction */
+static void trx_undof_page_add_undo_rec_log(const buf_block_t* undo_block,
+ ulint old_free, ulint new_free,
+ mtr_t* mtr)
{
- byte* log_ptr;
- const byte* log_end;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
-
- if (log_ptr == NULL) {
-
+ ut_ad(old_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ ut_ad(new_free >= old_free);
+ ut_ad(new_free < srv_page_size);
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame)
+ == new_free);
+ mtr->set_modified();
+ switch (mtr->get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
return;
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(0);
+ /* fall through */
+ case MTR_LOG_ALL:
+ break;
}
- log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
- log_ptr = mlog_write_initial_log_record_fast(
- undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
- len = new_free - old_free - 4;
-
+ const uint32_t
+ len = uint32_t(new_free - old_free - 4),
+ reserved = std::min<uint32_t>(11 + 13 + len,
+ mtr->get_log()->MAX_DATA_SIZE);
+ byte* log_ptr = mtr->get_log()->open(reserved);
+ const byte* log_end = log_ptr + reserved;
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_UNDO_INSERT,
+ undo_block->page.id.space(), undo_block->page.id.page_no(),
+ log_ptr, mtr);
mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- if (log_ptr + len <= log_end) {
- memcpy(log_ptr, undo_page + old_free + 2, len);
- mlog_close(mtr, log_ptr + len);
+ if (log_ptr + 2 + len <= log_end) {
+ memcpy(log_ptr + 2, undo_block->frame + old_free + 2, len);
+ mlog_close(mtr, log_ptr + 2 + len);
} else {
- mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, undo_page + old_free + 2, len);
+ mlog_close(mtr, log_ptr + 2);
+ mtr->get_log()->push(undo_block->frame + old_free + 2, len);
}
}
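
The new trx_undof_page_add_undo_rec_log() reserves space in the mini-transaction log, writes the initial record plus a 2-byte length, and copies the record body inline only when it fits in the reservation, falling back to pushing it separately otherwise. A simplified, self-contained sketch of that framing; the opcode value, chunk size and names are invented:

#include <algorithm>
#include <cstdint>
#include <vector>
#include <iostream>

static void catenate(std::vector<uint8_t>& redo, const uint8_t* p,
                     std::size_t n) {
  const std::size_t chunk = 4;                    // append in small pieces
  for (std::size_t i = 0; i < n; i += chunk)
    redo.insert(redo.end(), p + i, p + std::min(i + chunk, n));
}

static void log_undo_insert(std::vector<uint8_t>& redo, const uint8_t* rec,
                            uint16_t len, std::size_t reserved) {
  const std::size_t start = redo.size();
  redo.push_back(0x94);                           // stand-in opcode
  redo.push_back(uint8_t(len >> 8));              // 2-byte length, big-endian
  redo.push_back(uint8_t(len));
  if (redo.size() - start + len <= reserved)
    redo.insert(redo.end(), rec, rec + len);      // fits: inline copy
  else
    catenate(redo, rec, len);                     // does not fit: chunked path
}

int main() {
  std::vector<uint8_t> redo;
  const uint8_t rec[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  log_undo_insert(redo, rec, sizeof rec, 8);      // forces the chunked path
  std::cout << "redo bytes: " << redo.size() << '\n';  // 3 + 9 = 12
}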
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
+/** Parse MLOG_UNDO_INSERT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
byte*
trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page) /*!< in: page or NULL */
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page)
{
ulint len;
- byte* rec;
- ulint first_free;
if (end_ptr < ptr + 2) {
@@ -109,39 +125,32 @@ trx_undo_parse_add_undo_rec(
return(NULL);
}
- if (page == NULL) {
-
- return(ptr + len);
- }
-
- first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- rec = page + first_free;
+ if (page) {
+ ulint first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ byte* rec = page + first_free;
- mach_write_to_2(rec, first_free + 4 + len);
- mach_write_to_2(rec + 2 + len, first_free);
+ mach_write_to_2(rec, first_free + 4 + len);
+ mach_write_to_2(rec + 2 + len, first_free);
- mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- first_free + 4 + len);
- ut_memcpy(rec + 2, ptr, len);
+ mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+ first_free + 4 + len);
+ memcpy(rec + 2, ptr, len);
+ }
- return(ptr + len);
+ return(const_cast<byte*>(ptr + len));
}
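
When trx_undo_parse_add_undo_rec() is replayed against a page, the record is chained into it: a 2-byte pointer to the next record, the payload, a 2-byte back pointer, and an updated first-free field in the page header. The sketch below applies the same layout to a plain byte array; the header offsets are invented for the example.

#include <cstdint>
#include <cstring>
#include <iostream>

enum { PAGE_SIZE = 256, HDR_FREE = 0 };          // 2-byte "first free" field

static void write2(uint8_t* p, uint16_t v) { p[0] = uint8_t(v >> 8); p[1] = uint8_t(v); }
static uint16_t read2(const uint8_t* p) { return uint16_t(p[0] << 8 | p[1]); }

static void apply_undo_insert(uint8_t* page, const uint8_t* body, uint16_t len) {
  const uint16_t first_free = read2(page + HDR_FREE);
  uint8_t* rec = page + first_free;
  const uint16_t end = uint16_t(first_free + 4 + len);
  write2(rec, end);                  // offset of the next record
  std::memcpy(rec + 2, body, len);   // the record body
  write2(rec + 2 + len, first_free); // back pointer to this record's start
  write2(page + HDR_FREE, end);      // advance the page's free pointer
}

int main() {
  uint8_t page[PAGE_SIZE] = {0};
  write2(page + HDR_FREE, 8);                    // pretend the header ends at 8
  const uint8_t body[] = {0xAA, 0xBB, 0xCC};
  apply_undo_insert(page, body, sizeof body);
  std::cout << "first free is now " << read2(page + HDR_FREE) << '\n';  // 15
}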
-/**********************************************************************//**
-Calculates the free space left for extending an undo log record.
+/** Calculate the free space left for extending an undo log record.
+@param[in] undo_block undo log page
+@param[in] ptr current end of the undo page
@return bytes left */
-UNIV_INLINE
-ulint
-trx_undo_left(
-/*==========*/
- const page_t* page, /*!< in: undo log page */
- const byte* ptr) /*!< in: pointer to page */
+static ulint trx_undo_left(const buf_block_t* undo_block, const byte* ptr)
{
- /* The '- 10' is a safety margin, in case we have some small
+ /* The 10 is a safety margin, in case we have some small
calculation error below */
-
- return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
+ return srv_page_size - ulint(ptr - undo_block->frame)
+ - (10 + FIL_PAGE_DATA_END);
}
/**********************************************************************//**
@@ -153,7 +162,7 @@ static
ulint
trx_undo_page_set_next_prev_and_add(
/*================================*/
- page_t* undo_page, /*!< in/out: undo log page */
+ buf_block_t* undo_block, /*!< in/out: undo log page */
byte* ptr, /*!< in: ptr up to where data has been
written on this undo page. */
mtr_t* mtr) /*!< in: mtr */
@@ -165,15 +174,15 @@ trx_undo_page_set_next_prev_and_add(
that points to the next free
offset value within undo_page.*/
- ut_ad(ptr > undo_page);
- ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
+ ut_ad(ptr > undo_block->frame);
+ ut_ad(ptr < undo_block->frame + srv_page_size);
+ if (UNIV_UNLIKELY(trx_undo_left(undo_block, ptr) < 2)) {
return(0);
}
- ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
+ ptr_to_first_free = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame;
first_free = mach_read_from_2(ptr_to_first_free);
@@ -181,16 +190,16 @@ trx_undo_page_set_next_prev_and_add(
mach_write_to_2(ptr, first_free);
ptr += 2;
- end_of_rec = ptr - undo_page;
+ end_of_rec = ulint(ptr - undo_block->frame);
/* Write offset of the next undo log record */
- mach_write_to_2(undo_page + first_free, end_of_rec);
+ mach_write_to_2(undo_block->frame + first_free, end_of_rec);
/* Update the offset to first free undo record */
mach_write_to_2(ptr_to_first_free, end_of_rec);
/* Write this log entry to the UNDO log */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
+ trx_undof_page_add_undo_rec_log(undo_block, first_free,
end_of_rec, mtr);
return(first_free);
@@ -202,7 +211,7 @@ static const ulint VIRTUAL_COL_UNDO_FORMAT_1 = 0xF1;
/** Write virtual column index info (index id and column position in index)
to the undo log
-@param[in,out] undo_page undo log page
+@param[in,out] undo_block undo log page
@param[in] table the table
@param[in] pos the virtual column position
@param[in] ptr undo log record being written
@@ -212,7 +221,7 @@ to the undo log
static
byte*
trx_undo_log_v_idx(
- page_t* undo_page,
+ buf_block_t* undo_block,
const dict_table_t* table,
ulint pos,
byte* ptr,
@@ -231,7 +240,7 @@ trx_undo_log_v_idx(
1 byte for undo log record format version marker */
ulint size = n_idx * (5 + 5) + 5 + 2 + (first_v_col ? 1 : 0);
- if (trx_undo_left(undo_page, ptr) < size) {
+ if (trx_undo_left(undo_block, ptr) < size) {
return(NULL);
}
@@ -260,7 +269,7 @@ trx_undo_log_v_idx(
ptr += mach_write_compressed(ptr, v_index.nth_field);
}
- mach_write_to_2(old_ptr, ptr - old_ptr);
+ mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
return(ptr);
}
@@ -304,7 +313,7 @@ trx_undo_read_v_idx_low(
if (index->id == id) {
const dict_col_t* col = dict_index_get_nth_col(
index, pos);
- ut_ad(dict_col_is_virtual(col));
+ ut_ad(col->is_virtual());
const dict_v_col_t* vcol = reinterpret_cast<
const dict_v_col_t*>(col);
*col_pos = vcol->v_pos;
@@ -361,7 +370,7 @@ trx_undo_read_v_idx(
}
/** Report the virtual columns of an inserted row in the undo log.
-@param[in] undo_page undo log page
+@param[in] undo_block undo log page
@param[in] table the table
@param[in] row dtuple contains the virtual columns
@param[in,out] ptr log ptr
@@ -369,7 +378,7 @@ trx_undo_read_v_idx(
static
bool
trx_undo_report_insert_virtual(
- page_t* undo_page,
+ buf_block_t* undo_block,
dict_table_t* table,
const dtuple_t* row,
byte** ptr)
@@ -377,7 +386,7 @@ trx_undo_report_insert_virtual(
byte* start = *ptr;
bool first_v_col = true;
- if (trx_undo_left(undo_page, *ptr) < 2) {
+ if (trx_undo_left(undo_block, *ptr) < 2) {
return(false);
}
@@ -396,7 +405,7 @@ trx_undo_report_insert_virtual(
if (col->m_col.ord_part) {
			/* make sure there is enough space to write the length */
- if (trx_undo_left(undo_page, *ptr) < 5) {
+ if (trx_undo_left(undo_block, *ptr) < 5) {
return(false);
}
@@ -404,7 +413,7 @@ trx_undo_report_insert_virtual(
pos += REC_MAX_N_FIELDS;
*ptr += mach_write_compressed(*ptr, pos);
- *ptr = trx_undo_log_v_idx(undo_page, table,
+ *ptr = trx_undo_log_v_idx(undo_block, table,
col_no, *ptr, first_v_col);
first_v_col = false;
@@ -424,8 +433,8 @@ trx_undo_report_insert_virtual(
flen = max_len;
}
- if (trx_undo_left(undo_page, *ptr) < flen + 5) {
-
+ if (trx_undo_left(undo_block, *ptr)
+ < flen + 5) {
return(false);
}
*ptr += mach_write_compressed(*ptr, flen);
@@ -433,8 +442,7 @@ trx_undo_report_insert_virtual(
ut_memcpy(*ptr, vfield->data, flen);
*ptr += flen;
} else {
- if (trx_undo_left(undo_page, *ptr) < 5) {
-
+ if (trx_undo_left(undo_block, *ptr) < 5) {
return(false);
}
@@ -444,7 +452,7 @@ trx_undo_report_insert_virtual(
}
	/* Always mark the end of the log with a 2-byte length field */
- mach_write_to_2(start, *ptr - start);
+ mach_write_to_2(start, ulint(*ptr - start));
return(true);
}
@@ -456,7 +464,7 @@ static
ulint
trx_undo_page_report_insert(
/*========================*/
- page_t* undo_page, /*!< in: undo log page */
+ buf_block_t* undo_block, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: index entry which will be
@@ -468,19 +476,21 @@ trx_undo_page_report_insert(
ulint i;
ut_ad(dict_index_is_clust(index));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
+ /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
+ TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
+ TRX_UNDO_INSERT == 1 into insert_undo pages,
+ or TRX_UNDO_UPDATE == 2 into update_undo pages. */
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_block->frame) <= 2);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame);
+ ptr = undo_block->frame + first_free;
- if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
+ ut_ad(first_free <= srv_page_size);
+ if (trx_undo_left(undo_block, ptr) < 2 + 1 + 11 + 11) {
/* Not enough space for writing the general parameters */
-
return(0);
}
@@ -494,13 +504,21 @@ trx_undo_page_report_insert(
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
+ if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) {
+ ut_ad(clust_entry->info_bits == REC_INFO_METADATA);
+ ut_ad(index->is_instant());
+ ut_ad(undo_block->frame[first_free + 2]
+ == TRX_UNDO_INSERT_REC);
+ undo_block->frame[first_free + 2] = TRX_UNDO_INSERT_METADATA;
+ goto done;
+ }
for (i = 0; i < dict_index_get_n_unique(index); i++) {
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
ulint flen = dfield_get_len(field);
- if (trx_undo_left(undo_page, ptr) < 5) {
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -508,7 +526,7 @@ trx_undo_page_report_insert(
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -520,12 +538,13 @@ trx_undo_page_report_insert(
if (index->table->n_v_cols) {
if (!trx_undo_report_insert_virtual(
- undo_page, index->table, clust_entry, &ptr)) {
+ undo_block, index->table, clust_entry, &ptr)) {
return(0);
}
}
- return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
+done:
+ return(trx_undo_page_set_next_prev_and_add(undo_block, ptr, mtr));
}
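
trx_undo_page_report_insert() serializes each unique key field as a length followed by the bytes (with a NULL sentinel), and bails out with 0 whenever the remaining page space is insufficient so that the caller can extend the undo log. A simplified sketch of that encoding loop, using fixed 2-byte lengths instead of the compressed format:

#include <cstdint>
#include <cstring>
#include <vector>
#include <iostream>

struct Field { const uint8_t* data; uint16_t len; bool is_null; };

static std::size_t report_insert(uint8_t* buf, std::size_t cap,
                                 const std::vector<Field>& key) {
  std::size_t ptr = 0;
  for (const Field& f : key) {
    if (cap - ptr < 2) return 0;                       // no room for the length
    const uint16_t len = f.is_null ? 0xFFFF : f.len;   // 0xFFFF = NULL sentinel
    buf[ptr++] = uint8_t(len >> 8);
    buf[ptr++] = uint8_t(len);
    if (!f.is_null) {
      if (cap - ptr < f.len) return 0;                 // no room for the data
      std::memcpy(buf + ptr, f.data, f.len);
      ptr += f.len;
    }
  }
  return ptr;                                          // bytes used, 0 = retry
}

int main() {
  const uint8_t id[] = {0, 0, 0, 7};
  std::vector<Field> key = {{id, sizeof id, false}, {nullptr, 0, true}};
  uint8_t buf[64];
  std::cout << "record size: " << report_insert(buf, sizeof buf, key) << '\n';
}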
/**********************************************************************//**
@@ -597,7 +616,7 @@ trx_undo_rec_get_col_val(
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
/* we do not have access to index->table here
- ut_ad(dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ ut_ad(dict_table_has_atomic_blobs(index->table)
|| *len >= col->max_prefix
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
@@ -630,7 +649,7 @@ trx_undo_rec_get_row_ref(
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
+ const dtuple_t**ref, /*!< out, own: row reference */
mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
{
@@ -642,17 +661,17 @@ trx_undo_rec_get_row_ref(
ref_len = dict_index_get_n_unique(index);
- *ref = dtuple_create(heap, ref_len);
+ dtuple_t* tuple = dtuple_create(heap, ref_len);
+ *ref = tuple;
- dict_index_copy_types(*ref, index, ref_len);
+ dict_index_copy_types(tuple, index, ref_len);
for (i = 0; i < ref_len; i++) {
- dfield_t* dfield;
const byte* field;
ulint len;
ulint orig_len;
- dfield = dtuple_get_nth_field(*ref, i);
+ dfield_t* dfield = dtuple_get_nth_field(tuple, i);
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
@@ -762,7 +781,7 @@ trx_undo_page_report_modify_ext(
}
/* Encode spatial status into length. */
- spatial_len |= spatial_status << SPATIAL_STATUS_SHIFT;
+ spatial_len |= ulint(spatial_status) << SPATIAL_STATUS_SHIFT;
if (spatial_status == SPATIAL_ONLY) {
/* If the column is only used by gis index, log its
@@ -841,7 +860,7 @@ static
ulint
trx_undo_page_report_modify(
/*========================*/
- page_t* undo_page, /*!< in: undo log page */
+ buf_block_t* undo_block, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index where update or
delete marking is done */
@@ -857,48 +876,46 @@ trx_undo_page_report_modify(
virtual column info */
mtr_t* mtr) /*!< in: mtr */
{
- dict_table_t* table = index->table;
ulint first_free;
byte* ptr;
- const byte* field;
- ulint flen;
- ulint col_no;
- ulint type_cmpl;
- byte* type_cmpl_ptr;
- ulint i;
- trx_id_t trx_id;
- ibool ignore_prefix = FALSE;
- byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE];
- bool first_v_col = true;
- ut_a(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
ut_ad(rec_offs_validate(rec, index, offsets));
+ /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
+ TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
+ TRX_UNDO_INSERT == 1 into insert_undo pages,
+ or TRX_UNDO_UPDATE == 2 into update_undo pages. */
ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
- + undo_page) == TRX_UNDO_UPDATE
- || (dict_table_is_temporary(table)
- && mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
- + undo_page) == TRX_UNDO_INSERT));
- trx_undo_t* update_undo = dict_table_is_temporary(table)
- ? NULL : trx->rsegs.m_redo.update_undo;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
+ + undo_block->frame) <= 2);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame);
+ ptr = undo_block->frame + first_free;
- if (trx_undo_left(undo_page, ptr) < 50) {
+ ut_ad(first_free <= srv_page_size);
+ if (trx_undo_left(undo_block, ptr) < 50) {
/* NOTE: the value 50 must be big enough so that the general
fields written below fit on the undo log page */
-
- return(0);
+ return 0;
}
/* Reserve 2 bytes for the pointer to the next undo log record */
ptr += 2;
+ dict_table_t* table = index->table;
+ const byte* field;
+ ulint flen;
+ ulint col_no;
+ ulint type_cmpl;
+ byte* type_cmpl_ptr;
+ ulint i;
+ trx_id_t trx_id;
+ ibool ignore_prefix = FALSE;
+ byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE];
+ bool first_v_col = true;
+
/* Store first some general parameters to the undo log */
if (!update) {
@@ -944,8 +961,8 @@ trx_undo_page_report_modify(
allowed to ignore blob prefixes if the delete marking was done
by some other trx as it must have committed by now for us to
allow an over-write. */
- if (ignore_prefix) {
- ignore_prefix = (trx_id != trx->id);
+ if (trx_id == trx->id) {
+ ignore_prefix = false;
}
ptr += mach_u64_write_compressed(ptr, trx_id);
@@ -963,22 +980,22 @@ trx_undo_page_report_modify(
for (i = 0; i < dict_index_get_n_unique(index); i++) {
+ /* The ordering columns must not be instant added columns. */
+ ut_ad(!rec_offs_nth_default(offsets, i));
field = rec_get_nth_field(rec, offsets, i, &flen);
/* The ordering columns must not be stored externally. */
ut_ad(!rec_offs_nth_extern(offsets, i));
ut_ad(dict_index_get_nth_col(index, i)->ord_part);
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -991,8 +1008,7 @@ trx_undo_page_report_modify(
/* Save to the undo log the old values of the columns to be updated. */
if (update) {
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1030,8 +1046,7 @@ trx_undo_page_report_modify(
ulint pos = fld->field_no;
/* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1055,7 +1070,7 @@ trx_undo_page_report_modify(
if (is_virtual) {
ut_ad(fld->field_no < table->n_v_def);
- ptr = trx_undo_log_v_idx(undo_page, table,
+ ptr = trx_undo_log_v_idx(undo_block, table,
fld->field_no, ptr,
first_v_col);
if (ptr == NULL) {
@@ -1078,12 +1093,11 @@ trx_undo_page_report_modify(
flen, max_v_log_len);
}
} else {
- field = rec_get_nth_field(rec, offsets,
- pos, &flen);
+ field = rec_get_nth_cfield(
+ rec, index, offsets, pos, &flen);
}
- if (trx_undo_left(undo_page, ptr) < 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 15) {
return(0);
}
@@ -1106,21 +1120,13 @@ trx_undo_page_report_modify(
dict_table_page_size(table),
&field, &flen, SPATIAL_UNKNOWN);
- /* Notify purge that it eventually has to
- free the old externally stored field */
-
- if (update_undo) {
- update_undo->del_marks = TRUE;
- }
-
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
} else {
ptr += mach_write_compressed(ptr, flen);
}
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr) < flen) {
return(0);
}
@@ -1137,16 +1143,15 @@ trx_undo_page_report_modify(
flen, max_v_log_len);
}
- if (trx_undo_left(undo_page, ptr) < 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 15) {
return(0);
}
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
+ if (trx_undo_left(undo_block, ptr)
+ < flen) {
return(0);
}
@@ -1180,12 +1185,7 @@ trx_undo_page_report_modify(
double mbr[SPDIMS * 2];
mem_heap_t* row_heap = NULL;
- if (update_undo) {
- update_undo->del_marks = TRUE;
- }
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1251,16 +1251,15 @@ trx_undo_page_report_modify(
if (true) {
/* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5 + 15) {
-
+ if (trx_undo_left(undo_block, ptr) < 5 + 15) {
return(0);
}
ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos,
- &flen);
+ field = rec_get_nth_cfield(
+ rec, index, offsets, pos, &flen);
if (is_ext) {
const dict_col_t* col =
@@ -1300,9 +1299,8 @@ trx_undo_page_report_modify(
if (flen != UNIV_SQL_NULL
&& spatial_status != SPATIAL_ONLY) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< flen) {
-
return(0);
}
@@ -1311,7 +1309,7 @@ trx_undo_page_report_modify(
}
if (spatial_status != SPATIAL_NONE) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< DATA_MBR_LEN) {
return(0);
}
@@ -1344,8 +1342,7 @@ already_logged:
/* Write field number to undo log.
			Make sure there is enough space in the log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
+ if (trx_undo_left(undo_block, ptr) < 5) {
return(0);
}
@@ -1353,7 +1350,7 @@ already_logged:
ptr += mach_write_compressed(ptr, pos);
ut_ad(col_no < table->n_v_def);
- ptr = trx_undo_log_v_idx(undo_page, table,
+ ptr = trx_undo_log_v_idx(undo_block, table,
col_no, ptr,
first_v_col);
first_v_col = false;
@@ -1393,9 +1390,8 @@ already_logged:
ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr)
+ if (trx_undo_left(undo_block, ptr)
< flen) {
-
return(0);
}
@@ -1405,7 +1401,7 @@ already_logged:
}
}
- mach_write_to_2(old_ptr, ptr - old_ptr);
+ mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
if (row_heap) {
mem_heap_free(row_heap);
@@ -1414,22 +1410,20 @@ already_logged:
/*----------------------------------------*/
/* Write pointers to the previous and the next undo log records */
- if (trx_undo_left(undo_page, ptr) < 2) {
-
+ if (trx_undo_left(undo_block, ptr) < 2) {
return(0);
}
mach_write_to_2(ptr, first_free);
ptr += 2;
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
+ const ulint new_free = ulint(ptr - undo_block->frame);
+ mach_write_to_2(undo_block->frame + first_free, new_free);
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_block->frame, new_free);
/* Write to the REDO log about this change in the UNDO log */
-
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
+ trx_undof_page_add_undo_rec_log(undo_block, first_free, new_free, mtr);
return(first_free);
}
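
The modify variant additionally records, for every updated column, the field number and the old value, so that rollback and older read views can rebuild the previous row version. A compact sketch of that section of the record, again with fixed-width integers standing in for the compressed encoding used by the real format:

#include <cstdint>
#include <vector>
#include <iostream>

struct OldValue { uint16_t field_no; std::vector<uint8_t> bytes; };

static std::vector<uint8_t> log_update(const std::vector<OldValue>& upd) {
  std::vector<uint8_t> rec;
  auto put2 = [&rec](uint16_t v) { rec.push_back(uint8_t(v >> 8));
                                   rec.push_back(uint8_t(v)); };
  put2(uint16_t(upd.size()));                 // number of updated fields
  for (const OldValue& v : upd) {
    put2(v.field_no);                         // which column
    put2(uint16_t(v.bytes.size()));           // old length
    rec.insert(rec.end(), v.bytes.begin(), v.bytes.end());  // old value
  }
  return rec;
}

int main() {
  std::vector<OldValue> upd = {{3, {'o', 'l', 'd'}}, {5, {0x01}}};
  std::cout << "undo record body: " << log_update(upd).size()
            << " bytes\n";                     // 2 + (2+2+3) + (2+2+1) = 12
}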
@@ -1512,7 +1506,7 @@ trx_undo_update_rec_get_update(
buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
- trx_write_trx_id(buf, trx_id);
+ mach_write_to_6(buf, trx_id);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
@@ -1540,6 +1534,7 @@ trx_undo_update_rec_get_update(
ulint orig_len;
bool is_virtual;
+ upd_field = upd_get_nth_field(update, i);
field_no = mach_read_next_compressed(&ptr);
is_virtual = (field_no >= REC_MAX_N_FIELDS);
@@ -1551,27 +1546,6 @@ trx_undo_update_rec_get_update(
index->table, ptr, first_v_col, &is_undo_log,
&field_no);
first_v_col = false;
- } else if (field_no >= dict_index_get_n_fields(index)) {
- ib::error() << "Trying to access update undo rec"
- " field " << field_no
- << " in index " << index->name
- << " of table " << index->table->name
- << " but index has only "
- << dict_index_get_n_fields(index)
- << " fields " << BUG_REPORT_MSG
- << ". Run also CHECK TABLE "
- << index->table->name << "."
- " n_fields = " << n_fields << ", i = " << i
- << ", ptr " << ptr;
-
- ut_ad(0);
- *upd = NULL;
- return(NULL);
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- if (is_virtual) {
/* This column could be dropped or no longer indexed */
if (field_no == ULINT_UNDEFINED) {
/* Mark this is no longer needed */
@@ -1585,10 +1559,31 @@ trx_undo_update_rec_get_update(
continue;
}
- upd_field_set_v_field_no(
- upd_field, field_no, index);
- } else {
+ upd_field_set_v_field_no(upd_field, field_no, index);
+ } else if (field_no < index->n_fields) {
upd_field_set_field_no(upd_field, field_no, index);
+ } else if (update->info_bits == REC_INFO_MIN_REC_FLAG
+ && index->is_instant()) {
+ /* This must be a rollback of a subsequent
+ instant ADD COLUMN operation. This will be
+ detected and handled by btr_cur_trim(). */
+ upd_field->field_no = field_no;
+ upd_field->orig_len = 0;
+ } else {
+ ib::error() << "Trying to access update undo rec"
+ " field " << field_no
+ << " in index " << index->name
+ << " of table " << index->table->name
+ << " but index has only "
+ << dict_index_get_n_fields(index)
+ << " fields " << BUG_REPORT_MSG
+ << ". Run also CHECK TABLE "
+ << index->table->name << "."
+ " n_fields = " << n_fields << ", i = " << i;
+
+ ut_ad(0);
+ *upd = NULL;
+ return(NULL);
}
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
@@ -1822,8 +1817,7 @@ trx_undo_rec_get_partial_row(
&& spatial_status != SPATIAL_ONLY) {
ut_a(dfield_get_len(dfield)
>= BTR_EXTERN_FIELD_REF_SIZE);
- ut_a(dict_table_get_format(index->table)
- >= UNIV_FORMAT_B
+ ut_a(dict_table_has_atomic_blobs(index->table)
|| dfield_get_len(dfield)
>= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
@@ -1834,51 +1828,22 @@ trx_undo_rec_get_partial_row(
return(const_cast<byte*>(ptr));
}
-/***********************************************************************//**
-Erases the unused undo log page end.
-@return TRUE if the page contained something, FALSE if it was empty */
-static MY_ATTRIBUTE((nonnull))
-ibool
-trx_undo_erase_page_end(
-/*====================*/
- page_t* undo_page, /*!< in/out: undo page whose end to erase */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+/** Erase the unused undo log page end.
+@param[in,out] undo_page undo log page
+@return whether the page contained something */
+bool
+trx_undo_erase_page_end(page_t* undo_page)
{
ulint first_free;
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
- memset(undo_page + first_free, 0xff,
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
+ memset(undo_page + first_free, 0,
+ (srv_page_size - FIL_PAGE_DATA_END) - first_free);
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
}
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr != NULL);
- ut_ad(end_ptr != NULL);
-
- if (page == NULL) {
-
- return(ptr);
- }
-
- trx_undo_erase_page_end(page, mtr);
-
- return(ptr);
-}
-
/** Report a RENAME TABLE operation.
@param[in,out] trx transaction
@param[in] table table that is being renamed
@@ -1895,7 +1860,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
+ block->frame;
ulint first_free = mach_read_from_2(ptr_first_free);
ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- ut_ad(first_free <= UNIV_PAGE_SIZE);
+ ut_ad(first_free <= srv_page_size);
byte* start = block->frame + first_free;
size_t len = strlen(table->name.m_name);
const size_t fixed = 2 + 1 + 11 + 11 + 2;
@@ -1905,7 +1870,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE
< UNIV_PAGE_SIZE_MIN - 10 - FIL_PAGE_DATA_END);
- if (trx_undo_left(block->frame, start) < fixed + len) {
+ if (trx_undo_left(block, start) < fixed + len) {
ut_ad(first_free > TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_HDR_SIZE);
return 0;
@@ -1923,7 +1888,7 @@ trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
mach_write_to_2(start, offset);
mach_write_to_2(ptr_first_free, offset);
- trx_undof_page_add_undo_rec_log(block->frame, first_free, offset, mtr);
+ trx_undof_page_add_undo_rec_log(block, first_free, offset, mtr);
return first_free;
}
@@ -1936,48 +1901,33 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
ut_ad(!trx->read_only);
ut_ad(trx->id);
ut_ad(!table->is_temporary());
- ut_ad(srv_safe_truncate);
-
- trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
- trx_undo_t** pundo = &trx->rsegs.m_redo.insert_undo;
- mutex_enter(&trx->undo_mutex);
- dberr_t err = *pundo
- ? DB_SUCCESS
- : trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
- ut_ad((err == DB_SUCCESS) == (*pundo != NULL));
- if (trx_undo_t* undo = *pundo) {
- mtr_t mtr;
- mtr.start();
-
- buf_block_t* block = buf_page_get_gen(
- page_id_t(undo->space, undo->last_page_no),
- univ_page_size, RW_X_LATCH,
- buf_pool_is_obsolete(undo->withdraw_clock)
- ? NULL : undo->guess_block,
- BUF_GET, __FILE__, __LINE__, &mtr, &err);
- ut_ad((err == DB_SUCCESS) == !!block);
- for (ut_d(int loop_count = 0); block;) {
+ mtr_t mtr;
+ dberr_t err;
+ mtr.start();
+ if (buf_block_t* block = trx_undo_assign(trx, &err, &mtr)) {
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
+ ut_ad(err == DB_SUCCESS);
+ ut_ad(undo);
+ for (ut_d(int loop_count = 0);;) {
ut_ad(++loop_count < 2);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
ut_ad(undo->last_page_no == block->page.id.page_no());
if (ulint offset = trx_undo_page_report_rename(
trx, table, block, &mtr)) {
undo->withdraw_clock = buf_withdraw_clock;
- undo->empty = FALSE;
undo->top_page_no = undo->last_page_no;
undo->top_offset = offset;
undo->top_undo_no = trx->undo_no++;
undo->guess_block = block;
+ ut_ad(!undo->empty());
- trx->undo_rseg_space = rseg->space;
err = DB_SUCCESS;
break;
} else {
mtr.commit();
mtr.start();
- block = trx_undo_add_page(trx, undo, &mtr);
+ block = trx_undo_add_page(undo, &mtr);
if (!block) {
err = DB_OUT_OF_FILE_SPACE;
break;
@@ -1988,7 +1938,6 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
mtr.commit();
}
- mutex_exit(&trx->undo_mutex);
return err;
}
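
trx_undo_report_rename() retries at most once: if the record does not fit on the current last undo page, the mini-transaction is restarted on a freshly added page, and a record that does not fit even on an empty page is an error. A small sketch of that report-or-extend loop with made-up capacities:

#include <cassert>
#include <cstddef>
#include <iostream>

static std::size_t try_report(std::size_t page_free, std::size_t rec_size) {
  const std::size_t page_cap = 100;                // illustrative capacity
  return page_free + rec_size <= page_cap ? page_free + rec_size : 0;
}

static bool report_with_extend(std::size_t page_free, std::size_t rec_size) {
  for (int loop_count = 0;; ++loop_count) {
    assert(loop_count < 2);                        // at most one fresh page
    if (std::size_t offset = try_report(page_free, rec_size)) {
      std::cout << "written, new free offset " << offset << '\n';
      return true;
    }
    page_free = 10;                                // "allocate" an empty page
    if (rec_size > 90)                             // cannot fit even when empty
      return false;                                // out-of-space error
  }
}

int main() {
  report_with_extend(95, 20);                      // forces one page extension
}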
@@ -2022,8 +1971,6 @@ trx_undo_report_row_operation(
undo log record */
{
trx_t* trx;
- ulint page_no;
- buf_block_t* undo_block;
mtr_t mtr;
#ifdef UNIV_DEBUG
int loop_count = 0;
@@ -2043,7 +1990,7 @@ trx_undo_report_row_operation(
mtr.start();
trx_undo_t** pundo;
trx_rseg_t* rseg;
- const bool is_temp = dict_table_is_temporary(index->table);
+ const bool is_temp = index->table->is_temporary();
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -2053,63 +2000,32 @@ trx_undo_report_row_operation(
} else {
ut_ad(!trx->read_only);
ut_ad(trx->id);
- /* Keep INFORMATION_SCHEMA.TABLES.UPDATE_TIME
- up-to-date for persistent tables. Temporary tables are
- not listed there. */
- trx->mod_tables.insert(index->table);
-
- pundo = !rec
- ? &trx->rsegs.m_redo.insert_undo
- : &trx->rsegs.m_redo.update_undo;
+ pundo = &trx->rsegs.m_redo.undo;
rseg = trx->rsegs.m_redo.rseg;
}
- mutex_enter(&trx->undo_mutex);
- dberr_t err;
+ dberr_t err;
+ buf_block_t* undo_block = trx_undo_assign_low(trx, rseg, pundo,
+ &err, &mtr);
+ trx_undo_t* undo = *pundo;
- if (*pundo) {
- err = DB_SUCCESS;
- } else if (!rec || is_temp) {
- err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_INSERT);
- } else {
- err = trx_undo_assign_undo(trx, rseg, pundo, TRX_UNDO_UPDATE);
- }
-
- trx_undo_t* undo = *pundo;
-
- ut_ad((err == DB_SUCCESS) == (undo != NULL));
- if (undo == NULL) {
+ ut_ad((err == DB_SUCCESS) == (undo_block != NULL));
+ if (UNIV_UNLIKELY(undo_block == NULL)) {
goto err_exit;
}
- page_no = undo->last_page_no;
-
- undo_block = buf_page_get_gen(
- page_id_t(undo->space, page_no), univ_page_size, RW_X_LATCH,
- buf_pool_is_obsolete(undo->withdraw_clock)
- ? NULL : undo->guess_block, BUF_GET, __FILE__, __LINE__,
- &mtr, &err);
-
- buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
+ ut_ad(undo != NULL);
do {
- ut_ad(page_no == undo_block->page.id.page_no());
- page_t* undo_page = buf_block_get_frame(undo_block);
ulint offset = !rec
? trx_undo_page_report_insert(
- undo_page, trx, index, clust_entry, &mtr)
+ undo_block, trx, index, clust_entry, &mtr)
: trx_undo_page_report_modify(
- undo_page, trx, index, rec, offsets, update,
+ undo_block, trx, index, rec, offsets, update,
cmpl_info, clust_entry, &mtr);
if (UNIV_UNLIKELY(offset == 0)) {
- /* The record did not fit on the page. We erase the
- end segment of the undo log page and write a log
- record of it: this is to ensure that in the debug
- version the replicate page constructed using the log
- records stays identical to the original page */
-
- if (!trx_undo_erase_page_end(undo_page, &mtr)) {
+ if (!trx_undo_erase_page_end(undo_block->frame)) {
/* The record did not fit on an empty
undo page. Discard the freshly allocated
page and return an error. */
@@ -2123,7 +2039,7 @@ trx_undo_report_row_operation(
first, because it may be holding lower-level
latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
- mtr_commit(&mtr);
+ mtr.commit();
mtr.start();
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -2143,22 +2059,39 @@ trx_undo_report_row_operation(
undo->withdraw_clock = buf_withdraw_clock;
mtr_commit(&mtr);
- undo->empty = FALSE;
- undo->top_page_no = page_no;
+ undo->top_page_no = undo_block->page.id.page_no();
undo->top_offset = offset;
undo->top_undo_no = trx->undo_no++;
undo->guess_block = undo_block;
-
- trx->undo_rseg_space = rseg->space;
-
- mutex_exit(&trx->undo_mutex);
+ ut_ad(!undo->empty());
+
+ if (!is_temp) {
+ const undo_no_t limit = undo->top_undo_no;
+ /* Determine if this is the first time
+ when this transaction modifies a
+ system-versioned column in this table. */
+ trx_mod_table_time_t& time
+ = trx->mod_tables.insert(
+ trx_mod_tables_t::value_type(
+ index->table, limit))
+ .first->second;
+ ut_ad(time.valid(limit));
+
+ if (!time.is_versioned()
+ && index->table->versioned_by_id()
+ && (!rec /* INSERT */
+ || (update
+ && update->affects_versioned()))) {
+ time.set_versioned(limit);
+ }
+ }
*roll_ptr = trx_undo_build_roll_ptr(
- !rec, rseg->id, page_no, offset);
+ !rec, rseg->id, undo->top_page_no, offset);
return(DB_SUCCESS);
}
- ut_ad(page_no == undo->last_page_no);
+ ut_ad(undo_block->page.id.page_no() == undo->last_page_no);
/* We have to extend the undo log by one page */
@@ -2169,12 +2102,11 @@ trx_undo_report_row_operation(
mtr.set_log_mode(MTR_LOG_NO_REDO);
}
- undo_block = trx_undo_add_page(trx, undo, &mtr);
- page_no = undo->last_page_no;
+ undo_block = trx_undo_add_page(undo, &mtr);
DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure",
undo_block = NULL;);
- } while (undo_block != NULL);
+ } while (UNIV_LIKELY(undo_block != NULL));
ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
DB_OUT_OF_FILE_SPACE,
@@ -2183,14 +2115,13 @@ trx_undo_report_row_operation(
" log pages. Please add new data file to the tablespace or"
" check if filesystem is full or enable auto-extension for"
" the tablespace",
- undo->space == TRX_SYS_SPACE
+ undo->rseg->space == fil_system.sys_space
? "system" : is_temp ? "temporary" : "undo");
/* Did not succeed: out of space */
err = DB_OUT_OF_FILE_SPACE;
err_exit:
- mutex_exit(&trx->undo_mutex);
mtr_commit(&mtr);
return(err);
}
@@ -2219,12 +2150,13 @@ trx_undo_get_undo_rec_low(
&offset);
ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- rseg = trx_sys->rseg_array[rseg_id];
+ rseg = trx_sys.rseg_array[rseg_id];
+ ut_ad(rseg->is_persistent());
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(
- page_id_t(rseg->space, page_no), &mtr);
+ page_id_t(rseg->space->id, page_no), &mtr);
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
@@ -2256,14 +2188,14 @@ trx_undo_get_undo_rec(
{
bool missing_history;
- rw_lock_s_lock(&purge_sys->latch);
+ rw_lock_s_lock(&purge_sys.latch);
- missing_history = purge_sys->view.changes_visible(trx_id, name);
+ missing_history = purge_sys.view.changes_visible(trx_id, name);
if (!missing_history) {
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
- rw_lock_s_unlock(&purge_sys->latch);
+ rw_lock_s_unlock(&purge_sys.latch);
return(missing_history);
}
@@ -2325,12 +2257,13 @@ trx_undo_prev_version_build(
bool dummy_extern;
byte* buf;
- ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_S));
+ ut_ad(!index->table->is_temporary());
+ ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S));
ut_ad(mtr_memo_contains_page_flagged(index_mtr, index_rec,
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(dict_index_is_clust(index));
+ ut_a(index->is_primary());
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
@@ -2341,8 +2274,6 @@ trx_undo_prev_version_build(
return(true);
}
- ut_ad(!dict_table_is_temporary(index->table));
-
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
ut_ad(!index->table->skip_alter_undo);
@@ -2352,8 +2283,7 @@ trx_undo_prev_version_build(
&undo_rec)) {
if (v_status & TRX_UNDO_PREV_IN_PURGE) {
/* We are fetching the record being purged */
- undo_rec = trx_undo_get_undo_rec_low(
- roll_ptr, heap);
+ undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
} else {
/* The undo record may already have been purged,
during purge or semi-consistent read. */
@@ -2375,12 +2305,12 @@ trx_undo_prev_version_build(
&info_bits);
/* (a) If a clustered index record version is such that the
- trx id stamp in it is bigger than purge_sys->view, then the
+ trx id stamp in it is bigger than purge_sys.view, then the
BLOBs in that version are known to exist (the purge has not
progressed that far);
(b) if the version is the first version such that trx id in it
- is less than purge_sys->view, and it is not delete-marked,
+ is less than purge_sys.view, and it is not delete-marked,
then the BLOBs in that version are known to exist (the purge
cannot have purged the BLOBs referenced by that version
yet).
@@ -2419,19 +2349,19 @@ trx_undo_prev_version_build(
the BLOB. */
/* the row_upd_changes_disowned_external(update) call could be
- omitted, but the synchronization on purge_sys->latch is likely
+ omitted, but the synchronization on purge_sys.latch is likely
more expensive. */
if ((update->info_bits & REC_INFO_DELETED_FLAG)
&& row_upd_changes_disowned_external(update)) {
bool missing_extern;
- rw_lock_s_lock(&purge_sys->latch);
+ rw_lock_s_lock(&purge_sys.latch);
- missing_extern = purge_sys->view.changes_visible(
+ missing_extern = purge_sys.view.changes_visible(
trx_id, index->table->name);
- rw_lock_s_unlock(&purge_sys->latch);
+ rw_lock_s_unlock(&purge_sys.latch);
if (missing_extern) {
/* treat as a fresh insert, not to
@@ -2464,7 +2394,7 @@ trx_undo_prev_version_build(
heap, rec_offs_size(offsets)));
*old_vers = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(*old_vers, index, offsets);
+ rec_offs_make_valid(*old_vers, index, true, offsets);
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index 0e277c67453..9e992d2f145 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -34,7 +34,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mach0data.h"
#include "pars0pars.h"
#include "que0que.h"
-#include "read0read.h"
#include "row0mysql.h"
#include "row0undo.h"
#include "srv0mon.h"
@@ -49,19 +48,53 @@ Created 3/26/1996 Heikki Tuuri
rollback */
static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;
-/** true if trx_rollback_or_clean_all_recovered() thread is active */
-bool trx_rollback_or_clean_is_active;
+/** true if trx_rollback_all_recovered() thread is active */
+bool trx_rollback_is_active;
/** In crash recovery, the current trx to be rolled back; NULL otherwise */
const trx_t* trx_roll_crash_recv_trx;
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx); /*!< in: transaction */
+/** Finish transaction rollback.
+@param[in,out] trx transaction
+@return whether the rollback was completed normally
+@retval false if the rollback was aborted by shutdown */
+static bool trx_rollback_finish(trx_t* trx)
+{
+ trx->mod_tables.clear();
+ bool finished = trx->error_state == DB_SUCCESS;
+ if (UNIV_LIKELY(finished)) {
+ trx_commit(trx);
+ } else {
+ ut_a(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_is_being_started);
+ ut_a(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ ut_d(trx->in_rollback = false);
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) {
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) {
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
+ UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list,
+ undo);
+ ut_free(undo);
+ undo = NULL;
+ }
+ trx_commit_low(trx, NULL);
+ }
+
+ trx->lock.que_state = TRX_QUE_RUNNING;
+
+ return finished;
+}
/*******************************************************************//**
Rollback a transaction used in MySQL. */
@@ -92,7 +125,7 @@ trx_rollback_to_savepoint_low(
trx->error_state = DB_SUCCESS;
- if (trx->has_logged()) {
+ if (trx->has_logged_or_recovered()) {
ut_ad(trx->rsegs.m_redo.rseg != 0
|| trx->rsegs.m_noredo.rseg != 0);
@@ -116,13 +149,20 @@ trx_rollback_to_savepoint_low(
trx_rollback_finish(trx);
MONITOR_INC(MONITOR_TRX_ROLLBACK);
} else {
+ ut_a(trx->error_state == DB_SUCCESS);
+ const undo_no_t limit = savept->least_undo_no;
+ for (trx_mod_tables_t::iterator i = trx->mod_tables.begin();
+ i != trx->mod_tables.end(); ) {
+ trx_mod_tables_t::iterator j = i++;
+ ut_ad(j->second.valid());
+ if (j->second.rollback(limit)) {
+ trx->mod_tables.erase(j);
+ }
+ }
trx->lock.que_state = TRX_QUE_RUNNING;
MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
}
- ut_a(trx->error_state == DB_SUCCESS);
- ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
-
mem_heap_free(heap);
/* There might be work for utility threads.*/
@@ -171,8 +211,6 @@ trx_rollback_for_mysql_low(
trx->op_info = "";
- ut_a(trx->error_state == DB_SUCCESS);
-
return(trx->error_state);
}
@@ -181,7 +219,7 @@ trx_rollback_for_mysql_low(
@return error code or DB_SUCCESS */
dberr_t trx_rollback_for_mysql(trx_t* trx)
{
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the rollback should be invoked for a running
active MySQL transaction (or recovered prepared transaction)
that is associated with the current thread. */
@@ -189,34 +227,37 @@ dberr_t trx_rollback_for_mysql(trx_t* trx)
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
trx->will_lock = 0;
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
return(DB_SUCCESS);
case TRX_STATE_ACTIVE:
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
assert_trx_nonlocking_or_in_list(trx);
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_PREPARED:
ut_ad(!trx_is_autocommit_non_locking(trx));
- if (trx->has_logged_persistent()) {
+ if (trx->rsegs.m_redo.undo || trx->rsegs.m_redo.old_insert) {
/* Change the undo log state back from
TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE
so that if the system gets killed,
recovery will perform the rollback. */
- trx_undo_ptr_t* undo_ptr = &trx->rsegs.m_redo;
+ ut_ad(!trx->rsegs.m_redo.undo
+ || trx->rsegs.m_redo.undo->rseg
+ == trx->rsegs.m_redo.rseg);
+ ut_ad(!trx->rsegs.m_redo.old_insert
+ || trx->rsegs.m_redo.old_insert->rseg
+ == trx->rsegs.m_redo.rseg);
mtr_t mtr;
mtr.start();
mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
- if (undo_ptr->insert_undo != NULL) {
- trx_undo_set_state_at_prepare(
- trx, undo_ptr->insert_undo,
- true, &mtr);
+ if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
+ trx_undo_set_state_at_prepare(trx, undo, true,
+ &mtr);
}
- if (undo_ptr->update_undo != NULL) {
- trx_undo_set_state_at_prepare(
- trx, undo_ptr->update_undo,
- true, &mtr);
+ if (trx_undo_t* undo = trx->rsegs.m_redo.old_insert) {
+ trx_undo_set_state_at_prepare(trx, undo, true,
+ &mtr);
}
mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
/* Persist the XA ROLLBACK, so that crash
@@ -263,11 +304,11 @@ trx_rollback_last_sql_stat_for_mysql(
{
dberr_t err;
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the statement rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
@@ -389,7 +430,7 @@ trx_rollback_to_savepoint_for_mysql_low(
dberr_t err;
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
/* Free all savepoints strictly later than savep. */
@@ -442,11 +483,11 @@ trx_rollback_to_savepoint_for_mysql(
{
trx_named_savept_t* savep;
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the savepoint rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
savep = trx_savepoint_find(trx, savepoint_name);
@@ -539,7 +580,7 @@ trx_release_savepoint_for_mysql(
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true)
|| trx_state_eq(trx, TRX_STATE_PREPARED, true));
- ut_ad(trx->in_mysql_trx_list);
+ ut_ad(trx->mysql_thd);
savep = trx_savepoint_find(trx, savepoint_name);
@@ -590,8 +631,6 @@ trx_rollback_active(
que_fork_t* fork;
que_thr_t* thr;
roll_node_t* roll_node;
- dict_table_t* table;
- ibool dictionary_locked = FALSE;
const trx_id_t trx_id = trx->id;
ut_ad(trx_id);
@@ -614,9 +653,11 @@ trx_rollback_active(
trx_roll_crash_recv_trx = trx;
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ const bool dictionary_locked = trx_get_dict_operation(trx)
+ != TRX_DICT_OP_NONE;
+
+ if (dictionary_locked) {
row_mysql_lock_data_dictionary(trx);
- dictionary_locked = TRUE;
}
que_run_threads(thr);
@@ -624,46 +665,26 @@ trx_rollback_active(
que_run_threads(roll_node->undo_thr);
- if (trx->error_state != DB_SUCCESS) {
- ut_ad(trx->error_state == DB_INTERRUPTED);
- ut_ad(!srv_is_being_started);
- ut_ad(!srv_undo_sources);
- ut_ad(srv_fast_shutdown);
+ que_graph_free(
+ static_cast<que_t*>(roll_node->undo_thr->common.parent));
+
+ if (UNIV_UNLIKELY(!trx_rollback_finish(trx))) {
ut_ad(!dictionary_locked);
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
goto func_exit;
}
- trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
-
- /* Free the memory reserved by the undo graph */
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
-
ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
- && trx->table_id != 0) {
-
- ut_ad(dictionary_locked);
+ if (!dictionary_locked || !trx->table_id) {
+ } else if (dict_table_t* table = dict_table_open_on_id(
+ trx->table_id, TRUE, DICT_TABLE_OP_NORMAL)) {
+ ib::info() << "Dropping table " << table->name
+ << ", with id " << trx->table_id
+ << " in recovery";
- /* If the transaction was for a dictionary operation,
- we drop the relevant table only if it is not flagged
- as DISCARDED. If it still exists. */
+ dict_table_close_and_drop(trx, table);
- table = dict_table_open_on_id(
- trx->table_id, TRUE, DICT_TABLE_OP_NORMAL);
-
- if (table && !dict_table_is_discarded(table)) {
- ib::warn() << "Dropping table '" << table->name
- << "', with id " << trx->table_id
- << " in recovery";
-
- dict_table_close_and_drop(trx, table);
-
- trx_commit_for_mysql(trx);
- }
+ trx_commit_for_mysql(trx);
}
ib::info() << "Rolled back recovered transaction " << trx_id;
@@ -678,194 +699,144 @@ func_exit:
trx_roll_crash_recv_trx = NULL;
}
-/*******************************************************************//**
-Rollback or clean up any resurrected incomplete transactions. It assumes
-that the caller holds the trx_sys_t::mutex and it will release the
-lock if it does a clean up or rollback.
-@return TRUE if the transaction was cleaned up or rolled back
-and trx_sys->mutex was released. */
-static
-ibool
-trx_rollback_resurrected(
-/*=====================*/
- trx_t* trx, /*!< in: transaction to rollback or clean */
- ibool* all) /*!< in/out: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- ut_ad(trx_sys_mutex_own());
-
- /* The trx->is_recovered flag and trx->state are set
- atomically under the protection of the trx->mutex (and
- lock_sys->mutex) in lock_trx_release_locks(). We do not want
- to accidentally clean up a non-recovered transaction here. */
-
- trx_mutex_enter(trx);
- if (!trx->is_recovered) {
-func_exit:
- trx_mutex_exit(trx);
- return(FALSE);
- }
-
- switch (trx->state) {
- case TRX_STATE_COMMITTED_IN_MEMORY:
- trx_mutex_exit(trx);
- trx_sys_mutex_exit();
- ib::info() << "Cleaning up trx with id " << ib::hex(trx->id);
- trx_cleanup_at_db_startup(trx);
- trx_free_resurrected(trx);
- return(TRUE);
- case TRX_STATE_ACTIVE:
- if (!srv_is_being_started
- && !srv_undo_sources && srv_fast_shutdown) {
-fake_prepared:
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- *all = FALSE;
- goto func_exit;
- }
- trx_mutex_exit(trx);
+struct trx_roll_count_callback_arg
+{
+ uint32_t n_trx;
+ uint64_t n_rows;
+ trx_roll_count_callback_arg(): n_trx(0), n_rows(0) {}
+};
- if (*all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_sys_mutex_exit();
- trx_rollback_active(trx);
- if (trx->error_state != DB_SUCCESS) {
- ut_ad(trx->error_state == DB_INTERRUPTED);
- trx->error_state = DB_SUCCESS;
- ut_ad(!srv_undo_sources);
- ut_ad(srv_fast_shutdown);
- mutex_enter(&trx_sys->mutex);
- trx_mutex_enter(trx);
- goto fake_prepared;
- }
- trx_free_for_background(trx);
- return(TRUE);
- }
- return(FALSE);
- case TRX_STATE_PREPARED:
- goto func_exit;
- case TRX_STATE_NOT_STARTED:
- break;
- }
- ut_error;
- goto func_exit;
+static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element,
+ trx_roll_count_callback_arg *arg)
+{
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
+ {
+ arg->n_trx++;
+ arg->n_rows+= trx->undo_no;
+ }
+ }
+ mutex_exit(&element->mutex);
+ return 0;
}
-/** Report progress when rolling back a row of a recovered transaction.
-@return whether the rollback should be aborted due to pending shutdown */
-bool
-trx_roll_must_shutdown()
+/** Report progress when rolling back a row of a recovered transaction. */
+void trx_roll_report_progress()
{
- const trx_t* trx = trx_roll_crash_recv_trx;
- ut_ad(trx);
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-
- if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
- && !srv_is_being_started
- && !srv_undo_sources && srv_fast_shutdown) {
- return true;
- }
-
ib_time_t time = ut_time();
- mutex_enter(&trx_sys->mutex);
mutex_enter(&recv_sys->mutex);
-
- if (recv_sys->report(time)) {
- ulint n_trx = 0;
- ulonglong n_rows = 0;
- for (const trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- t != NULL;
- t = UT_LIST_GET_NEXT(trx_list, t)) {
-
- assert_trx_in_rw_list(t);
- if (t->is_recovered
- && trx_state_eq(t, TRX_STATE_ACTIVE)) {
- n_trx++;
- n_rows += t->undo_no;
- }
- }
- if (n_rows > 0) {
- service_manager_extend_timeout(
- INNODB_EXTEND_TIMEOUT_INTERVAL,
- "To roll back: " ULINTPF " transactions, "
- "%llu rows", n_trx, n_rows);
- }
-
- ib::info() << "To roll back: " << n_trx << " transactions, "
- << n_rows << " rows";
- }
-
+ bool report = recv_sys->report(time);
mutex_exit(&recv_sys->mutex);
- mutex_exit(&trx_sys->mutex);
- return false;
-}
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- trx_t* trx;
+ if (report) {
+ trx_roll_count_callback_arg arg;
- ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
+ /* Get number of recovered active transactions and number of
+ rows they modified. Numbers must be accurate, because only this
+ thread is allowed to touch recovered transactions. */
+ trx_sys.rw_trx_hash.iterate_no_dups(
+ reinterpret_cast<my_hash_walk_action>
+ (trx_roll_count_callback), &arg);
- if (trx_sys_get_n_rw_trx() == 0) {
+ if (arg.n_rows > 0) {
+ service_manager_extend_timeout(
+ INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "To roll back: " UINT32PF " transactions, "
+ UINT64PF " rows", arg.n_trx, arg.n_rows);
+ }
- return;
- }
+ ib::info() << "To roll back: " << arg.n_trx
+ << " transactions, " << arg.n_rows << " rows";
- if (all) {
- ib::info() << "Starting in background the rollback"
- " of recovered transactions";
}
+}
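trx_roll_report_progress() above walks trx_sys.rw_trx_hash with a callback that locks each element, keeps only recovered ACTIVE transactions, and accumulates their count and undo record totals into a caller-supplied argument. The following self-contained sketch shows the shape of that aggregate-with-callback walk; the container, the element locking and the transaction fields are simplified stand-ins, not the real rw_trx_hash API.

// Stand-in for the "walk a concurrent hash with a counting callback" pattern
// used by trx_roll_report_progress(). The container and element locking are
// simplified assumptions, not the real rw_trx_hash.
#include <cstdint>
#include <iostream>
#include <mutex>
#include <vector>

struct fake_trx { bool is_recovered; bool active; uint64_t undo_no; };
struct element  { std::mutex m; fake_trx* trx; };

struct count_arg { uint32_t n_trx = 0; uint64_t n_rows = 0; };

// Returning false means "keep iterating", mirroring my_hash_walk_action.
static bool count_callback(element& e, count_arg& arg) {
    std::lock_guard<std::mutex> g(e.m);
    if (fake_trx* trx = e.trx) {
        if (trx->is_recovered && trx->active) {
            arg.n_trx++;
            arg.n_rows += trx->undo_no;
        }
    }
    return false;
}

int main() {
    fake_trx a{true, true, 10}, b{false, true, 5}, c{true, true, 7};
    std::vector<element> hash(3);
    hash[0].trx = &a; hash[1].trx = &b; hash[2].trx = &c;

    count_arg arg;
    for (element& e : hash) {
        if (count_callback(e, arg)) break;
    }
    std::cout << "To roll back: " << arg.n_trx << " transactions, "
              << arg.n_rows << " rows\n";   // expect 2 transactions, 17 rows
}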
- /* Note: For XA recovered transactions, we rely on MySQL to
- do rollback. They will be in TRX_STATE_PREPARED state. If the server
- is shutdown and they are still lingering in trx_sys_t::trx_list
- then the shutdown will hang. */
-
- /* Loop over the transaction list as long as there are
- recovered transactions to clean up or recover. */
-
- do {
- trx_sys_mutex_enter();
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
+static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element,
+ std::vector<trx_t*> *trx_list)
+{
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ mutex_enter(&trx->mutex);
+ if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
+ trx_list->push_back(trx);
+ mutex_exit(&trx->mutex);
+ }
+ mutex_exit(&element->mutex);
+ return 0;
+}
- /* If this function does a cleanup or rollback
- then it will release the trx_sys->mutex, therefore
- we need to reacquire it before retrying the loop. */
- if (trx_rollback_resurrected(trx, &all)) {
+/**
+ Rollback any incomplete transactions which were encountered in crash recovery.
- trx_sys_mutex_enter();
+ If the transaction already was committed, then we clean up a possible insert
+ undo log. If the transaction was not yet committed, then we roll it back.
- break;
- }
- }
+ Note: For XA recovered transactions, we rely on MySQL to
+ do rollback. They will be in TRX_STATE_PREPARED state. If the server
+ is shut down and they are still lingering in trx_sys_t::trx_list
+ then the shutdown will hang.
- trx_sys_mutex_exit();
+ @param[in] all true=roll back all recovered active transactions;
+ false=roll back any incomplete dictionary transaction
+*/
- } while (trx != NULL);
+void trx_rollback_recovered(bool all)
+{
+ std::vector<trx_t*> trx_list;
+
+ ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
+
+ /*
+ Collect list of recovered ACTIVE transaction ids first. Once collected, no
+ other thread is allowed to modify or remove these transactions from
+ rw_trx_hash.
+ */
+ trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
+ (trx_rollback_recovered_callback),
+ &trx_list);
+
+ while (!trx_list.empty())
+ {
+ trx_t *trx= trx_list.back();
+ trx_list.pop_back();
+
+#ifdef UNIV_DEBUG
+ ut_ad(trx);
+ trx_mutex_enter(trx);
+ ut_ad(trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE));
+ trx_mutex_exit(trx);
+#endif
- if (all) {
- ib::info() << "Rollback of non-prepared transactions"
- " completed";
- }
+ if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown)
+ goto discard;
+
+ if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE)
+ {
+ trx_rollback_active(trx);
+ if (trx->error_state != DB_SUCCESS)
+ {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ trx->error_state= DB_SUCCESS;
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+discard:
+ trx_sys.deregister_rw(trx);
+ trx_free_at_shutdown(trx);
+ }
+ else
+ trx_free(trx);
+ }
+ }
}
+
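trx_rollback_recovered() above is deliberately two-phased: it first collects pointers to every recovered ACTIVE transaction into a std::vector while holding only short element-level mutexes, and only afterwards performs the potentially long rollbacks with no hash latches held. A minimal self-contained illustration of that collect-then-process idiom follows; all types are simplified stand-ins.

// Two-phase "collect under fine-grained locks, then process unlocked" idiom,
// as used by trx_rollback_recovered(). Types are simplified stand-ins.
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

struct fake_trx { std::string name; bool is_recovered; bool active; };
struct element  { std::mutex m; fake_trx* trx = nullptr; };

int main() {
    fake_trx t1{"trx1", true, true}, t2{"trx2", false, true};
    element e1, e2;
    e1.trx = &t1; e2.trx = &t2;
    element* hash[] = {&e1, &e2};

    // Phase 1: collect candidates; only short element-level locks are held.
    std::vector<fake_trx*> trx_list;
    for (element* e : hash) {
        std::lock_guard<std::mutex> g(e->m);
        if (fake_trx* trx = e->trx)
            if (trx->is_recovered && trx->active)
                trx_list.push_back(trx);
    }

    // Phase 2: process without holding any hash locks; the slow work
    // (a full rollback in the real code) happens here.
    while (!trx_list.empty()) {
        fake_trx* trx = trx_list.back();
        trx_list.pop_back();
        std::cout << "rolling back " << trx->name << '\n';
    }
}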
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
@@ -875,11 +846,7 @@ Note: this is done in a background thread.
@return a dummy parameter */
extern "C"
os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
+DECLARE_THREAD(trx_rollback_all_recovered)(void*)
{
my_thread_init();
ut_ad(!srv_read_only_mode);
@@ -888,9 +855,15 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
pfs_register_thread(trx_rollback_clean_thread_key);
#endif /* UNIV_PFS_THREAD */
- trx_rollback_or_clean_recovered(TRUE);
+ if (trx_sys.rw_trx_hash.size()) {
+ ib::info() << "Starting in background the rollback of"
+ " recovered transactions";
+ trx_rollback_recovered(true);
+ ib::info() << "Rollback of non-prepared transactions"
+ " completed";
+ }
- trx_rollback_or_clean_is_active = false;
+ trx_rollback_is_active = false;
my_thread_end();
/* We count the number of threads in os_thread_exit(). A created
@@ -907,25 +880,15 @@ static
void
trx_roll_try_truncate(trx_t* trx)
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
trx->pages_undone = 0;
undo_no_t undo_no = trx->undo_no;
- trx_undo_t* insert_undo = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update_undo = trx->rsegs.m_redo.update_undo;
-
- if (insert_undo || update_undo) {
- mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
- if (insert_undo) {
- ut_ad(insert_undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_truncate_end(insert_undo, undo_no, false);
- }
- if (update_undo) {
- ut_ad(update_undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_truncate_end(update_undo, undo_no, false);
- }
- mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
+
+ if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
+ ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
+ mutex_enter(&undo->rseg->mutex);
+ trx_undo_truncate_end(undo, undo_no, false);
+ mutex_exit(&undo->rseg->mutex);
}
if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
@@ -954,10 +917,8 @@ trx_roll_pop_top_rec(
trx_undo_t* undo, /*!< in: undo log */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
page_t* undo_page = trx_undo_page_get_s_latched(
- page_id_t(undo->space, undo->top_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->top_page_no), mtr);
ulint offset = undo->top_offset;
@@ -966,8 +927,8 @@ trx_roll_pop_top_rec(
true, mtr);
if (prev_rec == NULL) {
-
- undo->empty = TRUE;
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
} else {
page_t* prev_rec_page = page_align(prev_rec);
@@ -977,8 +938,9 @@ trx_roll_pop_top_rec(
}
undo->top_page_no = page_get_page_no(prev_rec_page);
- undo->top_offset = prev_rec - prev_rec_page;
+ undo->top_offset = ulint(prev_rec - prev_rec_page);
undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
+ ut_ad(!undo->empty());
}
return(undo_page + offset);
@@ -993,30 +955,29 @@ trx_roll_pop_top_rec(
trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
{
- mutex_enter(&trx->undo_mutex);
-
if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
trx_roll_try_truncate(trx);
}
- trx_undo_t* undo = NULL;
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t* undo = NULL;
+ trx_undo_t* insert = trx->rsegs.m_redo.old_insert;
+ trx_undo_t* update = trx->rsegs.m_redo.undo;
trx_undo_t* temp = trx->rsegs.m_noredo.undo;
const undo_no_t limit = trx->roll_limit;
- ut_ad(!insert || !update || insert->empty || update->empty
+ ut_ad(!insert || !update || insert->empty() || update->empty()
|| insert->top_undo_no != update->top_undo_no);
- ut_ad(!insert || !temp || insert->empty || temp->empty
+ ut_ad(!insert || !temp || insert->empty() || temp->empty()
|| insert->top_undo_no != temp->top_undo_no);
- ut_ad(!update || !temp || update->empty || temp->empty
+ ut_ad(!update || !temp || update->empty() || temp->empty()
|| update->top_undo_no != temp->top_undo_no);
- if (insert && !insert->empty && limit <= insert->top_undo_no) {
+ if (UNIV_LIKELY_NULL(insert)
+ && !insert->empty() && limit <= insert->top_undo_no) {
undo = insert;
}
- if (update && !update->empty && update->top_undo_no >= limit) {
+ if (update && !update->empty() && update->top_undo_no >= limit) {
if (!undo) {
undo = update;
} else if (undo->top_undo_no < update->top_undo_no) {
@@ -1024,7 +985,7 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
}
}
- if (temp && !temp->empty && temp->top_undo_no >= limit) {
+ if (temp && !temp->empty() && temp->top_undo_no >= limit) {
if (!undo) {
undo = temp;
} else if (undo->top_undo_no < temp->top_undo_no) {
@@ -1038,12 +999,11 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
if the transaction object is committed and reused
later, we will default to a full ROLLBACK. */
trx->roll_limit = 0;
- ut_d(trx->in_rollback = false);
- mutex_exit(&trx->undo_mutex);
+ trx->in_rollback = false;
return(NULL);
}
- ut_ad(!undo->empty);
+ ut_ad(!undo->empty());
ut_ad(limit <= undo->top_undo_no);
*roll_ptr = trx_undo_build_roll_ptr(
@@ -1055,11 +1015,19 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec);
switch (trx_undo_rec_get_type(undo_rec)) {
+ case TRX_UNDO_INSERT_METADATA:
+ /* This record type was introduced in MDEV-11369
+ instant ADD COLUMN, which was implemented after
+ MDEV-12288 removed the insert_undo log. There is no
+ instant ADD COLUMN for temporary tables. Therefore,
+ this record can only be present in the main undo log. */
+ ut_ad(undo == update);
+ /* fall through */
case TRX_UNDO_RENAME_TABLE:
- ut_ad(undo == insert);
+ ut_ad(undo == insert || undo == update);
/* fall through */
case TRX_UNDO_INSERT_REC:
- ut_ad(undo == insert || undo == temp);
+ ut_ad(undo == insert || undo == update || undo == temp);
*roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
break;
default:
@@ -1067,12 +1035,7 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
break;
}
- ut_ad(trx_roll_check_undo_rec_ordering(
- undo_no, undo->rseg->space, trx));
-
trx->undo_no = undo_no;
- trx->undo_rseg_space = undo->rseg->space;
- mutex_exit(&trx->undo_mutex);
trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
mtr.commit();
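trx_roll_pop_top_rec_of_trx() above undoes records in descending undo number order across up to three undo logs (old_insert, undo and the temporary log), always popping from whichever log currently has the greatest top_undo_no that is still at or above trx->roll_limit. A small self-contained model of that selection rule, with the logs reduced to plain sorted vectors, is sketched below.

// Model of "pop the record with the largest undo_no >= limit among several
// undo logs", the selection rule in trx_roll_pop_top_rec_of_trx().
// The logs are plain sorted vectors here, an illustration only.
#include <cstdint>
#include <iostream>
#include <vector>

using undo_log = std::vector<uint64_t>;  // undo_no values, ascending; back() is the top

static undo_log* pick_log(std::vector<undo_log*>& logs, uint64_t limit) {
    undo_log* best = nullptr;
    for (undo_log* log : logs) {
        if (log && !log->empty() && log->back() >= limit
            && (!best || best->back() < log->back())) {
            best = log;
        }
    }
    return best;   // null means: nothing left to undo above the limit
}

int main() {
    undo_log old_insert{1, 4}, undo{2, 5, 7}, temp{3, 6};
    std::vector<undo_log*> logs{&old_insert, &undo, &temp};
    const uint64_t limit = 3;   // roll back everything with undo_no >= 3

    while (undo_log* log = pick_log(logs, limit)) {
        std::cout << "undo record " << log->back() << '\n';   // 7 6 5 4 3
        log->pop_back();
    }
}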
@@ -1130,7 +1093,7 @@ trx_rollback_start(
ut_ad(!trx->in_rollback);
trx->roll_limit = roll_limit;
- ut_d(trx->in_rollback = true);
+ trx->in_rollback = true;
ut_a(trx->roll_limit <= trx->undo_no);
@@ -1147,21 +1110,6 @@ trx_rollback_start(
return(que_fork_start_command(roll_graph));
}
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_commit(trx);
-
- trx->mod_tables.clear();
-
- trx->lock.que_state = TRX_QUE_RUNNING;
-}
-
/*********************************************************************//**
Creates a rollback command node struct.
@return own: rollback node struct */
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 3d5320cdd15..4a3102d225f 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -33,30 +33,252 @@ Created 3/26/1996 Heikki Tuuri
#include <algorithm>
-/** Creates a rollback segment header.
-This function is called only when a new rollback segment is created in
-the database.
-@param[in] space space id
-@param[in] max_size max size in pages
-@param[in] rseg_slot_no rseg id == slot number in trx sys
+#ifdef WITH_WSREP
+#include <mysql/service_wsrep.h>
+
+#ifdef UNIV_DEBUG
+/** The latest known WSREP XID sequence number */
+static long long wsrep_seqno = -1;
+#endif /* UNIV_DEBUG */
+/** The latest known WSREP XID UUID */
+static unsigned char wsrep_uuid[16];
+
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr)
+{
+ ut_ad(wsrep_is_wsrep_xid(xid));
+
+#ifdef UNIV_DEBUG
+ /* Check that seqno is monotonically increasing */
+ long long xid_seqno = wsrep_xid_seqno(xid);
+ const byte* xid_uuid = wsrep_xid_uuid(xid);
+
+ if (!memcmp(xid_uuid, wsrep_uuid, sizeof wsrep_uuid)) {
+ ut_ad(xid_seqno > wsrep_seqno);
+ } else {
+ memcpy(wsrep_uuid, xid_uuid, sizeof wsrep_uuid);
+ }
+ wsrep_seqno = xid_seqno;
+#endif /* UNIV_DEBUG */
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header,
+ uint32_t(xid->formatID),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header,
+ uint32_t(xid->gtrid_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header,
+ uint32_t(xid->bqual_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header,
+ reinterpret_cast<const byte*>(xid->data),
+ XIDDATASIZE, mtr);
+}
+
+/** Update WSREP checkpoint XID in first rollback segment header
+as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
+are no wsrep transactions committing.
+If the UUID part of the WSREP XID does not match to the UUIDs of XIDs already
+stored into rollback segments, the WSREP XID in all the remaining rollback
+segments will be reset.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid)
+{
+ mtr_t mtr;
+ mtr.start();
+
+ const trx_rseg_t* rseg = trx_sys.rseg_array[0];
+
+ trx_rsegf_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
+ &mtr);
+ if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) {
+ trx_rseg_format_upgrade(rseg_header, &mtr);
+ }
+
+ trx_rseg_update_wsrep_checkpoint(rseg_header, xid, &mtr);
+
+ const byte* xid_uuid = wsrep_xid_uuid(xid);
+ if (memcmp(wsrep_uuid, xid_uuid, sizeof wsrep_uuid)) {
+ memcpy(wsrep_uuid, xid_uuid, sizeof wsrep_uuid);
+
+ /* Because the UUID part of the WSREP XID differed
+ from current_xid_uuid, the WSREP group UUID was
+ changed, and we must reset the XID in all rollback
+ segment headers. */
+ for (ulint rseg_id = 1; rseg_id < TRX_SYS_N_RSEGS; ++rseg_id) {
+ if (const trx_rseg_t* rseg =
+ trx_sys.rseg_array[rseg_id]) {
+ trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_get(rseg->space,
+ rseg->page_no, &mtr),
+ xid, &mtr);
+ }
+ }
+ }
+
+ mtr.commit();
+}
+
+/** Read the WSREP XID information in rollback segment header.
+@param[in] rseg_header Rollback segment header
+@param[out] xid Transaction XID
+@return whether the WSREP XID was present */
+static
+bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid)
+{
+ int formatID = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_FORMAT + rseg_header));
+ if (formatID == 0) {
+ return false;
+ }
+
+ xid.formatID = formatID;
+ xid.gtrid_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header));
+
+ xid.bqual_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header));
+
+ memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE);
+
+ return true;
+}
+
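The pair of functions above store and read back the WSREP XID in the rollback segment header as fixed-width fields: a 4-byte format id (0 meaning nothing stored), 4-byte gtrid and bqual lengths, and an XIDDATASIZE-byte data area, written and read with big-endian accessors. The sketch below mimics that encode/decode layout on a plain byte buffer; the struct, the buffer and the 128-byte data size are assumptions for illustration only.

// Sketch of the fixed-width XID layout used by the rseg WSREP checkpoint:
// 4-byte formatID, 4-byte gtrid_length, 4-byte bqual_length, raw data bytes.
// Big-endian integers mimic mach_read_from_4(); everything else is a stand-in.
#include <cstdint>
#include <cstring>
#include <iostream>

static const size_t XID_DATA_SIZE = 128;   // assumption; XIDDATASIZE in the server

struct simple_xid {
    int32_t formatID;           // 0 would mean "nothing stored"
    int32_t gtrid_length;
    int32_t bqual_length;
    char    data[XID_DATA_SIZE];
};

static void write_be32(unsigned char* p, uint32_t v) {
    p[0] = (unsigned char)(v >> 24); p[1] = (unsigned char)(v >> 16);
    p[2] = (unsigned char)(v >> 8);  p[3] = (unsigned char)(v);
}
static uint32_t read_be32(const unsigned char* p) {
    return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16)
         | (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
}

int main() {
    unsigned char page[12 + XID_DATA_SIZE] = {0};

    simple_xid in = {1, 5, 3, {0}};
    std::memcpy(in.data, "gtridbql", 8);

    // encode: the same field order the patch writes with mlog_write_ulint()
    write_be32(page + 0, uint32_t(in.formatID));
    write_be32(page + 4, uint32_t(in.gtrid_length));
    write_be32(page + 8, uint32_t(in.bqual_length));
    std::memcpy(page + 12, in.data, XID_DATA_SIZE);

    // decode, as the read-back function does; formatID == 0 means "absent"
    if (read_be32(page) == 0) { std::cout << "no XID stored\n"; return 0; }
    simple_xid out;
    out.formatID     = int32_t(read_be32(page + 0));
    out.gtrid_length = int32_t(read_be32(page + 4));
    out.bqual_length = int32_t(read_be32(page + 8));
    std::memcpy(out.data, page + 12, XID_DATA_SIZE);

    std::cout << "format " << out.formatID << ", gtrid+bqual "
              << (out.gtrid_length + out.bqual_length) << " bytes\n";
}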
+/** Read the WSREP XID from the TRX_SYS page (in case of upgrade).
+@param[in] page TRX_SYS page
+@param[out] xid WSREP XID (if present)
+@return whether the WSREP XID is present */
+static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD
+ + page)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ return false;
+ }
+
+ xid.formatID = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_FORMAT + page));
+ xid.gtrid_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_GTRID_LEN + page));
+ xid.bqual_length = static_cast<int>(
+ mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_BQUAL_LEN + page));
+ memcpy(xid.data,
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE);
+ return true;
+}
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid)
+{
+ mtr_t mtr;
+ long long max_xid_seqno = -1;
+ bool found = false;
+
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS;
+ rseg_id++, mtr.commit()) {
+ mtr.start();
+ const buf_block_t* sys = trx_sysf_get(&mtr, false);
+ if (rseg_id == 0) {
+ found = trx_rseg_init_wsrep_xid(sys->frame, xid);
+ ut_ad(!found || xid.formatID == 1);
+ if (found) {
+ max_xid_seqno = wsrep_xid_seqno(&xid);
+ memcpy(wsrep_uuid, wsrep_xid_uuid(&xid),
+ sizeof wsrep_uuid);
+ }
+ }
+
+ const uint32_t page_no = trx_sysf_rseg_get_page_no(
+ sys, rseg_id);
+
+ if (page_no == FIL_NULL) {
+ continue;
+ }
+
+ const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr);
+
+ if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) {
+ continue;
+ }
+
+ XID tmp_xid;
+ long long tmp_seqno = 0;
+ if (trx_rseg_read_wsrep_checkpoint(rseg_header, tmp_xid)
+ && (tmp_seqno = wsrep_xid_seqno(&tmp_xid))
+ > max_xid_seqno) {
+ found = true;
+ max_xid_seqno = tmp_seqno;
+ xid = tmp_xid;
+ memcpy(wsrep_uuid, wsrep_xid_uuid(&tmp_xid),
+ sizeof wsrep_uuid);
+ }
+ }
+
+ return found;
+}
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr)
+{
+ ut_ad(page_offset(rseg_header) == TRX_RSEG);
+ byte* rseg_format = TRX_RSEG_FORMAT + rseg_header;
+ mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
+ /* Clear also possible garbage at the end of the page. Old
+ InnoDB versions did not initialize unused parts of pages. */
+ byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
+ ulint len = srv_page_size
+ - (FIL_PAGE_DATA_END
+ + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8);
+ memset(b, 0, len);
+ mlog_log_string(b, len, mtr);
+}
+
+/** Create a rollback segment header.
+@param[in,out] space system, undo, or temporary tablespace
+@param[in] rseg_id rollback segment identifier
+@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
@param[in,out] mtr mini-transaction
@return page number of the created segment, FIL_NULL if fail */
ulint
trx_rseg_header_create(
- ulint space,
- ulint max_size,
- ulint rseg_slot_no,
- mtr_t* mtr)
+ fil_space_t* space,
+ ulint rseg_id,
+ buf_block_t* sys_header,
+ mtr_t* mtr)
{
ulint page_no;
trx_rsegf_t* rsegf;
- trx_sysf_t* sys_header;
- ulint i;
buf_block_t* block;
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
+ ut_ad(!sys_header == (space == fil_system.temp_space));
/* Allocate a new file segment for the rollback segment */
block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
@@ -72,11 +294,9 @@ trx_rseg_header_create(
page_no = block->page.id.page_no();
/* Get the rollback segment file page */
- rsegf = trx_rsegf_get_new(space, page_no, mtr);
+ rsegf = trx_rsegf_get_new(space->id, page_no, mtr);
- /* Initialize max size field */
- mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
- MLOG_4BYTES, mtr);
+ mlog_write_ulint(rsegf + TRX_RSEG_FORMAT, 0, MLOG_4BYTES, mtr);
/* Initialize the history list */
@@ -84,23 +304,27 @@ trx_rseg_header_create(
flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
/* Reset the undo log slots */
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {
/* FIXME: This is generating a lot of redo log.
Why not just let it remain zero-initialized,
and adjust trx_rsegf_undo_find_free() and friends? */
trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
}
- if (space != SRV_TMP_SPACE_ID) {
+ if (sys_header) {
/* Add the rollback segment info to the free slot in
the trx system header */
- sys_header = trx_sysf_get(mtr);
-
- trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr);
-
- trx_sysf_rseg_set_page_no(
- sys_header, rseg_slot_no, page_no, mtr);
+ mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
+ + TRX_SYS_RSEG_SPACE
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame,
+ space->id, MLOG_4BYTES, mtr);
+ mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
+ + TRX_SYS_RSEG_PAGE_NO
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame,
+ page_no, MLOG_4BYTES, mtr);
}
return(page_no);
@@ -116,33 +340,20 @@ trx_rseg_mem_free(trx_rseg_t* rseg)
mutex_free(&rseg->mutex);
/* There can't be any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
- ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0);
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ for (undo = UT_LIST_GET_FIRST(rseg->undo_cached);
undo != NULL;
undo = next_undo) {
next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(undo);
- }
-
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- undo != NULL;
- undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
-
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
-
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
ut_free(rseg);
@@ -154,7 +365,7 @@ trx_rseg_mem_free(trx_rseg_t* rseg)
@param[in] page_no page number of the segment header */
static
trx_rseg_t*
-trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
+trx_rseg_mem_create(ulint id, fil_space_t* space, ulint page_no)
{
trx_rseg_t* rseg = static_cast<trx_rseg_t*>(
ut_zalloc_nokey(sizeof *rseg));
@@ -163,104 +374,215 @@ trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
rseg->space = space;
rseg->page_no = page_no;
rseg->last_page_no = FIL_NULL;
+ rseg->curr_size = 1;
mutex_create(rseg->is_persistent()
? LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG,
&rseg->mutex);
- UT_LIST_INIT(rseg->update_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->update_undo_cached, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_list, &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
return(rseg);
}
+/** Read the undo log lists.
+@param[in,out] rseg rollback segment
+@param[in,out] max_trx_id maximum observed transaction identifier
+@param[in] rseg_header rollback segment header
+@return the combined size of undo log segments in pages */
+static
+ulint
+trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
+ const trx_rsegf_t* rseg_header)
+{
+ ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
+
+ ulint size = 0;
+
+ for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ ulint page_no = trx_rsegf_get_nth_undo(rseg_header, i);
+ if (page_no != FIL_NULL) {
+ size += trx_undo_mem_create_at_db_start(
+ rseg, i, page_no, max_trx_id);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
+ }
+ }
+
+ return(size);
+}
+
/** Restore the state of a persistent rollback segment.
-@param[in,out] rseg persistent rollback segment
-@param[in,out] mtr mini-transaction */
+@param[in,out] rseg persistent rollback segment
+@param[in,out] max_trx_id maximum observed transaction identifier
+@param[in,out] mtr mini-transaction */
static
void
-trx_rseg_mem_restore(trx_rseg_t* rseg, mtr_t* mtr)
+trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
{
- ulint len;
- fil_addr_t node_addr;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_log_hdr;
- ulint sum_of_undo_sizes;
+ trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ rseg->space->id, rseg->page_no, mtr);
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, mtr);
+ if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) {
+ trx_id_t id = mach_read_from_8(rseg_header
+ + TRX_RSEG_MAX_TRX_ID);
- rseg->max_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
- /* Initialize the undo log lists according to the rseg header */
+ if (rseg_header[TRX_RSEG_BINLOG_NAME]) {
+ const char* binlog_name = reinterpret_cast<const char*>
+ (rseg_header) + TRX_RSEG_BINLOG_NAME;
+ compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof
+ trx_sys.recovered_binlog_filename);
+
+ int cmp = *trx_sys.recovered_binlog_filename
+ ? strncmp(binlog_name,
+ trx_sys.recovered_binlog_filename,
+ TRX_RSEG_BINLOG_NAME_LEN)
+ : 1;
+
+ if (cmp >= 0) {
+ uint64_t binlog_offset = mach_read_from_8(
+ rseg_header + TRX_RSEG_BINLOG_OFFSET);
+ if (cmp) {
+ memcpy(trx_sys.
+ recovered_binlog_filename,
+ binlog_name,
+ TRX_RSEG_BINLOG_NAME_LEN);
+ trx_sys.recovered_binlog_offset
+ = binlog_offset;
+ } else if (binlog_offset >
+ trx_sys.recovered_binlog_offset) {
+ trx_sys.recovered_binlog_offset
+ = binlog_offset;
+ }
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_read_wsrep_checkpoint(
+ rseg_header, trx_sys.recovered_wsrep_xid);
+#endif
+ }
+ }
- sum_of_undo_sizes = trx_undo_lists_init(rseg);
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ return;
+ }
- rseg->curr_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
- + 1 + sum_of_undo_sizes;
+ /* Initialize the undo log lists according to the rseg header */
- len = flst_get_len(rseg_header + TRX_RSEG_HISTORY);
+ rseg->curr_size = mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)
+ + 1 + trx_undo_lists_init(rseg, max_trx_id, rseg_header);
- if (len > 0) {
- my_atomic_addlint(&trx_sys->rseg_history_len, len);
+ if (ulint len = flst_get_len(rseg_header + TRX_RSEG_HISTORY)) {
+ trx_sys.history_add(int32(len));
- node_addr = trx_purge_get_log_from_hist(
+ fil_addr_t node_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
rseg->last_page_no = node_addr.page;
rseg->last_offset = node_addr.boffset;
- undo_log_hdr = trx_undo_page_get(
- page_id_t(rseg->space, node_addr.page), mtr)
+ const trx_ulogf_t* undo_log_hdr = trx_undo_page_get(
+ page_id_t(rseg->space->id, node_addr.page), mtr)
+ node_addr.boffset;
- rseg->last_trx_no = mach_read_from_8(
- undo_log_hdr + TRX_UNDO_TRX_NO);
-
- rseg->last_del_marks = mtr_read_ulint(
- undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
-
- TrxUndoRsegs elem(rseg->last_trx_no);
- elem.push_back(rseg);
+ trx_id_t id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_ID);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_NO);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ unsigned purge = mach_read_from_2(
+ undo_log_hdr + TRX_UNDO_NEEDS_PURGE);
+ ut_ad(purge <= 1);
+ rseg->set_last_trx_no(id, purge != 0);
+ rseg->needs_purge = purge != 0;
if (rseg->last_page_no != FIL_NULL) {
/* There is no need to cover this operation by the purge
mutex because we are still bootstrapping. */
-
- purge_sys->purge_queue.push(elem);
+ purge_sys.purge_queue.push(*rseg);
}
}
}
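Part of trx_rseg_mem_restore() above recovers the latest binlog position seen by any rollback segment: among the stored (binlog file name, offset) pairs it keeps the greatest file name by strncmp(), and for an equal name the greater offset. The following self-contained sketch reproduces exactly that comparison; the names, the 512-byte name length and the struct are invented for illustration.

// The "keep the most recent (binlog name, offset) pair" rule used when
// scanning rollback segment headers at startup. Names and lengths are made up.
#include <cstdint>
#include <cstring>
#include <iostream>

static const size_t NAME_LEN = 512;   // stand-in for TRX_RSEG_BINLOG_NAME_LEN

struct binlog_pos { char name[NAME_LEN]; uint64_t offset; };

static void update_recovered(binlog_pos& recovered, const binlog_pos& stored) {
    int cmp = *recovered.name
        ? std::strncmp(stored.name, recovered.name, NAME_LEN)
        : 1;                       // nothing recovered yet: take what is stored
    if (cmp > 0) {                 // newer binlog file
        std::memcpy(recovered.name, stored.name, NAME_LEN);
        recovered.offset = stored.offset;
    } else if (cmp == 0 && stored.offset > recovered.offset) {
        recovered.offset = stored.offset;   // same file, later position
    }
}

int main() {
    binlog_pos recovered = {"", 0};
    binlog_pos a = {"master-bin.000007", 1000};
    binlog_pos b = {"master-bin.000008", 200};
    binlog_pos c = {"master-bin.000008", 900};

    update_recovered(recovered, a);
    update_recovered(recovered, b);
    update_recovered(recovered, c);

    std::cout << recovered.name << ":" << recovered.offset << '\n';
    // expected: master-bin.000008:900
}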
+/** Read binlog metadata from the TRX_SYS page, in case we are upgrading
+from MySQL or a MariaDB version older than 10.3.5. */
+static void trx_rseg_init_binlog_info(const page_t* page)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
+ + page)
+ == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+ memcpy(trx_sys.recovered_binlog_filename,
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
+ + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN);
+ trx_sys.recovered_binlog_offset = mach_read_from_8(
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
+ + TRX_SYS + page);
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid);
+#endif
+}
+
/** Initialize the rollback segments in memory at database startup. */
void
trx_rseg_array_init()
{
- mtr_t mtr;
+ trx_id_t max_trx_id = 0;
+
+ *trx_sys.recovered_binlog_filename = '\0';
+ trx_sys.recovered_binlog_offset = 0;
+#ifdef WITH_WSREP
+ trx_sys.recovered_wsrep_xid.null();
+#endif
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ mtr_t mtr;
mtr.start();
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
- ulint page_no = trx_sysf_rseg_get_page_no(
- sys_header, i, &mtr);
+ if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
+ if (rseg_id == 0) {
+ /* In case this is an upgrade from
+ before MariaDB 10.3.5, fetch the base
+ information from the TRX_SYS page. */
+ max_trx_id = mach_read_from_8(
+ TRX_SYS + TRX_SYS_TRX_ID_STORE
+ + sys->frame);
+ trx_rseg_init_binlog_info(sys->frame);
+ }
- if (page_no != FIL_NULL) {
- trx_rseg_t* rseg = trx_rseg_mem_create(
- i,
- trx_sysf_rseg_get_space(sys_header, i, &mtr),
- page_no);
- ut_ad(rseg->is_persistent());
- ut_ad(!trx_sys->rseg_array[rseg->id]);
- trx_sys->rseg_array[rseg->id] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ const uint32_t page_no = trx_sysf_rseg_get_page_no(
+ sys, rseg_id);
+ if (page_no != FIL_NULL) {
+ trx_rseg_t* rseg = trx_rseg_mem_create(
+ rseg_id,
+ fil_space_get(trx_sysf_rseg_get_space(
+ sys, rseg_id)),
+ page_no);
+ ut_ad(rseg->is_persistent());
+ ut_ad(rseg->id == rseg_id);
+ ut_ad(!trx_sys.rseg_array[rseg_id]);
+ trx_sys.rseg_array[rseg_id] = rseg;
+ trx_rseg_mem_restore(rseg, max_trx_id, &mtr);
+ }
}
mtr.commit();
}
+
+ trx_sys.init_max_trx_id(max_trx_id + 1);
}
/** Create a persistent rollback segment.
@@ -276,30 +598,25 @@ trx_rseg_create(ulint space_id)
mtr.start();
/* To obey the latching order, acquire the file space
- x-latch before the trx_sys->mutex. */
-#ifdef UNIV_DEBUG
- const fil_space_t* space =
-#endif /* UNIV_DEBUG */
- mtr_x_lock_space(space_id, &mtr);
+ x-latch before the trx_sys.mutex. */
+ fil_space_t* space = mtr_x_lock_space(space_id, &mtr);
ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
- ulint slot_no = trx_sysf_rseg_find_free(&mtr);
- ulint page_no = slot_no == ULINT_UNDEFINED
- ? FIL_NULL
- : trx_rseg_header_create(space_id, ULINT_MAX, slot_no, &mtr);
-
- if (page_no != FIL_NULL) {
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
-
- ulint id = trx_sysf_rseg_get_space(
- sys_header, slot_no, &mtr);
- ut_a(id == space_id);
-
- rseg = trx_rseg_mem_create(slot_no, space_id, page_no);
- ut_ad(rseg->is_persistent());
- ut_ad(!trx_sys->rseg_array[rseg->id]);
- trx_sys->rseg_array[rseg->id] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ if (buf_block_t* sys_header = trx_sysf_get(&mtr)) {
+ ulint rseg_id = trx_sys_rseg_find_free(sys_header);
+ ulint page_no = rseg_id == ULINT_UNDEFINED
+ ? FIL_NULL
+ : trx_rseg_header_create(space, rseg_id, sys_header,
+ &mtr);
+ if (page_no != FIL_NULL) {
+ ut_ad(trx_sysf_rseg_get_space(sys_header, rseg_id)
+ == space_id);
+ rseg = trx_rseg_mem_create(rseg_id, space, page_no);
+ ut_ad(rseg->id == rseg_id);
+ ut_ad(rseg->is_persistent());
+ ut_ad(!trx_sys.rseg_array[rseg->id]);
+ trx_sys.rseg_array[rseg->id] = rseg;
+ }
}
mtr.commit();
@@ -316,20 +633,15 @@ trx_temp_rseg_create()
for (ulong i = 0; i < TRX_SYS_N_RSEGS; i++) {
mtr.start();
mtr.set_log_mode(MTR_LOG_NO_REDO);
-#ifdef UNIV_DEBUG
- const fil_space_t* space =
-#endif /* UNIV_DEBUG */
- mtr_x_lock_space(SRV_TMP_SPACE_ID, &mtr);
- ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
+ mtr_x_lock(&fil_system.temp_space->latch, &mtr);
ulint page_no = trx_rseg_header_create(
- SRV_TMP_SPACE_ID, ULINT_MAX, i, &mtr);
+ fil_system.temp_space, i, NULL, &mtr);
trx_rseg_t* rseg = trx_rseg_mem_create(
- i, SRV_TMP_SPACE_ID, page_no);
+ i, fil_system.temp_space, page_no);
ut_ad(!rseg->is_persistent());
- ut_ad(!trx_sys->temp_rsegs[i]);
- trx_sys->temp_rsegs[i] = rseg;
- trx_rseg_mem_restore(rseg, &mtr);
+ ut_ad(!trx_sys.temp_rsegs[i]);
+ trx_sys.temp_rsegs[i] = rseg;
mtr.commit();
}
}
@@ -346,54 +658,70 @@ trx_rseg_get_n_undo_tablespaces(
ulint* space_ids) /*!< out: array of space ids of
UNDO tablespaces */
{
- ulint i;
- mtr_t mtr;
- trx_sysf_t* sys_header;
- ulint n_undo_tablespaces = 0;
-
- mtr_start(&mtr);
+ mtr_t mtr;
+ mtr.start();
- sys_header = trx_sysf_get(&mtr);
+ buf_block_t* sys_header = trx_sysf_get(&mtr, false);
+ if (!sys_header) {
+ mtr.commit();
+ return 0;
+ }
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
- ulint space;
+ ulint* end = space_ids;
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, &mtr);
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ uint32_t page_no = trx_sysf_rseg_get_page_no(sys_header,
+ rseg_id);
if (page_no == FIL_NULL) {
continue;
}
- space = trx_sysf_rseg_get_space(sys_header, i, &mtr);
-
- if (space != 0) {
- ulint j;
- ibool found = FALSE;
-
- for (j = 0; j < n_undo_tablespaces; ++j) {
- if (space_ids[j] == space) {
- found = TRUE;
- break;
- }
- }
-
- if (!found) {
- ut_a(n_undo_tablespaces <= i);
- space_ids[n_undo_tablespaces++] = space;
+ if (ulint space = trx_sysf_rseg_get_space(sys_header,
+ rseg_id)) {
+ if (std::find(space_ids, end, space) == end) {
+ *end++ = space;
}
}
}
- mtr_commit(&mtr);
+ mtr.commit();
+
+ ut_a(end - space_ids <= TRX_SYS_N_RSEGS);
+ *end = ULINT_UNDEFINED;
+
+ std::sort(space_ids, end);
- ut_a(n_undo_tablespaces <= TRX_SYS_N_RSEGS);
+ return ulint(end - space_ids);
+}
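The rewritten trx_rseg_get_n_undo_tablespaces() above deduplicates undo tablespace ids with std::find into the caller's array, terminates the array with ULINT_UNDEFINED and sorts it. A tiny self-contained version of that collect, dedupe and sort scheme on a plain array (with invented ids) is shown below.

// Collect distinct ids into a caller-provided array, terminate the array,
// then sort: the scheme used by the rewritten trx_rseg_get_n_undo_tablespaces().
#include <algorithm>
#include <cstddef>
#include <iostream>

static const unsigned long UNDEFINED = ~0UL;   // stand-in for ULINT_UNDEFINED

int main() {
    const unsigned long rseg_spaces[] = {3, 0, 5, 3, 0, 4, 5};   // 0 = system tablespace
    unsigned long space_ids[8];
    unsigned long* end = space_ids;

    for (unsigned long space : rseg_spaces) {
        if (space                                  // skip the system tablespace
            && std::find(space_ids, end, space) == end) {
            *end++ = space;                        // first time we see this id
        }
    }

    *end = UNDEFINED;                              // terminator for the caller
    std::sort(space_ids, end);

    for (unsigned long* p = space_ids; p != end; ++p) std::cout << *p << ' ';
    std::cout << "(" << (end - space_ids) << " undo tablespaces)\n";  // 3 4 5 (3 ...)
}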
- space_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr)
+{
+ DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset);
- if (n_undo_tablespaces > 0) {
- std::sort(space_ids, space_ids + n_undo_tablespaces);
+ const size_t len = strlen(trx->mysql_log_file_name) + 1;
+
+ ut_ad(len > 1);
+
+ if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) {
+ return;
}
- return(n_undo_tablespaces);
+ mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET,
+ trx->mysql_log_offset, mtr);
+ byte* p = rseg_header + TRX_RSEG_BINLOG_NAME;
+ const byte* binlog_name = reinterpret_cast<const byte*>
+ (trx->mysql_log_file_name);
+
+ if (memcmp(binlog_name, p, len)) {
+ mlog_write_string(p, binlog_name, len, mtr);
+ }
}
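trx_rseg_update_binlog_offset() above persists the committing transaction's binlog file name and offset into the rollback segment header, with two guards: names longer than TRX_RSEG_BINLOG_NAME_LEN are skipped entirely, and the name field is rewritten only when it actually differs from what is stored, while the offset is always rewritten. A minimal standalone sketch of those guards follows; the header struct and plain memory writes stand in for the redo-logged mlog writes.

// The two guards in trx_rseg_update_binlog_offset(): refuse over-long names,
// and rewrite the (larger) name field only when it differs from what is
// already stored. The header buffer and writes are placeholders.
#include <cstdint>
#include <cstring>
#include <iostream>

static const size_t BINLOG_NAME_LEN = 512;   // stand-in for TRX_RSEG_BINLOG_NAME_LEN

struct header { char binlog_name[BINLOG_NAME_LEN]; uint64_t binlog_offset; };

static void update_binlog_offset(header& h, const char* file, uint64_t offset) {
    const size_t len = std::strlen(file) + 1;     // including the NUL terminator
    if (len > BINLOG_NAME_LEN) {
        return;                                   // name does not fit: skip, as the patch does
    }
    h.binlog_offset = offset;                     // the offset is always updated
    if (std::memcmp(file, h.binlog_name, len)) {  // name changed: rewrite it
        std::memcpy(h.binlog_name, file, len);
        std::cout << "wrote new binlog name " << file << '\n';
    }
}

int main() {
    header h = {"", 0};
    update_binlog_offset(h, "master-bin.000042", 1234);
    update_binlog_offset(h, "master-bin.000042", 5678);   // same name: offset only
    std::cout << h.binlog_name << ":" << h.binlog_offset << '\n';
}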
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index f4c043a3ca9..b46805eef4b 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -24,8 +24,8 @@ Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "mysqld.h"
#include "trx0sys.h"
+#include "mysqld.h"
#include "sql_error.h"
#include "fsp0fsp.h"
@@ -40,55 +40,9 @@ Created 3/26/1996 Heikki Tuuri
#include "log0log.h"
#include "log0recv.h"
#include "os0file.h"
-#include "read0read.h"
-
-#include <mysql/service_wsrep.h>
-
-/** The file format tag structure with id and name. */
-struct file_format_t {
- ulint id; /*!< id of the file format */
- const char* name; /*!< text representation of the
- file format */
- ib_mutex_t mutex; /*!< covers changes to the above
- fields */
-};
/** The transaction system */
-trx_sys_t* trx_sys;
-
-/** List of animal names representing file format. */
-static const char* file_format_name_map[] = {
- "Antelope",
- "Barracuda",
- "Cheetah",
- "Dragon",
- "Elk",
- "Fox",
- "Gazelle",
- "Hornet",
- "Impala",
- "Jaguar",
- "Kangaroo",
- "Leopard",
- "Moose",
- "Nautilus",
- "Ocelot",
- "Porpoise",
- "Quail",
- "Rabbit",
- "Shark",
- "Tiger",
- "Urchin",
- "Viper",
- "Whale",
- "Xenops",
- "Yak",
- "Zebra"
-};
-
-/** The number of elements in the file format name array. */
-static const ulint FILE_FORMAT_NAME_N
- = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
+trx_sys_t trx_sys;
/** Check whether transaction id is valid.
@param[in] id transaction id to check
@@ -98,7 +52,7 @@ ReadView::check_trx_id_sanity(
trx_id_t id,
const table_name_t& name)
{
- if (id >= trx_sys->max_trx_id) {
+ if (id >= trx_sys.get_max_trx_id()) {
ib::warn() << "A transaction id"
<< " in a record of table "
@@ -129,249 +83,32 @@ ReadView::check_trx_id_sanity(
uint trx_rseg_n_slots_debug = 0;
#endif
-/** This is used to track the maximum file format id known to InnoDB. It's
-updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
-or create a table. */
-static file_format_t file_format_max;
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
- mtr_t mtr;
- trx_sysf_t* sys_header;
-
- /* wsrep_fake_trx_id violates this assert
- Copied from trx_sys_get_new_trx_id
- */
- ut_ad(trx_sys_mutex_own());
-
- if (!srv_read_only_mode) {
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- mlog_write_ull(
- sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
-
- mtr_commit(&mtr);
- }
-}
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- int64_t offset, /*!< in: position in that log file */
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- mtr_t* mtr) /*!< in: mtr */
-{
- DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset));
-
- const size_t len = strlen(file_name) + 1;
-
- if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
-
- return;
- }
-
- if (mach_read_from_4(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + TRX_SYS_MYSQL_LOG_INFO + sys_header)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mlog_write_ulint(TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + TRX_SYS_MYSQL_LOG_INFO + sys_header,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- if (memcmp(file_name, TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO
- + sys_header, len)) {
- mlog_write_string(TRX_SYS_MYSQL_LOG_NAME
- + TRX_SYS_MYSQL_LOG_INFO
- + sys_header,
- reinterpret_cast<const byte*>(file_name),
- len, mtr);
- }
-
- mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
- + sys_header, offset, mtr);
-}
-
/** Display the MySQL binlog offset info if it is present in the trx
system header. */
void
trx_sys_print_mysql_binlog_offset()
{
- mtr_t mtr;
-
- mtr.start();
-
- const trx_sysf_t* sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD + sys_header)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
- ib::info() << "Last binlog file '"
- << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
- + sys_header
- << "', position "
- << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET
- + sys_header);
- }
-
- mtr.commit();
-}
-
-#ifdef WITH_WSREP
-
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-/** Read WSREP XID seqno */
-static inline long long read_wsrep_xid_seqno(const XID* xid)
-{
- long long seqno;
- memcpy(&seqno, xid->data + 24, sizeof(long long));
- return seqno;
-}
-
-/** Read WSREP XID UUID */
-static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
- memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
-/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
-@param[in] xid Transaction XID
-@param[in,out] sys_header sys_header
-@param[in] mtr minitransaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- trx_sysf_t* sys_header,
- mtr_t* mtr)
-{
- ut_ad(xid->formatID == 1);
- ut_ad(wsrep_is_wsrep_xid(xid));
-
- if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
- TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
-#ifdef UNIV_DEBUG
- } else {
- /* Check that seqno is monotonically increasing */
- unsigned char xid_uuid[16];
- long long xid_seqno = read_wsrep_xid_seqno(xid);
- read_wsrep_xid_uuid(xid, xid_uuid);
-
- if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) {
- ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
- trx_sys_cur_xid_seqno = xid_seqno;
- } else {
- memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
- }
-
- trx_sys_cur_xid_seqno = xid_seqno;
-#endif /* UNIV_DEBUG */
- }
-
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_FORMAT,
- (int)xid->formatID,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_GTRID_LEN,
- (int)xid->gtrid_length,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_BQUAL_LEN,
- (int)xid->bqual_length,
- MLOG_4BYTES, mtr);
- mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_DATA,
- (const unsigned char*) xid->data,
- XIDDATASIZE, mtr);
-}
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid)
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint magic;
-
- ut_ad(xid);
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mtr.commit();
- xid->null();
- xid->gtrid_length = 0;
- xid->bqual_length = 0;
- memset(xid->data, 0, sizeof xid->data);
- memset(xid->data + 24, 0xff, 8);
- return false;
+ if (!*trx_sys.recovered_binlog_filename) {
+ return;
}
- xid->formatID = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
- xid->gtrid_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
- xid->bqual_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
- ut_memcpy(xid->data,
- sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
- XIDDATASIZE);
-
- mtr_commit(&mtr);
- return true;
+ ib::info() << "Last binlog file '"
+ << trx_sys.recovered_binlog_filename
+ << "', position "
+ << trx_sys.recovered_binlog_offset;
}
-#endif /* WITH_WSREP */
-
-/** @return an unallocated rollback segment slot in the TRX_SYS header
+/** Find an available rollback segment.
+@param[in]	sys_header	the TRX_SYS page
+@return an unallocated rollback segment slot in the TRX_SYS header
@retval ULINT_UNDEFINED if not found */
ulint
-trx_sysf_rseg_find_free(mtr_t* mtr)
+trx_sys_rseg_find_free(const buf_block_t* sys_header)
{
- trx_sysf_t* sys_header = trx_sysf_get(mtr);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
- if (trx_sysf_rseg_get_page_no(sys_header, i, mtr)
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ if (trx_sysf_rseg_get_page_no(sys_header, rseg_id)
== FIL_NULL) {
- return(i);
+ return rseg_id;
}
}
@@ -386,13 +123,14 @@ trx_sysf_get_n_rseg_slots()
mtr_t mtr;
mtr.start();
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
srv_available_undo_logs = 0;
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
- srv_available_undo_logs
- += trx_sysf_rseg_get_page_no(sys_header, i, &mtr)
- != FIL_NULL;
+ if (const buf_block_t* sys_header = trx_sysf_get(&mtr, false)) {
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
+ srv_available_undo_logs
+ += trx_sysf_rseg_get_page_no(sys_header,
+ rseg_id)
+ != FIL_NULL;
+ }
}
mtr.commit();
@@ -407,7 +145,6 @@ trx_sysf_create(
/*============*/
mtr_t* mtr) /*!< in: mtr */
{
- trx_sysf_t* sys_header;
ulint slot_no;
buf_block_t* block;
page_t* page;
@@ -420,10 +157,12 @@ trx_sysf_create(
then enter the kernel: we must do it in this order to conform
to the latching order rules. */
- mtr_x_lock_space(TRX_SYS_SPACE, mtr);
+ mtr_x_lock(&fil_system.sys_space->latch, mtr);
+ compile_time_assert(TRX_SYS_SPACE == 0);
/* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+ block = fseg_create(fil_system.sys_space, 0,
+ TRX_SYS + TRX_SYS_FSEG_HEADER,
mtr);
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
@@ -441,126 +180,42 @@ trx_sysf_create(
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
-
/* Reset the rollback segment slots. Old versions of InnoDB
(before MySQL 5.5) define TRX_SYS_N_RSEGS as 256 and expect
that the whole array is initialized. */
- ptr = TRX_SYS_RSEGS + sys_header;
+ ptr = TRX_SYS + TRX_SYS_RSEGS + page;
compile_time_assert(256 >= TRX_SYS_N_RSEGS);
memset(ptr, 0xff, 256 * TRX_SYS_RSEG_SLOT_SIZE);
ptr += 256 * TRX_SYS_RSEG_SLOT_SIZE;
- ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
+ ut_a(ptr <= page + (srv_page_size - FIL_PAGE_DATA_END));
/* Initialize all of the page. This part used to be uninitialized. */
- memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
+ memset(ptr, 0, srv_page_size - FIL_PAGE_DATA_END + size_t(page - ptr));
- mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- + page - sys_header, mtr);
+ mlog_log_string(TRX_SYS + page, srv_page_size - FIL_PAGE_DATA_END
+ - TRX_SYS, mtr);
/* Create the first rollback segment in the SYSTEM tablespace */
- slot_no = trx_sysf_rseg_find_free(mtr);
- page_no = trx_rseg_header_create(TRX_SYS_SPACE,
- ULINT_MAX, slot_no, mtr);
+ slot_no = trx_sys_rseg_find_free(block);
+ page_no = trx_rseg_header_create(fil_system.sys_space, slot_no, block,
+ mtr);
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
}
-/** Initialize the transaction system main-memory data structures. */
-void
-trx_sys_init_at_db_start()
-{
- trx_sysf_t* sys_header;
- ib_uint64_t rows_to_undo = 0;
- const char* unit = "";
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- mtr_t mtr;
- mtr.start();
-
- sys_header = trx_sysf_get(&mtr);
-
- trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
- + ut_uint64_align_up(mach_read_from_8(sys_header
- + TRX_SYS_TRX_ID_STORE),
- TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- mtr.commit();
- ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
-
- trx_lists_init_at_db_start();
-
- /* This mutex is not strictly required, it is here only to satisfy
- the debug code (assertions). We are still running in single threaded
- bootstrap mode. */
-
- trx_sys_mutex_enter();
-
- if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
- const trx_t* trx;
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- ut_ad(trx->is_recovered);
- assert_trx_in_rw_list(trx);
-
- if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
- rows_to_undo += trx->undo_no;
- }
- }
-
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- ib::info() << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
- << " transaction(s) which must be rolled back or"
- " cleaned up in total " << rows_to_undo << unit
- << " row operations to undo";
-
- ib::info() << "Trx id counter is " << trx_sys->max_trx_id;
- }
-
- trx_sys_mutex_exit();
-
- trx_sys->mvcc->clone_oldest_view(&purge_sys->view);
-}
-
-/*****************************************************************//**
-Creates the trx_sys instance and initializes purge_queue and mutex. */
+/** Create the instance */
void
-trx_sys_create(void)
-/*================*/
+trx_sys_t::create()
{
- ut_ad(trx_sys == NULL);
-
- trx_sys = static_cast<trx_sys_t*>(ut_zalloc_nokey(sizeof(*trx_sys)));
-
- mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
-
- UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
- UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
- UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
-
- trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
-
- new(&trx_sys->rw_trx_ids) trx_ids_t(ut_allocator<trx_id_t>(
- mem_key_trx_sys_t_rw_trx_ids));
-
- new(&trx_sys->rw_trx_set) TrxIdSet();
+ ut_ad(this == &trx_sys);
+ ut_ad(!is_initialised());
+ m_initialised = true;
+ mutex_create(LATCH_ID_TRX_SYS, &mutex);
+ UT_LIST_INIT(trx_list, &trx_t::trx_list);
+ my_atomic_store32(&rseg_history_len, 0);
+
+ rw_trx_hash.init();
}
/*****************************************************************//**
@@ -578,260 +233,6 @@ trx_sys_create_sys_pages(void)
mtr_commit(&mtr);
}
-/*****************************************************************//**
-Update the file format tag.
-@return always TRUE */
-static
-ibool
-trx_sys_file_format_max_write(
-/*==========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name, can
- be NULL */
-{
- mtr_t mtr;
- byte* ptr;
- buf_block_t* block;
- ib_uint64_t tag_value;
-
- mtr_start(&mtr);
-
- block = buf_page_get(
- page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
- RW_X_LATCH, &mtr);
-
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (name) {
- *name = file_format_max.name;
- }
-
- mlog_write_ull(ptr, tag_value, &mtr);
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Read the file format tag.
-@return the file format or ULINT_UNDEFINED if not set. */
-static
-ulint
-trx_sys_file_format_max_read(void)
-/*==============================*/
-{
- mtr_t mtr;
- const byte* ptr;
- const buf_block_t* block;
- ib_id_t file_format_id;
-
- /* Since this is called during the startup phase it's safe to
- read the value without a covering mutex. */
- mtr_start(&mtr);
-
- block = buf_page_get(
- page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
- RW_X_LATCH, &mtr);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
-
- mtr_commit(&mtr);
-
- file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (file_format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(ULINT_UNDEFINED);
- }
-
- return((ulint) file_format_id);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- ut_a(id < FILE_FORMAT_NAME_N);
-
- return(file_format_name_map[id]);
-}
-
-/*****************************************************************//**
-Check for the max file format tag stored on disk. Note: If max_format_id
-is == UNIV_FORMAT_MAX + 1 then we only print a warning.
-@return DB_SUCCESS or error code */
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id) /*!< in: max format id to check */
-{
- ulint format_id;
-
- /* Check the file format in the tablespace. Do not try to
- recover if the file format is not supported by the engine
- unless forced by the user. */
- format_id = trx_sys_file_format_max_read();
- if (format_id == ULINT_UNDEFINED) {
- /* Format ID was not set. Set it to minimum possible
- value. */
- format_id = UNIV_FORMAT_MIN;
- }
-
- ib::info() << "Highest supported file format is "
- << trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX) << ".";
-
- if (format_id > UNIV_FORMAT_MAX) {
-
- ut_a(format_id < FILE_FORMAT_NAME_N);
-
- const std::string msg = std::string("The system"
- " tablespace is in a file format that this version"
- " doesn't support - ")
- + trx_sys_file_format_id_to_name(format_id)
- + ".";
-
- if (max_format_id <= UNIV_FORMAT_MAX) {
- ib::error() << msg;
- } else {
- ib::warn() << msg;
- }
-
- if (max_format_id <= UNIV_FORMAT_MAX) {
- return(DB_ERROR);
- }
- }
-
- format_id = (format_id > max_format_id) ? format_id : max_format_id;
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name or
- NULL if not needed. */
-{
- ibool ret = FALSE;
-
- ut_a(format_id <= UNIV_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- /* Only update if not already same value. */
- if (format_id != file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-void
-trx_sys_file_format_tag_init(void)
-/*==============================*/
-{
- ulint format_id;
-
- format_id = trx_sys_file_format_max_read();
-
- /* If format_id is not set then set it to the minimum. */
- if (format_id == ULINT_UNDEFINED) {
- trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
- }
-}
-
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id) /*!< in: file format identifier */
-{
- ibool ret = FALSE;
-
- ut_a(name);
- ut_a(file_format_max.name != NULL);
- ut_a(format_id <= UNIV_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- if (format_id > file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-const char*
-trx_sys_file_format_max_get(void)
-/*=============================*/
-{
- return(file_format_max.name);
-}
-
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-void
-trx_sys_file_format_init(void)
-/*==========================*/
-{
- mutex_create(LATCH_ID_FILE_FORMAT_MAX, &file_format_max.mutex);
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = UNIV_FORMAT_MIN;
-
- file_format_max.name = trx_sys_file_format_id_to_name(
- file_format_max.id);
-}
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-void
-trx_sys_file_format_close(void)
-/*===========================*/
-{
- mutex_free(&file_format_max.mutex);
-}
-
/** Create the rollback segments.
@return whether the creation succeeded */
bool
@@ -910,128 +311,53 @@ trx_sys_create_rsegs()
return(true);
}
-/*********************************************************************
-Shutdown/Close the transaction system. */
+/** Close the transaction system on shutdown */
void
-trx_sys_close(void)
-/*===============*/
+trx_sys_t::close()
{
- ut_ad(trx_sys != NULL);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
+ if (!is_initialised()) {
+ return;
+ }
- if (ulint size = trx_sys->mvcc->size()) {
+ if (size_t size = view_count()) {
ib::error() << "All read views were not closed before"
" shutdown: " << size << " read views open";
}
- /* Only prepared transactions may be left in the system. Free them. */
- ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx
- || !srv_was_started
- || srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
-
- while (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
- trx_free_prepared(trx);
- }
+ rw_trx_hash.destroy();
/* There can't be any active transactions. */
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys->rseg_array[i]) {
+ if (trx_rseg_t* rseg = rseg_array[i]) {
trx_rseg_mem_free(rseg);
}
- if (trx_rseg_t* rseg = trx_sys->temp_rsegs[i]) {
+ if (trx_rseg_t* rseg = temp_rsegs[i]) {
trx_rseg_mem_free(rseg);
}
}
- UT_DELETE(trx_sys->mvcc);
-
- ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
-
- /* We used placement new to create this mutex. Call the destructor. */
- mutex_free(&trx_sys->mutex);
-
- trx_sys->rw_trx_ids.~trx_ids_t();
-
- trx_sys->rw_trx_set.~TrxIdSet();
-
- ut_free(trx_sys);
-
- trx_sys = NULL;
-}
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-This is only used to check if it's safe to shutdown.
-@return total number of active transactions or 0 if none */
-ulint
-trx_sys_any_active_transactions(void)
-/*=================================*/
-{
- ulint total_trx = 0;
-
- trx_sys_mutex_enter();
-
- total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
- total_trx += trx->state != TRX_STATE_NOT_STARTED;
- }
-
- ut_a(total_trx >= trx_sys->n_prepared_trx);
- total_trx -= trx_sys->n_prepared_trx;
-
- trx_sys_mutex_exit();
-
- return(total_trx);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_ut_list_t.
-@return true if valid. */
-static
-bool
-trx_sys_validate_trx_list_low(
-/*===========================*/
- trx_ut_list_t* trx_list) /*!< in: &trx_sys->rw_trx_list */
-{
- const trx_t* trx;
- const trx_t* prev_trx = NULL;
-
- ut_ad(trx_sys_mutex_own());
-
- ut_ad(trx_list == &trx_sys->rw_trx_list);
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
-
- check_trx_state(trx);
- ut_a(prev_trx == NULL || prev_trx->id > trx->id);
- }
-
- return(true);
+ ut_a(UT_LIST_GET_LEN(trx_list) == 0);
+ mutex_free(&mutex);
+ m_initialised = false;
}
-/*************************************************************//**
-Validate the trx_sys_t::rw_trx_list.
-@return true if the list is valid. */
-bool
-trx_sys_validate_trx_list()
-/*=======================*/
+/** @return total number of active (non-prepared) transactions */
+ulint trx_sys_t::any_active_transactions()
{
- ut_ad(trx_sys_mutex_own());
-
- ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
-
- return(true);
+ uint32_t total_trx= 0;
+
+ mutex_enter(&mutex);
+ for (trx_t* trx= UT_LIST_GET_FIRST(trx_sys.trx_list);
+ trx != NULL;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx->state == TRX_STATE_COMMITTED_IN_MEMORY ||
+ (trx->state == TRX_STATE_ACTIVE && trx->id))
+ total_trx++;
+ }
+ mutex_exit(&mutex);
+ return total_trx;
}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 93fc1bb0ed2..07654be12ee 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri
#include "log0log.h"
#include "os0proc.h"
#include "que0que.h"
-#include "read0read.h"
#include "srv0mon.h"
#include "srv0srv.h"
#include "srv0start.h"
@@ -53,8 +52,16 @@ Created 3/26/1996 Heikki Tuuri
#include <set>
#include <new>
-extern "C"
-int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+/** The bit pattern corresponding to TRX_ID_MAX */
+const byte trx_id_max_bytes[8] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/** The bit pattern corresponding to max timestamp */
+const byte timestamp_max_bytes[7] = {
+ 0x7f, 0xff, 0xff, 0xff, 0x0f, 0x42, 0x3f
+};
+
static const ulint MAX_DETAILED_ERROR_LEN = 256;
@@ -159,7 +166,7 @@ trx_init(
trx->last_sql_stat_start.least_undo_no = 0;
- ut_ad(!MVCC::is_view_active(trx->read_view));
+ ut_ad(!trx->read_view.is_open());
trx->lock.rec_cached = 0;
@@ -186,6 +193,9 @@ struct TrxFactory {
new(&trx->lock.table_locks) lock_list();
+ new(&trx->read_view) ReadView();
+
+ trx->rw_trx_hash_pins = 0;
trx_init(trx);
trx->dict_operation_lock_mode = 0;
@@ -205,7 +215,6 @@ struct TrxFactory {
&trx_named_savept_t::trx_savepoints);
mutex_create(LATCH_ID_TRX, &trx->mutex);
- mutex_create(LATCH_ID_TRX_UNDO, &trx->undo_mutex);
}
/** Release resources held by the transaction object.
@@ -213,8 +222,7 @@ struct TrxFactory {
static void destroy(trx_t* trx)
{
ut_a(trx->magic_n == TRX_MAGIC_N);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
+ ut_ad(!trx->mysql_thd);
ut_a(trx->lock.wait_lock == NULL);
ut_a(trx->lock.wait_thr == NULL);
@@ -231,13 +239,14 @@ struct TrxFactory {
ut_free(trx->detailed_error);
mutex_free(&trx->mutex);
- mutex_free(&trx->undo_mutex);
trx->mod_tables.~trx_mod_tables_t();
- ut_ad(trx->read_view == NULL);
+ ut_ad(!trx->read_view.is_open());
trx->lock.table_locks.~lock_list();
+
+ trx->read_view.~ReadView();
}
/** Enforce any invariants here, this is called before the transaction
@@ -257,9 +266,6 @@ struct TrxFactory {
ut_ad(trx->mysql_thd == 0);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
-
ut_a(trx->lock.wait_thr == NULL);
ut_a(trx->lock.wait_lock == NULL);
ut_a(trx->dict_operation_lock_mode == 0);
@@ -349,9 +355,7 @@ trx_pool_close()
}
/** @return a trx_t instance from trx_pools. */
-static
-trx_t*
-trx_create_low()
+trx_t *trx_create()
{
trx_t* trx = trx_pools->get();
@@ -363,6 +367,7 @@ trx_create_low()
/* We just got trx from pool, it should be non locking */
ut_ad(trx->will_lock == 0);
ut_ad(trx->state == TRX_STATE_NOT_STARTED);
+ ut_ad(!trx->rw_trx_hash_pins);
DBUG_LOG("trx", "Create: " << trx);
@@ -386,90 +391,16 @@ trx_create_low()
trx->wsrep_event = NULL;
#endif /* WITH_WSREP */
- return(trx);
-}
-
-/**
-Release a trx_t instance back to the pool.
-@param trx the instance to release. */
-static
-void
-trx_free(trx_t*& trx)
-{
- assert_trx_is_free(trx);
-
- trx->mysql_thd = 0;
- trx->mysql_log_file_name = 0;
-
- // FIXME: We need to avoid this heap free/alloc for each commit.
- if (trx->autoinc_locks != NULL) {
- ut_ad(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->autoinc_locks);
- trx->autoinc_locks = NULL;
- }
-
- trx->mod_tables.clear();
-
- ut_ad(trx->read_view == NULL);
-
- /* trx locking state should have been reset before returning trx
- to pool */
- ut_ad(trx->will_lock == 0);
-
- trx_pools->mem_free(trx);
- /* Unpoison the memory for innodb_monitor_set_option;
- it is operating also on the freed transaction objects. */
- MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
- MEM_UNDEFINED(&trx->undo_mutex, sizeof trx->undo_mutex);
- /* Declare the contents as initialized for Valgrind;
- we checked that it was initialized in trx_pools->mem_free(trx). */
- UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
- UNIV_MEM_VALID(&trx->undo_mutex, sizeof trx->undo_mutex);
-
- trx = NULL;
-}
-
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
-{
- trx_t* trx;
-
- trx = trx_create_low();
+ trx_sys.register_trx(trx);
return(trx);
}
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- trx_sys_mutex_enter();
-
- ut_d(trx->in_mysql_trx_list = TRUE);
- UT_LIST_ADD_FIRST(trx_sys->mysql_trx_list, trx);
-
- trx_sys_mutex_exit();
-
- return(trx);
-}
-
-/** Check state of transaction before freeing it.
-@param trx trx object to validate */
-static
-void
-trx_validate_state_before_free(trx_t* trx)
+/**
+ Release a trx_t instance back to the pool.
+ @param trx the instance to release.
+*/
+void trx_free(trx_t*& trx)
{
ut_ad(!trx->declared_to_be_inside_innodb);
ut_ad(!trx->n_mysql_tables_in_use);
@@ -506,57 +437,61 @@ trx_validate_state_before_free(trx_t* trx)
trx->dict_operation = TRX_DICT_OP_NONE;
assert_trx_is_inactive(trx);
-}
-/** Free and initialize a transaction object instantinated during recovery.
-@param trx trx object to free and initialize during recovery */
-void
-trx_free_resurrected(trx_t* trx)
-{
- trx_validate_state_before_free(trx);
+ trx_sys.deregister_trx(trx);
- trx_init(trx);
+ assert_trx_is_free(trx);
- trx_free(trx);
-}
+ trx_sys.rw_trx_hash.put_pins(trx);
+ trx->mysql_thd = 0;
+ trx->mysql_log_file_name = 0;
-/** Free a transaction that was allocated by background or user threads.
-@param trx trx object to free */
-void
-trx_free_for_background(trx_t* trx)
-{
- trx_validate_state_before_free(trx);
+ // FIXME: We need to avoid this heap free/alloc for each commit.
+ if (trx->autoinc_locks != NULL) {
+ ut_ad(ib_vector_is_empty(trx->autoinc_locks));
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->autoinc_locks);
+ trx->autoinc_locks = NULL;
+ }
- trx_free(trx);
+ trx->mod_tables.clear();
+
+ /* trx locking state should have been reset before returning trx
+ to pool */
+ ut_ad(trx->will_lock == 0);
+
+ trx_pools->mem_free(trx);
+ /* Unpoison the memory for innodb_monitor_set_option;
+ it is operating also on the freed transaction objects. */
+ MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
+ /* Declare the contents as initialized for Valgrind;
+ we checked that it was initialized in trx_pools->mem_free(trx). */
+ UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
+
+ trx = NULL;
}
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
+/** At shutdown, frees a transaction object. */
void
-trx_free_prepared(
-/*==============*/
- trx_t* trx) /*!< in, own: trx object */
+trx_free_at_shutdown(trx_t *trx)
{
+ ut_ad(trx->is_recovered);
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
- || (trx->is_recovered
- && (trx_state_eq(trx, TRX_STATE_ACTIVE)
- || trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY))
+ || (trx_state_eq(trx, TRX_STATE_ACTIVE)
&& (!srv_was_started
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT
|| srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || (!srv_is_being_started
+ && !srv_undo_sources && srv_fast_shutdown))));
ut_a(trx->magic_n == TRX_MAGIC_N);
lock_trx_release_locks(trx);
- trx_undo_free_prepared(trx);
-
- assert_trx_in_rw_list(trx);
+ trx_undo_free_at_shutdown(trx);
ut_a(!trx->read_only);
- ut_d(trx->in_rw_trx_list = FALSE);
-
DBUG_LOG("trx", "Free prepared: " << trx);
trx->state = TRX_STATE_NOT_STARTED;
@@ -571,71 +506,20 @@ trx_free_prepared(
trx_free(trx);
}
-/** Disconnect a transaction from MySQL and optionally mark it as if
-it's been recovered. For the marking the transaction must be in prepared state.
-The recovery-marked transaction is going to survive "alone" so its association
-with the mysql handle is destroyed now rather than when it will be
-finally freed.
-@param[in,out] trx transaction
-@param[in] prepared boolean value to specify whether trx is
- for recovery or not. */
-inline
-void
-trx_disconnect_from_mysql(
- trx_t* trx,
- bool prepared)
-{
- trx_sys_mutex_enter();
-
- ut_ad(trx->in_mysql_trx_list);
- ut_d(trx->in_mysql_trx_list = FALSE);
-
- UT_LIST_REMOVE(trx_sys->mysql_trx_list, trx);
-
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, true);
- }
- ut_ad(trx_sys_validate_trx_list());
-
- if (prepared) {
-
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
-
- trx->is_recovered = true;
- trx_sys->n_prepared_recovered_trx++;
- trx->mysql_thd = NULL;
- /* todo/fixme: suggest to do it at innodb prepare */
- trx->will_lock = 0;
- }
-
- trx_sys_mutex_exit();
-}
-
-/** Disconnect a transaction from MySQL.
-@param[in,out] trx transaction */
-inline
-void
-trx_disconnect_plain(trx_t* trx)
-{
- trx_disconnect_from_mysql(trx, false);
-}
-
-/** Disconnect a prepared transaction from MySQL.
-@param[in,out] trx transaction */
-void
-trx_disconnect_prepared(trx_t* trx)
-{
- trx_disconnect_from_mysql(trx, true);
-}
-
-/** Free a transaction object for MySQL.
-@param[in,out] trx transaction */
-void
-trx_free_for_mysql(trx_t* trx)
+/**
+ Disconnect a prepared transaction from MySQL
+ @param[in,out] trx transaction
+*/
+void trx_disconnect_prepared(trx_t *trx)
{
- trx_disconnect_plain(trx);
- trx_free_for_background(trx);
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(trx->mysql_thd);
+ trx->read_view.close();
+ trx->is_recovered= true;
+ trx->mysql_thd= NULL;
+ /* todo/fixme: suggest to do it at innodb prepare */
+ trx->will_lock= 0;
}
/****************************************************************//**
@@ -645,8 +529,6 @@ void
trx_resurrect_table_locks(
/*======================*/
trx_t* trx, /*!< in/out: transaction */
- const trx_undo_ptr_t* undo_ptr,
- /*!< in: pointer to undo segment. */
const trx_undo_t* undo) /*!< in: undo log */
{
mtr_t mtr;
@@ -654,10 +536,11 @@ trx_resurrect_table_locks(
trx_undo_rec_t* undo_rec;
table_id_set tables;
- ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo);
-
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) {
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
+ if (undo->empty()) {
return;
}
@@ -666,7 +549,8 @@ trx_resurrect_table_locks(
/* trx_rseg_mem_create() may have acquired an X-latch on this
page, so we cannot acquire an S-latch. */
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->top_page_no), &mtr);
+ page_id_t(trx->rsegs.m_redo.rseg->space->id,
+ undo->top_page_no), &mtr);
undo_rec = undo_page + undo->top_offset;
@@ -709,192 +593,95 @@ trx_resurrect_table_locks(
}
if (trx->state == TRX_STATE_PREPARED) {
- trx->mod_tables.insert(table);
+ trx->mod_tables.insert(
+ trx_mod_tables_t::value_type(table,
+ 0));
}
lock_table_ix_resurrect(table, trx);
- DBUG_PRINT("ib_trx",
- ("resurrect" TRX_ID_FMT
- " table '%s' IX lock from %s undo",
- trx_get_id_for_print(trx),
- table->name.m_name,
- undo == undo_ptr->insert_undo
- ? "insert" : "update"));
+ DBUG_LOG("ib_trx",
+ "resurrect " << ib::hex(trx->id)
+ << " IX lock on " << table->name);
dict_table_close(table, FALSE, FALSE);
}
}
}
-/****************************************************************//**
-Resurrect the transactions that were doing inserts the time of the
-crash, they need to be undone.
-@return trx_t instance */
-static
-trx_t*
-trx_resurrect_insert(
-/*=================*/
- trx_undo_t* undo, /*!< in: entry to UNDO */
- trx_rseg_t* rseg) /*!< in: rollback segment */
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
- trx->rsegs.m_redo.rseg = rseg;
- *trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->rsegs.m_redo.insert_undo = undo;
- trx->is_recovered = true;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in the prepared state
- waiting for a commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- ib::info() << "Transaction "
- << trx_get_id_for_print(trx)
- << " was in the XA prepared state.";
-
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no; this should have no
- relevance since purge is not interested in committed
- transaction numbers, unless they are in the history
- list, in which case it looks the number from the disk based
- undo log structure */
-
- trx->no = trx->id;
-
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number
- field inited to TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED) {
-
- trx->start_time = ut_time();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = undo->top_undo_no + 1;
- trx->undo_rseg_space = undo->rseg->space;
- }
-
- return(trx);
-}
+/**
+  Resurrect the transactions that were doing inserts/updates at the time of
+  the crash; they need to be undone.
+*/
-/****************************************************************//**
-Prepared transactions are left in the prepared state waiting for a
-commit or abort decision from MySQL */
-static
-void
-trx_resurrect_update_in_prepared_state(
-/*===================================*/
- trx_t* trx, /*!< in,out: transaction */
- const trx_undo_t* undo) /*!< in: update UNDO record */
+static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
+ ib_time_t start_time, uint64_t *rows_to_undo,
+ bool is_old_insert)
{
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- ib::info() << "Transaction " << trx_get_id_for_print(trx)
- << " was in the XA prepared state.";
-
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
- }
-
- trx->state = TRX_STATE_PREPARED;
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
+ trx_state_t state;
+ /*
+    This is single-threaded startup code; we do not need the
+    protection of trx->mutex or trx_sys.mutex here.
+ */
+ switch (undo->state)
+ {
+ case TRX_UNDO_ACTIVE:
+ state= TRX_STATE_ACTIVE;
+ break;
+ case TRX_UNDO_PREPARED:
+ /*
+ Prepared transactions are left in the prepared state
+ waiting for a commit or abort decision from MySQL
+ */
+ ib::info() << "Transaction " << undo->trx_id
+ << " was in the XA prepared state.";
+
+ state= TRX_STATE_PREPARED;
+ break;
+ default:
+ if (is_old_insert && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO)
+ trx_undo_commit_cleanup(undo, false);
+ return;
+ }
+
+ trx_t *trx= trx_create();
+ trx->state= state;
+ ut_d(trx->start_file= __FILE__);
+ ut_d(trx->start_line= __LINE__);
+ ut_ad(trx->no == TRX_ID_MAX);
+
+ if (is_old_insert)
+ trx->rsegs.m_redo.old_insert= undo;
+ else
+ trx->rsegs.m_redo.undo= undo;
+
+ trx->undo_no= undo->top_undo_no + 1;
+ trx->rsegs.m_redo.rseg= rseg;
+ /*
+    Transactions with active data will not have rseg size = 1 or will
+    not qualify for the purge limit criteria, so it is safe to increment
+    this trx_ref_count without mutex protection.
+ */
+ ++trx->rsegs.m_redo.rseg->trx_ref_count;
+ *trx->xid= undo->xid;
+ trx->id= undo->trx_id;
+ trx->is_recovered= true;
+ trx->start_time= start_time;
+
+ if (undo->dict_operation)
+ {
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ if (!trx->table_id)
+ trx->table_id= undo->table_id;
+ }
+
+ trx_sys.rw_trx_hash.insert(trx);
+ trx_sys.rw_trx_hash.put_pins(trx);
+ trx_resurrect_table_locks(trx, undo);
+ if (trx_state_eq(trx, TRX_STATE_ACTIVE))
+ *rows_to_undo+= trx->undo_no;
}
-/****************************************************************//**
-Resurrect the transactions that were doing updates the time of the
-crash, they need to be undone. */
-static
-void
-trx_resurrect_update(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_undo_t* undo, /*!< in/out: update UNDO record */
- trx_rseg_t* rseg) /*!< in/out: rollback segment */
-{
- trx->rsegs.m_redo.rseg = rseg;
- *trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->rsegs.m_redo.update_undo = undo;
- trx->is_recovered = true;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state != TRX_UNDO_ACTIVE) {
- trx_resurrect_update_in_prepared_state(trx, undo);
-
- /* We give a dummy value for the trx number */
-
- trx->no = trx->id;
-
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number field inited to
- TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED) {
- trx->start_time = ut_time();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- if (!trx->table_id) {
- trx->table_id = undo->table_id;
- }
- }
-
- if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
-
- trx->undo_no = undo->top_undo_no + 1;
- trx->undo_rseg_space = undo->rseg->space;
- }
-}
/** Initialize (resurrect) transactions at startup. */
void
@@ -902,22 +689,30 @@ trx_lists_init_at_db_start()
{
ut_a(srv_is_being_started);
ut_ad(!srv_was_started);
- ut_ad(!purge_sys);
- purge_sys = UT_NEW_NOKEY(purge_sys_t());
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ trx_rseg_array_init();
+ return;
+ }
if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
return;
}
+ purge_sys.create();
trx_rseg_array_init();
/* Look from the rollback segments if there exist undo logs for
transactions. */
+ const ib_time_t start_time = ut_time();
+ uint64_t rows_to_undo = 0;
for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
trx_undo_t* undo;
- trx_rseg_t* rseg = trx_sys->rseg_array[i];
+ trx_rseg_t* rseg = trx_sys.rseg_array[i];
/* Some rollback segment may be unavailable,
especially if the server was previously run with a
@@ -926,76 +721,58 @@ trx_lists_init_at_db_start()
continue;
}
- /* Resurrect transactions that were doing inserts. */
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
- undo != NULL;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- /* trx_purge() will not run before we return,
- so we can safely increment this without
- holding rseg->mutex. */
- ++rseg->trx_ref_count;
-
- trx_t* trx;
-
- trx = trx_resurrect_insert(undo, rseg);
-
- trx_sys_rw_trx_add(trx);
-
- trx_resurrect_table_locks(
- trx, &trx->rsegs.m_redo, undo);
+ /* Resurrect transactions that were doing inserts
+ using the old separate insert_undo log. */
+ undo = UT_LIST_GET_FIRST(rseg->old_insert_list);
+ while (undo) {
+ trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo);
+ trx_resurrect(undo, rseg, start_time, &rows_to_undo,
+ true);
+ undo = next;
}
- /* Ressurrect transactions that were doing updates. */
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
+		/* Resurrect other transactions. */
+ for (undo = UT_LIST_GET_FIRST(rseg->undo_list);
undo != NULL;
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
-
- /* Check the trx_sys->rw_trx_set first. */
- trx_sys_mutex_enter();
-
- trx_t* trx = trx_get_rw_trx_by_id(undo->trx_id);
-
- trx_sys_mutex_exit();
-
- if (trx == NULL) {
- trx = trx_allocate_for_background();
- ++rseg->trx_ref_count;
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
+ trx_t *trx = trx_sys.find(0, undo->trx_id, false);
+ if (!trx) {
+ trx_resurrect(undo, rseg, start_time,
+ &rows_to_undo, false);
+ } else {
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_ad(trx->start_time == start_time);
+ ut_ad(trx->is_recovered);
+ ut_ad(trx->rsegs.m_redo.rseg == rseg);
+ ut_ad(trx->rsegs.m_redo.rseg->trx_ref_count);
+
+ trx->rsegs.m_redo.undo = undo;
+ if (undo->top_undo_no >= trx->undo_no) {
+ if (trx_state_eq(trx,
+ TRX_STATE_ACTIVE)) {
+ rows_to_undo -= trx->undo_no;
+ rows_to_undo +=
+ undo->top_undo_no + 1;
+ }
+
+ trx->undo_no = undo->top_undo_no + 1;
+ }
+ trx_resurrect_table_locks(trx, undo);
}
-
- trx_resurrect_update(trx, undo, rseg);
-
- trx_sys_rw_trx_add(trx);
-
- trx_resurrect_table_locks(
- trx, &trx->rsegs.m_redo, undo);
}
}
- TrxIdSet::iterator end = trx_sys->rw_trx_set.end();
+ if (trx_sys.rw_trx_hash.size()) {
- for (TrxIdSet::iterator it = trx_sys->rw_trx_set.begin();
- it != end;
- ++it) {
+ ib::info() << trx_sys.rw_trx_hash.size()
+ << " transaction(s) which must be rolled back or"
+ " cleaned up in total " << rows_to_undo
+ << " row operations to undo";
- ut_ad(it->m_trx->in_rw_trx_list);
-#ifdef UNIV_DEBUG
- if (it->m_trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = it->m_trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- if (it->m_trx->state == TRX_STATE_ACTIVE
- || it->m_trx->state == TRX_STATE_PREPARED) {
-
- trx_sys->rw_trx_ids.push_back(it->m_id);
- }
-
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, it->m_trx);
+ ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id();
}
+ trx_sys.clone_oldest_view();
}
/** Assign a persistent rollback segment in a round-robin fashion,
@@ -1010,7 +787,7 @@ static trx_rseg_t* trx_assign_rseg_low()
}
/* The first slot is always assigned to the system tablespace. */
- ut_ad(trx_sys->rseg_array[0]->space == TRX_SYS_SPACE);
+ ut_ad(trx_sys.rseg_array[0]->space == fil_system.sys_space);
/* Choose a rollback segment evenly distributed between 0 and
innodb_undo_logs-1 in a round-robin fashion, skipping those
@@ -1033,7 +810,7 @@ static trx_rseg_t* trx_assign_rseg_low()
do {
for (;;) {
- rseg = trx_sys->rseg_array[slot];
+ rseg = trx_sys.rseg_array[slot];
#ifdef UNIV_DEBUG
/* Ensure that we are not revisiting the same
@@ -1052,14 +829,14 @@ static trx_rseg_t* trx_assign_rseg_low()
ut_ad(rseg->is_persistent());
- if (rseg->space != TRX_SYS_SPACE) {
+ if (rseg->space != fil_system.sys_space) {
if (rseg->skip_allocation
|| !srv_undo_tablespaces) {
continue;
}
} else if (trx_rseg_t* next
- = trx_sys->rseg_array[slot]) {
- if (next->space != TRX_SYS_SPACE
+ = trx_sys.rseg_array[slot]) {
+ if (next->space != fil_system.sys_space
&& srv_undo_tablespaces > 0) {
/** If dedicated
innodb_undo_tablespaces have
@@ -1103,17 +880,13 @@ trx_t::assign_temp_rseg()
multiple transactions that start modifications concurrently
will write their undo log to the same rollback segment. */
static ulong rseg_slot;
- trx_rseg_t* rseg = trx_sys->temp_rsegs[
+ trx_rseg_t* rseg = trx_sys.temp_rsegs[
rseg_slot++ & (TRX_SYS_N_RSEGS - 1)];
ut_ad(!rseg->is_persistent());
rsegs.m_noredo.rseg = rseg;
if (id == 0) {
- mutex_enter(&trx_sys->mutex);
- id = trx_sys_get_new_trx_id();
- trx_sys->rw_trx_ids.push_back(id);
- trx_sys->rw_trx_set.insert(TrxTrack(id, this));
- mutex_exit(&trx_sys->mutex);
+ trx_sys.register_rw(this);
}
ut_ad(!rseg->is_persistent());
@@ -1165,17 +938,14 @@ trx_start_low(
ut_a(ib_vector_is_empty(trx->autoinc_locks));
ut_a(trx->lock.table_locks.empty());
- /* If this transaction came from trx_allocate_for_mysql(),
- trx->in_mysql_trx_list would hold. In that case, the trx->state
- change must be protected by the trx_sys->mutex, so that
- lock_print_info_all_transactions() will have a consistent view. */
+ /* No other thread can access this trx object through rw_trx_hash, thus
+ we don't need trx_sys.mutex protection for that purpose. Still this
+ trx can be found through trx_sys.trx_list, which means state
+ change must be protected by e.g. trx->mutex.
- ut_ad(!trx->in_rw_trx_list);
-
- /* We tend to over assert and that complicates the code somewhat.
- e.g., the transaction state can be set earlier but we are forced to
- set it under the protection of the trx_sys_t::mutex because some
- trx list assertions are triggered unnecessarily. */
+	For now we update it without mutex protection, because the original
+	code did it this way. It has to be reviewed and fixed properly. */
+ trx->state = TRX_STATE_ACTIVE;
/* By default all transactions are in the read-only list unless they
are non-locking auto-commit read only transactions or background
@@ -1186,37 +956,14 @@ trx_start_low(
if (!trx->read_only
&& (trx->mysql_thd == 0 || read_write || trx->ddl)) {
- trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
-
/* Temporary rseg is assigned only if the transaction
updates a temporary table */
-
- trx_sys_mutex_enter();
-
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys_rw_trx_add(trx);
-
+ trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = true);
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- trx->state = TRX_STATE_ACTIVE;
-
- ut_ad(trx_sys_validate_trx_list());
-
- trx_sys_mutex_exit();
+ trx_sys.register_rw(trx);
} else {
if (!trx_is_autocommit_non_locking(trx)) {
@@ -1225,26 +972,11 @@ trx_start_low(
to write to the temporary table. */
if (read_write) {
-
- trx_sys_mutex_enter();
-
ut_ad(!srv_read_only_mode);
-
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys->rw_trx_set.insert(
- TrxTrack(trx->id, trx));
-
- trx_sys_mutex_exit();
+ trx_sys.register_rw(trx);
}
-
- trx->state = TRX_STATE_ACTIVE;
-
} else {
ut_ad(!read_write);
- trx->state = TRX_STATE_ACTIVE;
}
}
@@ -1263,52 +995,36 @@ trx_start_low(
}
/** Set the serialisation number for a persistent committed transaction.
-@param[in,out] trx committed transaction with persistent changes
-@param[in,out] rseg rollback segment for update_undo, or NULL */
+@param[in,out] trx committed transaction with persistent changes */
static
void
-trx_serialise(trx_t* trx, trx_rseg_t* rseg)
+trx_serialise(trx_t* trx)
{
- ut_ad(!rseg || rseg == trx->rsegs.m_redo.rseg);
-
- trx_sys_mutex_enter();
+ trx_rseg_t *rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(rseg);
+ ut_ad(mutex_own(&rseg->mutex));
- trx->no = trx_sys_get_new_trx_id();
+ if (rseg->last_page_no == FIL_NULL) {
+ mutex_enter(&purge_sys.pq_mutex);
+ }
- /* Track the minimum serialisation number. */
- UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx);
+ trx_sys.assign_new_trx_no(trx);
- /* If the rollack segment is not empty then the
+ /* If the rollback segment is not empty then the
new trx_t::no can't be less than any trx_t::no
already in the rollback segment. User threads only
produce events when a rollback segment is empty. */
- if (rseg && rseg->last_page_no == FIL_NULL) {
- TrxUndoRsegs elem(trx->no);
- elem.push_back(rseg);
-
- mutex_enter(&purge_sys->pq_mutex);
-
- /* This is to reduce the pressure on the trx_sys_t::mutex
- though in reality it should make very little (read no)
- difference because this code path is only taken when the
- rbs is empty. */
-
- trx_sys_mutex_exit();
-
- purge_sys->purge_queue.push(elem);
-
- mutex_exit(&purge_sys->pq_mutex);
- } else {
- trx_sys_mutex_exit();
+ if (rseg->last_page_no == FIL_NULL) {
+ purge_sys.purge_queue.push(TrxUndoRsegs(trx->no, *rseg));
+ mutex_exit(&purge_sys.pq_mutex);
}
}
/****************************************************************//**
Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment.
-@return true if a serialisation log was written */
+update UNDO log record to the assigned rollback segment. */
static
-bool
+void
trx_write_serialisation_history(
/*============================*/
trx_t* trx, /*!< in/out: transaction */
@@ -1339,70 +1055,43 @@ trx_write_serialisation_history(
temp_mtr.commit();
}
- if (!trx->rsegs.m_redo.rseg) {
- ut_ad(!trx->rsegs.m_redo.insert_undo);
- ut_ad(!trx->rsegs.m_redo.update_undo);
- return false;
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ if (!rseg) {
+ ut_ad(!trx->rsegs.m_redo.undo);
+ ut_ad(!trx->rsegs.m_redo.old_insert);
+ return;
}
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t*& undo = trx->rsegs.m_redo.undo;
+ trx_undo_t*& old_insert = trx->rsegs.m_redo.old_insert;
- if (!insert && !update) {
- return false;
+ if (!undo && !old_insert) {
+ return;
}
ut_ad(!trx->read_only);
- trx_rseg_t* update_rseg = update ? trx->rsegs.m_redo.rseg : NULL;
- mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
+ ut_ad(!undo || undo->rseg == rseg);
+ ut_ad(!old_insert || old_insert->rseg == rseg);
+ mutex_enter(&rseg->mutex);
/* Assign the transaction serialisation number and add any
- update_undo log to the purge queue. */
- trx_serialise(trx, update_rseg);
+ undo log to the purge queue. */
+ trx_serialise(trx);
- /* It is not necessary to acquire trx->undo_mutex here because
- only a single OS thread is allowed to commit this transaction. */
- if (insert) {
- trx_undo_set_state_at_finish(insert, mtr);
+ if (UNIV_LIKELY_NULL(old_insert)) {
+ UT_LIST_REMOVE(rseg->old_insert_list, old_insert);
+ trx_purge_add_undo_to_history(trx, old_insert, mtr);
}
- if (update) {
- /* The undo logs and possible delete-marked records
- for updates and deletes will be purged later. */
- page_t* undo_hdr_page = trx_undo_set_state_at_finish(
- update, mtr);
-
- trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
+ if (undo) {
+ UT_LIST_REMOVE(rseg->undo_list, undo);
+ trx_purge_add_undo_to_history(trx, undo, mtr);
}
- mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
+ mutex_exit(&rseg->mutex);
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
- trx_sysf_t* sys_header = trx_sysf_get(mtr);
-#ifdef WITH_WSREP
- /* Update latest MySQL wsrep XID in trx sys header. */
- if (wsrep_is_wsrep_xid(trx->xid)) {
- trx_sys_update_wsrep_checkpoint(trx->xid, sys_header, mtr);
- }
-#endif /* WITH_WSREP */
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name != NULL
- && trx->mysql_log_file_name[0] != '\0') {
-
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- sys_header,
- mtr);
-
- trx->mysql_log_file_name = NULL;
- }
-
- return(true);
+ trx->mysql_log_file_name = NULL;
}
/********************************************************************
@@ -1531,9 +1220,6 @@ trx_update_mod_tables_timestamp(
/*============================*/
trx_t* trx) /*!< in: transaction */
{
-
- ut_ad(trx->id != 0);
-
/* consider using trx->start_time if calling time() is too
expensive here */
time_t now = ut_time();
@@ -1552,58 +1238,12 @@ trx_update_mod_tables_timestamp(
"garbage" in table->update_time is justified because
protecting it with a latch here would be too performance
intrusive. */
- (*it)->update_time = now;
+ it->first->update_time = now;
}
trx->mod_tables.clear();
}
-/**
-Erase the transaction from running transaction lists and serialization
-list. Active RW transaction list of a MVCC snapshot(ReadView::prepare)
-won't include this transaction after this call. All implicit locks are
-also released by this call as trx is removed from rw_trx_list.
-@param[in] trx Transaction to erase, must have an ID > 0
-@param[in] serialised true if serialisation log was written */
-static
-void
-trx_erase_lists(
- trx_t* trx,
- bool serialised)
-{
- ut_ad(trx->id > 0);
- trx_sys_mutex_enter();
-
- if (serialised) {
- UT_LIST_REMOVE(trx_sys->serialisation_list, trx);
- }
-
- trx_ids_t::iterator it = std::lower_bound(
- trx_sys->rw_trx_ids.begin(),
- trx_sys->rw_trx_ids.end(),
- trx->id);
- ut_ad(*it == trx->id);
- trx_sys->rw_trx_ids.erase(it);
-
- if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
-
- ut_ad(!trx->in_rw_trx_list);
- } else {
-
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = false);
- ut_ad(trx_sys_validate_trx_list());
-
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, true);
- }
- }
-
- trx_sys->rw_trx_set.erase(TrxTrack(trx->id));
-
- trx_sys_mutex_exit();
-}
-
/****************************************************************//**
Commits a transaction in memory. */
static
@@ -1611,21 +1251,18 @@ void
trx_commit_in_memory(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
- const mtr_t* mtr, /*!< in: mini-transaction of
+ const mtr_t* mtr) /*!< in: mini-transaction of
trx_write_serialisation_history(), or NULL if
the transaction did not modify anything */
- bool serialised)
- /*!< in: true if serialisation log was
- written */
{
trx->must_flush_log_later = false;
+ trx->read_view.close();
if (trx_is_autocommit_non_locking(trx)) {
ut_ad(trx->id == 0);
ut_ad(trx->read_only);
ut_a(!trx->is_recovered);
ut_ad(trx->rsegs.m_redo.rseg == NULL);
- ut_ad(!trx->in_rw_trx_list);
/* Note: We are asserting without holding the lock mutex. But
that is OK because this transaction is not waiting and cannot
@@ -1638,15 +1275,11 @@ trx_commit_in_memory(
there is an inherent race here around state transition during
printouts. We ignore this race for the sake of efficiency.
However, the trx_sys_t::mutex will protect the trx_t instance
- and it cannot be removed from the mysql_trx_list and freed
+ and it cannot be removed from the trx_list and freed
without first acquiring the trx_sys_t::mutex. */
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(trx->read_view, false);
- }
-
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
DBUG_LOG("trx", "Autocommit in memory: " << trx);
@@ -1654,9 +1287,9 @@ trx_commit_in_memory(
} else {
if (trx->id > 0) {
/* For consistent snapshot, we need to remove current
- transaction from running transaction id list for mvcc
- before doing commit and releasing locks. */
- trx_erase_lists(trx, serialised);
+ transaction from rw_trx_hash before doing commit and
+ releasing locks. */
+ trx_sys.deregister_rw(trx);
}
/* trx->id will be cleared in lock_trx_release_locks(trx). */
@@ -1671,19 +1304,14 @@ trx_commit_in_memory(
DEBUG_SYNC_C("after_trx_committed_in_memory");
if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
-
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
- if (trx->read_view != NULL) {
- trx_sys->mvcc->view_close(
- trx->read_view, false);
- }
-
} else {
+ trx_update_mod_tables_timestamp(trx);
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
}
}
- ut_ad(!trx->rsegs.m_redo.update_undo);
+ ut_ad(!trx->rsegs.m_redo.undo);
if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
mutex_enter(&rseg->mutex);
@@ -1691,14 +1319,14 @@ trx_commit_in_memory(
--rseg->trx_ref_count;
mutex_exit(&rseg->mutex);
- if (trx_undo_t*& insert = trx->rsegs.m_redo.insert_undo) {
+ if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) {
ut_ad(insert->rseg == rseg);
trx_undo_commit_cleanup(insert, false);
insert = NULL;
}
}
- ut_ad(!trx->rsegs.m_redo.insert_undo);
+ ut_ad(!trx->rsegs.m_redo.old_insert);
if (mtr != NULL) {
if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
@@ -1780,9 +1408,6 @@ trx_commit_in_memory(
DBUG_LOG("trx", "Commit in memory: " << trx);
trx->state = TRX_STATE_NOT_STARTED;
- /* trx->in_mysql_trx_list would hold between
- trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
- hold for recovered transactions or system transactions. */
assert_trx_is_free(trx);
trx_init(trx);
@@ -1793,19 +1418,18 @@ trx_commit_in_memory(
srv_wake_purge_thread_if_not_active();
}
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
+/** Commit a transaction and a mini-transaction.
+@param[in,out] trx transaction
+@param[in,out] mtr mini-transaction (NULL if no modifications) */
+void trx_commit_low(trx_t* trx, mtr_t* mtr)
{
assert_trx_nonlocking_or_in_list(trx);
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
ut_ad(!mtr || mtr->is_active());
- ut_ad(!mtr == !trx->has_logged());
+ ut_d(bool aborted = trx->in_rollback
+ && trx->error_state == DB_DEADLOCK);
+ ut_ad(!mtr == (aborted || !trx->has_logged_or_recovered()));
+ ut_ad(!mtr || !aborted);
/* undo_no is non-zero if we're doing the final commit. */
if (trx->fts_trx != NULL && trx->undo_no != 0) {
@@ -1829,10 +1453,12 @@ trx_commit_low(
}
}
- bool serialised;
+#ifndef DBUG_OFF
+ const bool debug_sync = trx->mysql_thd && trx->has_logged_persistent();
+#endif
if (mtr != NULL) {
- serialised = trx_write_serialisation_history(trx, mtr);
+ trx_write_serialisation_history(trx, mtr);
/* The following call commits the mini-transaction, making the
whole transaction committed in the file-based world, at this
@@ -1860,9 +1486,6 @@ trx_commit_low(
DBUG_SUICIDE();
});
/*--------------*/
-
- } else {
- serialised = false;
}
#ifndef DBUG_OFF
/* In case of this function is called from a stack executing
@@ -1873,12 +1496,12 @@ trx_commit_low(
thd->debug_sync_control defined any longer. However the stack
is possible only with a prepared trx not updating any data.
*/
- if (trx->mysql_thd != NULL && trx->has_logged_persistent()) {
+ if (debug_sync) {
DEBUG_SYNC_C("before_trx_state_committed_in_memory");
}
#endif
- trx_commit_in_memory(trx, mtr, serialised);
+ trx_commit_in_memory(trx, mtr);
}
/****************************************************************//**
@@ -1894,7 +1517,7 @@ trx_commit(
DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
DBUG_SUICIDE(););
- if (trx->has_logged()) {
+ if (trx->has_logged_or_recovered()) {
mtr = &local_mtr;
mtr->start();
} else {
@@ -1906,82 +1529,13 @@ trx_commit(
}
/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->is_recovered);
- ut_ad(!trx->rsegs.m_noredo.undo);
- ut_ad(!trx->rsegs.m_redo.update_undo);
-
- if (trx_undo_t*& undo = trx->rsegs.m_redo.insert_undo) {
- ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_commit_cleanup(undo, false);
- undo = NULL;
- }
-
- memset(&trx->rsegs, 0x0, sizeof(trx->rsegs));
- trx->undo_no = 0;
- trx->undo_rseg_space = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- trx_sys_mutex_enter();
-
- ut_a(!trx->read_only);
-
- UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = FALSE);
-
- trx_sys_mutex_exit();
-
- /* Change the transaction state without mutex protection, now
- that it no longer is in the trx_list. Recovered transactions
- are never placed in the mysql_trx_list. */
- ut_ad(trx->is_recovered);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
- DBUG_LOG("trx", "Cleanup at startup: " << trx);
- trx->id = 0;
- trx->state = TRX_STATE_NOT_STARTED;
-}
-
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-ReadView*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx) /*!< in/out: active transaction */
-{
- ut_ad(trx->state == TRX_STATE_ACTIVE);
-
- if (srv_read_only_mode) {
-
- ut_ad(trx->read_view == NULL);
- return(NULL);
-
- } else if (!MVCC::is_view_active(trx->read_view)) {
- trx_sys->mvcc->view_open(trx->read_view, trx);
- }
-
- return(trx->read_view);
-}
-
-/****************************************************************//**
Prepares a transaction for commit/rollback. */
void
trx_commit_or_rollback_prepare(
/*===========================*/
trx_t* trx) /*!< in/out: transaction */
{
- /* We are reading trx->state without holding trx_sys->mutex
+ /* We are reading trx->state without holding trx_sys.mutex
here, because the commit or rollback should be invoked for a
running (or recovered prepared) transaction that is associated
with the current thread. */
@@ -2107,10 +1661,6 @@ trx_commit_for_mysql(
trx->op_info = "committing";
- if (trx->id != 0) {
- trx_update_mod_tables_timestamp(trx);
- }
-
trx_commit(trx);
MONITOR_DEC(MONITOR_TRX_ACTIVE);
@@ -2158,7 +1708,6 @@ trx_mark_sql_stat_end(
break;
case TRX_STATE_NOT_STARTED:
trx->undo_no = 0;
- trx->undo_rseg_space = 0;
/* fall through */
case TRX_STATE_ACTIVE:
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
@@ -2174,8 +1723,7 @@ trx_mark_sql_stat_end(
}
/**********************************************************************//**
-Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
+Prints info about a transaction. */
void
trx_print_low(
/*==========*/
@@ -2196,12 +1744,10 @@ trx_print_low(
ibool newline;
const char* op_info;
- ut_ad(trx_sys_mutex_own());
-
fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
/* trx->state cannot change from or to NOT_STARTED while we
- are holding the trx_sys->mutex. It may change from ACTIVE to
+ are holding the trx_sys.mutex. It may change from ACTIVE to
PREPARED or COMMITTED. */
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
@@ -2294,7 +1840,7 @@ state_ok:
/**********************************************************************//**
Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
+The caller must hold lock_sys.mutex.
When possible, use trx_print() instead. */
void
trx_print_latched(
@@ -2305,7 +1851,6 @@ trx_print_latched(
or 0 to use the default max length */
{
ut_ad(lock_mutex_own());
- ut_ad(trx_sys_mutex_own());
trx_print_low(f, trx, max_query_len,
lock_number_of_rows_locked(&trx->lock),
@@ -2313,115 +1858,9 @@ trx_print_latched(
mem_heap_get_size(trx->lock.lock_heap));
}
-#ifdef WITH_WSREP
/**********************************************************************//**
Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
- FILE* f,
- /*!< in: output stream */
- const trx_t* trx,
- /*!< in: transaction */
- ulint max_query_len)
- /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ibool newline;
- const char* op_info;
-
- ut_ad(lock_mutex_own());
- ut_ad(trx->lock.trx_locks.count > 0);
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
-
- /* trx->state may change since trx_sys->mutex is not required */
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- fputs(", not started", f);
- goto state_ok;
- case TRX_STATE_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- goto state_ok;
- }
- fprintf(f, ", state %lu", (ulong) trx->state);
- ut_ad(0);
-state_ok:
-
- /* prevent a race condition */
- op_info = trx->op_info;
-
- if (*op_info) {
- putc(' ', f);
- fputs(op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- /* trx->lock.que_state of an ACTIVE transaction may change
- while we are not holding trx->mutex. We perform a dirty read
- for performance reasons. */
-
- switch (trx->lock.que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
- }
-
- if (trx->undo_no != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(
- f, trx->mysql_thd, static_cast<uint>(max_query_len));
- }
-}
-#endif /* WITH_WSREP */
-/**********************************************************************//**
-Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+Acquires and releases lock_sys.mutex. */
void
trx_print(
/*======*/
@@ -2440,52 +1879,10 @@ trx_print(
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
- mutex_enter(&trx_sys->mutex);
-
trx_print_low(f, trx, max_query_len,
n_rec_locks, n_trx_locks, heap_size);
-
- mutex_exit(&trx_sys->mutex);
}
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx_sys_mutex_own());
-
- /* Non-locking autocommits should not hold any locks and this
- function is only called from the locking code. */
- check_trx_state(trx);
-
- /* trx->state can change from or to NOT_STARTED while we are holding
- trx_sys->mutex for non-locking autocommit selects but not for other
- types of transactions. It may change from ACTIVE to PREPARED. Unless
- we are holding lock_sys->mutex, it may also change to COMMITTED. */
-
- switch (trx->state) {
- case TRX_STATE_PREPARED:
- return(TRUE);
-
- case TRX_STATE_ACTIVE:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- return(TRUE);
-
- case TRX_STATE_NOT_STARTED:
- break;
- }
-
- ut_error;
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
/*******************************************************************//**
Compares the "weight" (or size) of two transactions. Transactions that
have edited non-transactional tables are considered heavier than ones
@@ -2528,11 +1925,10 @@ static
lsn_t
trx_prepare_low(trx_t* trx)
{
- mtr_t mtr;
+ ut_ad(!trx->rsegs.m_redo.old_insert);
+ ut_ad(!trx->is_recovered);
- /* It is not necessary to acquire trx->undo_mutex here because
- only the owning (connection) thread of the transaction is
- allowed to perform XA PREPARE. */
+ mtr_t mtr;
if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
@@ -2547,15 +1943,15 @@ trx_prepare_low(trx_t* trx)
mtr.commit();
}
- trx_undo_t* insert = trx->rsegs.m_redo.insert_undo;
- trx_undo_t* update = trx->rsegs.m_redo.update_undo;
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
- if (!insert && !update) {
+ if (!undo) {
/* There were no changes to persistent tables. */
return(0);
}
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
+ ut_ad(undo->rseg == rseg);
mtr.start();
@@ -2565,17 +1961,7 @@ trx_prepare_low(trx_t* trx)
world, at the serialization point of lsn. */
mutex_enter(&rseg->mutex);
-
- if (insert) {
- ut_ad(insert->rseg == rseg);
- trx_undo_set_state_at_prepare(trx, insert, false, &mtr);
- }
-
- if (update) {
- ut_ad(update->rseg == rseg);
- trx_undo_set_state_at_prepare(trx, update, false, &mtr);
- }
-
+ trx_undo_set_state_at_prepare(trx, undo, false, &mtr);
mutex_exit(&rseg->mutex);
/* Make the XA PREPARE durable. */
@@ -2600,13 +1986,10 @@ trx_prepare(
DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
- /*--------------------------------------*/
ut_a(trx->state == TRX_STATE_ACTIVE);
- trx_sys_mutex_enter();
+ trx_mutex_enter(trx);
trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys_mutex_exit();
- /*--------------------------------------*/
+ trx_mutex_exit(trx);
if (lsn) {
/* Depending on the my.cnf options, we may now write the log
@@ -2642,141 +2025,118 @@ void trx_prepare_for_mysql(trx_t* trx)
trx->op_info = "";
}
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions stored in xid_list */
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len) /*!< in: number of slots in xid_list */
-{
- const trx_t* trx;
- ulint count = 0;
-
- ut_ad(xid_list);
- ut_ad(len);
-
- /* We should set those transactions which are in the prepared state
- to the xid_list */
-
- trx_sys_mutex_enter();
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
-
- /* The state of a read-write transaction cannot change
- from or to NOT_STARTED while we are holding the
- trx_sys->mutex. It may change to PREPARED, but not if
- trx->is_recovered. It may also change to COMMITTED. */
- if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
- xid_list[count] = *trx->xid;
-
- if (count == 0) {
- ib::info() << "Starting recovery for"
- " XA transactions...";
- }
-
- ib::info() << "Transaction "
- << trx_get_id_for_print(trx)
- << " in prepared state after recovery";
+struct trx_recover_for_mysql_callback_arg
+{
+ XID *xid_list;
+ uint len;
+ uint count;
+};
- ib::info() << "Transaction contains changes to "
- << trx->undo_no << " rows";
- count++;
+static my_bool trx_recover_for_mysql_callback(rw_trx_hash_element_t *element,
+ trx_recover_for_mysql_callback_arg *arg)
+{
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ /*
+ The state of a read-write transaction can only change from ACTIVE to
+ PREPARED while we are holding the element->mutex. But since this code
+ is executed at startup, no state change should occur.
+ */
+ if (trx_state_eq(trx, TRX_STATE_PREPARED))
+ {
+ ut_ad(trx->is_recovered);
+ if (arg->count == 0)
+ ib::info() << "Starting recovery for XA transactions...";
+ ib::info() << "Transaction " << trx_get_id_for_print(trx)
+ << " in prepared state after recovery";
+ ib::info() << "Transaction contains changes to " << trx->undo_no
+ << " rows";
+ arg->xid_list[arg->count++]= *trx->xid;
+ }
+ }
+ mutex_exit(&element->mutex);
+ return arg->count == arg->len;
+}
- if (count == len) {
- break;
- }
- }
- }
- trx_sys_mutex_exit();
+/**
+ Find prepared transaction objects for recovery.
- if (count > 0){
- ib::info() << count << " transactions in prepared state"
- " after recovery";
- }
+ @param[out] xid_list prepared transactions
+ @param[in] len number of slots in xid_list
- return(int (count));
-}
+ @return number of prepared transactions stored in xid_list
+*/
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-static MY_ATTRIBUTE((warn_unused_result))
-trx_t*
-trx_get_trx_by_xid_low(
-/*===================*/
- XID* xid) /*!< in: X/Open XA transaction
- identifier */
+int trx_recover_for_mysql(XID *xid_list, uint len)
{
- trx_t* trx;
-
- ut_ad(trx_sys_mutex_own());
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
+ trx_recover_for_mysql_callback_arg arg= { xid_list, len, 0 };
+
+ ut_ad(xid_list);
+ ut_ad(len);
+
+ /* Fill xid_list with PREPARED transactions. */
+ trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
+ (trx_recover_for_mysql_callback), &arg);
+ if (arg.count)
+ ib::info() << arg.count
+ << " transactions in prepared state after recovery";
+ return int(arg.count);
+}
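For orientation, the recovery scan above is a fill-and-stop callback iteration: the callback copies each PREPARED entry into the caller's fixed-size array and returns true once the array is full, which ends the iterate_no_dups() walk. Below is a standalone sketch of only that control flow; FakeTrx, RecoverArg and iterate_prepared are invented names, and the real code walks trx_sys.rw_trx_hash under per-element mutexes.

#include <cstdio>
#include <vector>

struct FakeTrx { unsigned long long xid; bool prepared; };

struct RecoverArg {
    unsigned long long* xid_list;
    unsigned            len;
    unsigned            count;
};

/* Copy a prepared entry; report "stop" once the output array is full,
   mirroring "return arg->count == arg->len" above. */
static bool recover_callback(const FakeTrx& trx, RecoverArg* arg)
{
    if (trx.prepared) {
        arg->xid_list[arg->count++] = trx.xid;
    }
    return arg->count == arg->len;
}

/* Walk all entries, stopping early when the callback asks to stop. */
static unsigned iterate_prepared(const std::vector<FakeTrx>& trxs,
                                 unsigned long long* xid_list, unsigned len)
{
    RecoverArg arg = { xid_list, len, 0 };
    for (const FakeTrx& trx : trxs) {
        if (recover_callback(trx, &arg)) {
            break;
        }
    }
    return arg.count;
}

int main()
{
    std::vector<FakeTrx> trxs = {{7, true}, {8, false}, {9, true}, {10, true}};
    unsigned long long xids[2];
    std::printf("%u prepared transactions collected\n",
                iterate_prepared(trxs, xids, 2)); /* prints 2; entry 10 is never visited */
}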
- /* Compare two X/Open XA transaction id's: their
- length should be the same and binary comparison
- of gtrid_length+bqual_length bytes should be
- the same */
- if (trx->is_recovered
- && trx_state_eq(trx, TRX_STATE_PREPARED)
- && xid->eq((XID*)trx->xid)) {
+struct trx_get_trx_by_xid_callback_arg
+{
+ XID *xid;
+ trx_t *trx;
+};
- /* Invalidate the XID, so that subsequent calls
- will not find it. */
- trx->xid->null();
- break;
- }
- }
- return(trx);
+static my_bool trx_get_trx_by_xid_callback(rw_trx_hash_element_t *element,
+ trx_get_trx_by_xid_callback_arg *arg)
+{
+ my_bool found= 0;
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) &&
+ arg->xid->eq(reinterpret_cast<XID*>(trx->xid)))
+ {
+ /* Invalidate the XID, so that subsequent calls will not find it. */
+ trx->xid->null();
+ arg->trx= trx;
+ found= 1;
+ }
+ }
+ mutex_exit(&element->mutex);
+ return found;
}
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- XID* xid) /*!< in: X/Open XA transaction identifier */
-{
- trx_t* trx;
- if (xid == NULL) {
+/**
+ Finds PREPARED XA transaction by xid.
- return(NULL);
- }
+ trx may have been committed, unless the caller is holding lock_sys.mutex.
- trx_sys_mutex_enter();
+ @param[in] xid X/Open XA transaction identifier
- /* Recovered/Resurrected transactions are always only on the
- trx_sys_t::rw_trx_list. */
- trx = trx_get_trx_by_xid_low((XID*)xid);
+ @return trx or NULL; on match, the trx->xid will be invalidated;
+*/
- trx_sys_mutex_exit();
+trx_t *trx_get_trx_by_xid(XID *xid)
+{
+ trx_get_trx_by_xid_callback_arg arg= { xid, 0 };
- return(trx);
+ if (xid)
+ trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (trx_get_trx_by_xid_callback), &arg);
+ return arg.trx;
}
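The lookup deliberately clears the XID of the transaction it returns, so a second lookup with the same identifier cannot return the same transaction twice. A minimal standalone sketch of that find-and-invalidate idiom follows; FakeTrx and find_and_invalidate are made-up names, and the real code iterates trx_sys.rw_trx_hash with the callback shown above rather than a vector.

#include <cstdio>
#include <vector>

struct FakeTrx { long xid; bool prepared; };

/* Return the first prepared entry with a matching xid and invalidate its
   xid so that repeated lookups do not find the same entry again. */
static FakeTrx* find_and_invalidate(std::vector<FakeTrx>& trxs, long xid)
{
    for (FakeTrx& trx : trxs) {
        if (trx.prepared && trx.xid == xid) {
            trx.xid = -1;   /* analogue of trx->xid->null() */
            return &trx;
        }
    }
    return nullptr;
}

int main()
{
    std::vector<FakeTrx> trxs = {{42, true}, {43, true}};
    std::printf("first lookup:  %p\n", (void*) find_and_invalidate(trxs, 42));
    std::printf("second lookup: %p\n", (void*) find_and_invalidate(trxs, 42)); /* nullptr */
}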
+
/*************************************************************//**
Starts the transaction if it is not yet started. */
void
@@ -2795,7 +2155,7 @@ trx_start_if_not_started_xa_low(
/* If the transaction is tagged as read-only then
it can only write to temp tables and for such
transactions we don't want to move them to the
- trx_sys_t::rw_trx_list. */
+ trx_sys_t::rw_trx_hash. */
if (!trx->read_only) {
trx_set_rw_mode(trx);
}
@@ -2894,15 +2254,6 @@ trx_start_for_ddl_low(
return;
case TRX_STATE_ACTIVE:
-
- /* We have this start if not started idiom, therefore we
- can't add stronger checks here. */
- trx->ddl = true;
-
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- ut_ad(trx->will_lock > 0);
- return;
-
case TRX_STATE_PREPARED:
case TRX_STATE_COMMITTED_IN_MEMORY:
break;
@@ -2924,48 +2275,28 @@ trx_set_rw_mode(
trx_t* trx) /*!< in/out: transaction that is RW */
{
ut_ad(trx->rsegs.m_redo.rseg == 0);
- ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx_is_autocommit_non_locking(trx));
ut_ad(!trx->read_only);
+ ut_ad(trx->id == 0);
if (high_level_read_only) {
return;
}
/* Function is promoting existing trx from ro mode to rw mode.
- In this process it has acquired trx_sys->mutex as it plan to
+ In this process it has acquired trx_sys.mutex as it plans to
move trx from ro list to rw list. If, in future, some other thread
looks at this trx object while it is being promoted, then ensure
that both threads are synced by acquiring trx->mutex to avoid decisions
based on an inconsistent view formed during promotion. */
trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
-
ut_ad(trx->rsegs.m_redo.rseg != 0);
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(trx->id == 0);
- trx->id = trx_sys_get_new_trx_id();
-
- trx_sys->rw_trx_ids.push_back(trx->id);
-
- trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
+ trx_sys.register_rw(trx);
/* So that we can see our own changes. */
- if (MVCC::is_view_active(trx->read_view)) {
- MVCC::set_view_creator_trx_id(trx->read_view, trx->id);
- }
-
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
+ if (trx->read_view.is_open()) {
+ trx->read_view.set_creator_trx_id(trx->id);
}
-#endif /* UNIV_DEBUG */
-
- UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
-
- ut_d(trx->in_rw_trx_list = true);
-
- mutex_exit(&trx_sys->mutex);
}
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 98db4bf4e9e..61ba65ebc19 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -74,16 +74,19 @@ can still remove old versions from the bottom of the stack. */
-------------------------------------------------------------------
latches?
-------
-The contention of the trx_sys_t::mutex should be minimized. When a transaction
+The contention of the trx_sys.mutex should be minimized. When a transaction
does its first insert or modification in an index, an undo log is assigned for it.
Then we must have an x-latch on the rollback segment header.
- When the transaction does more modifys or rolls back, the undo log is
-protected with undo_mutex in the transaction.
- When the transaction commits, its insert undo log is either reset and
-cached for a fast reuse, or freed. In these cases we must have an x-latch on
-the rollback segment page. The update undo log is put to the history list. If
-it is not suitable for reuse, its slot in the rollback segment is reset. In
-both cases, an x-latch must be acquired on the rollback segment.
+ When the transaction performs modifications or rolls back, its
+undo log is protected by undo page latches.
+Only the thread that is associated with the transaction may hold multiple
+undo page latches at a time. Undo pages are always private to a single
+transaction. Other threads that are performing MVCC reads
+or checking for implicit locks will lock at most one undo page at a time
+in trx_undo_get_undo_rec_low().
+ When the transaction commits, its persistent undo log is added
+to the history list. If it is not suitable for reuse, its slot is reset.
+In both cases, an x-latch must be acquired on the rollback segment header page.
The purge operation steps through the history list without modifying
it until a truncate operation occurs, which can remove undo logs from the end
of the list and release undo log segments. In stepping through the list,
@@ -91,16 +94,6 @@ s-latches on the undo log pages are enough, but in a truncate, x-latches must
be obtained on the rollback segment and individual pages. */
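The rule stated in the comment above — the owning transaction thread may hold several undo page latches at once, while MVCC readers and implicit-lock checks s-latch at most one undo page at a time — can be pictured with ordinary shared locks. The toy below is only an illustration under that assumption; UndoPage, owner_appends and reader_looks_up are invented, and InnoDB uses its own rw-locks inside mini-transactions rather than std::shared_mutex.

#include <mutex>
#include <shared_mutex>
#include <vector>

struct UndoPage { std::shared_mutex latch; };

/* The owner may x-latch the last undo page and a freshly added page together. */
static void owner_appends(std::vector<UndoPage>& pages)
{
    std::unique_lock<std::shared_mutex> last(pages[0].latch);
    std::unique_lock<std::shared_mutex> added(pages[1].latch);
    /* ... write undo records spanning the two pages ... */
}

/* A reader takes one s-latch at a time, never two undo pages at once,
   as in trx_undo_get_undo_rec_low(). */
static void reader_looks_up(std::vector<UndoPage>& pages, unsigned i)
{
    std::shared_lock<std::shared_mutex> one(pages[i].latch);
    /* ... copy a single undo record ... */
}

int main()
{
    std::vector<UndoPage> pages(2);
    owner_appends(pages);
    reader_looks_up(pages, 0);
}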
/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr); /*!< in: mtr */
-
-/********************************************************************//**
Creates and initializes an undo log memory object.
@return own: the undo log memory object */
static
@@ -109,26 +102,58 @@ trx_undo_mem_create(
/*================*/
trx_rseg_t* rseg, /*!< in: rollback segment memory object */
ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open XA transaction identification*/
ulint page_no,/*!< in: undo log header page number */
ulint offset);/*!< in: undo log header byte offset on page */
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
+
+/** Determine the start offset of undo log records of an undo log page.
+@param[in] undo_page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset
+@return start offset */
static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in: mtr */
+uint16_t
+trx_undo_page_get_start(const page_t* undo_page, ulint page_no, ulint offset)
+{
+ return page_no == page_get_page_no(undo_page)
+ ? mach_read_from_2(offset + TRX_UNDO_LOG_START + undo_page)
+ : TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
+}
+
+/** Get the first undo log record on a page.
+@param[in] page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to first record
+@retval NULL if none exists */
+static
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(page_t* page, ulint page_no, ulint offset)
+{
+ ulint start = trx_undo_page_get_start(page, page_no, offset);
+ return start == trx_undo_page_get_end(page, page_no, offset)
+ ? NULL
+ : page + start;
+}
+
+/** Get the last undo log record on a page.
+@param[in] page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to last record
+@retval NULL if none exists */
+static
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(page_t* page, ulint page_no, ulint offset)
+{
+ ulint end = trx_undo_page_get_end(page, page_no, offset);
+
+ return trx_undo_page_get_start(page, page_no, offset) == end
+ ? NULL
+ : page + mach_read_from_2(page + end - 2);
+}
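These three helpers work entirely on 2-byte big-endian offsets stored in the page: the log start offset, the free/end offset, and the back-pointer held in the two bytes just before the end of the record area. Start equal to end means the log is empty. The sketch below reproduces only that arithmetic with a plain buffer; read2/write2 stand in for mach_read_from_2/mach_write_to_2 and the offsets are made up.

#include <cstdint>
#include <cstdio>

static uint16_t read2(const uint8_t* p) { return uint16_t((p[0] << 8) | p[1]); }
static void write2(uint8_t* p, uint16_t v) { p[0] = uint8_t(v >> 8); p[1] = uint8_t(v); }

int main()
{
    uint8_t page[512] = {};

    const uint16_t start = 100; /* analogue of the TRX_UNDO_LOG_START offset */
    uint16_t end = 100;         /* analogue of trx_undo_page_get_end() */

    /* start == end: the log is empty, so "first record" would be NULL. */
    std::printf("empty: %d\n", start == end);

    /* Append one 10-byte record at offset 100; its last two bytes point
       back to the record start, which is how the last record is found. */
    end = 110;
    write2(page + end - 2, start);

    const uint8_t* first = page + start;
    const uint8_t* last = page + read2(page + end - 2);
    std::printf("first at %td, last at %td\n", first - page, last - page);
}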
/***********************************************************************//**
Gets the previous record in an undo log from the previous page.
@@ -172,6 +197,31 @@ trx_undo_get_prev_rec_from_prev_page(
return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
}
+/** Get the previous undo log record.
+@param[in] rec undo log record
+@param[in] page_no undo log header page number
+@param[in] offset undo log header page offset
+@return pointer to record
+@retval NULL if none */
+static
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(trx_undo_rec_t* rec, ulint page_no, ulint offset)
+{
+ page_t* undo_page;
+ ulint start;
+
+ undo_page = (page_t*) ut_align_down(rec, srv_page_size);
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+
+ if (start + undo_page == rec) {
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(rec - 2));
+}
+
/***********************************************************************//**
Gets the previous record in an undo log.
@return undo log record, the page s-latched, NULL if none */
@@ -292,7 +342,7 @@ trx_undo_get_next_rec(
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
- ulint space,
+ fil_space_t* space,
ulint page_no,
ulint offset,
ulint mode,
@@ -301,7 +351,7 @@ trx_undo_get_first_rec(
page_t* undo_page;
trx_undo_rec_t* rec;
- const page_id_t page_id(space, page_no);
+ const page_id_t page_id(space->id, page_no);
if (mode == RW_S_LATCH) {
undo_page = trx_undo_page_get_s_latched(page_id, mtr);
@@ -315,176 +365,202 @@ trx_undo_get_first_rec(
return(rec);
}
- return(trx_undo_get_next_rec_from_next_page(space,
+ return(trx_undo_get_next_rec_from_next_page(space->id,
undo_page, page_no, offset,
mode, mtr));
}
/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-/**********************************************************************//**
-Writes the mtr log entry of an undo log page initialization. */
-UNIV_INLINE
-void
-trx_undo_page_init_log(
-/*===================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint type, /*!< in: undo log type */
- mtr_t* mtr) /*!< in: mtr */
+/** Parse MLOG_UNDO_INIT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+trx_undo_parse_page_init(const byte* ptr, const byte* end_ptr, page_t* page)
{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
+ if (end_ptr <= ptr) {
+ return NULL;
+ }
- mlog_catenate_ulint_compressed(mtr, type);
+ const ulint type = *ptr++;
+
+ if (type > TRX_UNDO_UPDATE) {
+ recv_sys->found_corrupt_log = true;
+ } else if (page) {
+ /* Starting with MDEV-12288 in MariaDB 10.3.1, we use
+ type=0 for the combined insert/update undo log
+ pages. MariaDB 10.2 would use TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE. */
+ mach_write_to_2(FIL_PAGE_TYPE + page, FIL_PAGE_UNDO_LOG);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + page,
+ type);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + page,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ }
+
+ return(const_cast<byte*>(ptr));
}
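The parser follows the usual redo-parse pattern: verify that at least one byte remains, consume the type byte, and flag corruption instead of applying an out-of-range value. A standalone sketch of that shape, with a plain buffer instead of recv_sys and an invented helper name:

#include <cstdint>
#include <cstdio>

/* Parse a one-byte "type" payload the way trx_undo_parse_page_init() does:
   return nullptr when the record is truncated, otherwise advance past the
   byte and treat out-of-range values as corruption. */
static const uint8_t* parse_one_byte_type(const uint8_t* ptr,
                                          const uint8_t* end_ptr,
                                          bool* corrupt)
{
    if (end_ptr <= ptr) {
        return nullptr;        /* incomplete record: wait for more log */
    }
    const unsigned type = *ptr++;
    if (type > 2) {            /* 2 stands in for TRX_UNDO_UPDATE */
        *corrupt = true;
    }
    return ptr;
}

int main()
{
    const uint8_t rec[] = { 0 };   /* type=0: combined insert/update undo page */
    bool corrupt = false;
    const uint8_t* next = parse_one_byte_type(rec, rec + sizeof rec, &corrupt);
    std::printf("consumed=%td corrupt=%d\n", next ? next - rec : -1, corrupt);
}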
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
+/** Parse MLOG_UNDO_HDR_REUSE for crash-upgrade from MariaDB 10.2.
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page undo log page or NULL
@return end of log record or NULL */
byte*
-trx_undo_parse_page_init(
-/*=====================*/
- const byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
+trx_undo_parse_page_header_reuse(
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* undo_page)
{
- ulint type;
+ trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
- type = mach_parse_compressed(&ptr, end_ptr);
+ if (!ptr || !undo_page) {
+ return(const_cast<byte*>(ptr));
+ }
- if (ptr == NULL) {
+ compile_time_assert(TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE
+ + TRX_UNDO_LOG_XA_HDR_SIZE
+ < UNIV_PAGE_SIZE_MIN - 100);
- return(NULL);
- }
+ const ulint new_free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE
+ + TRX_UNDO_LOG_OLD_HDR_SIZE;
- if (page) {
- trx_undo_page_init(page, type, mtr);
- }
+ /* Insert undo data is not needed after commit: we may free all
+ the space on the page */
- return(const_cast<byte*>(ptr));
-}
+ ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
+ + undo_page)
+ == TRX_UNDO_INSERT);
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
+ byte* page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+ mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+ mach_write_to_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + undo_page,
+ TRX_UNDO_ACTIVE);
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+ byte* log_hdr = undo_page + TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
+ mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+ mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+ mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
+ mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
+
+ return(const_cast<byte*>(ptr));
+}
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+/** Initialize the fields in an undo log segment page.
+@param[in,out] undo_block undo page
+@param[in,out] mtr mini-transaction */
+static void trx_undo_page_init(buf_block_t* undo_block, mtr_t* mtr)
+{
+ page_t* page = undo_block->frame;
+ mach_write_to_2(FIL_PAGE_TYPE + page, FIL_PAGE_UNDO_LOG);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + page, 0);
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_START + page,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
+ mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + page,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
+ mtr->set_modified();
+ switch (mtr->get_log_mode()) {
+ case MTR_LOG_NONE:
+ case MTR_LOG_NO_REDO:
+ return;
+ case MTR_LOG_SHORT_INSERTS:
+ ut_ad(0);
+ /* fall through */
+ case MTR_LOG_ALL:
+ break;
+ }
- trx_undo_page_init_log(undo_page, type, mtr);
+ byte* log_ptr = mtr->get_log()->open(11 + 1);
+ log_ptr = mlog_write_initial_log_record_low(
+ MLOG_UNDO_INIT,
+ undo_block->page.id.space(),
+ undo_block->page.id.page_no(),
+ log_ptr, mtr);
+ *log_ptr++ = 0;
+ mlog_close(mtr, log_ptr);
}
-/***************************************************************//**
-Creates a new undo log segment in file.
-@return DB_SUCCESS if page creation OK possible error codes are:
-DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-trx_undo_seg_create(
-/*================*/
- trx_rseg_t* rseg MY_ATTRIBUTE((unused)),/*!< in: rollback segment */
- trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page
- x-latched */
- ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint* id, /*!< out: slot index within rseg header */
- page_t** undo_page,
- /*!< out: segment header page x-latched, NULL
- if there was an error */
- mtr_t* mtr) /*!< in: mtr */
+/** Create an undo log segment.
+@param[in,out] space tablespace
+@param[in,out] rseg_hdr rollback segment header (x-latched)
+@param[out] id undo slot number
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return undo log block
+@retval NULL on failure */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+buf_block_t*
+trx_undo_seg_create(fil_space_t* space, trx_rsegf_t* rseg_hdr, ulint* id,
+ dberr_t* err, mtr_t* mtr)
{
ulint slot_no;
- ulint space;
buf_block_t* block;
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
ulint n_reserved;
bool success;
- dberr_t err = DB_SUCCESS;
- ut_ad(mtr != NULL);
- ut_ad(id != NULL);
- ut_ad(rseg_hdr != NULL);
- ut_ad(mutex_own(&(rseg->mutex)));
-
- /* fputs(type == TRX_UNDO_INSERT
- ? "Creating insert undo log segment\n"
- : "Creating update undo log segment\n", stderr); */
- slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
+ slot_no = trx_rsegf_undo_find_free(rseg_hdr);
if (slot_no == ULINT_UNDEFINED) {
ib::warn() << "Cannot find a free slot for an undo log. Do"
" you have too many active transactions running"
" concurrently?";
- return(DB_TOO_MANY_CONCURRENT_TRXS);
+ *err = DB_TOO_MANY_CONCURRENT_TRXS;
+ return NULL;
}
- space = page_get_space_id(page_align(rseg_hdr));
-
success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
mtr);
if (!success) {
-
- return(DB_OUT_OF_FILE_SPACE);
+ *err = DB_OUT_OF_FILE_SPACE;
+ return NULL;
}
/* Allocate a new file segment for the undo log */
- block = fseg_create_general(space, 0,
- TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+ block = fseg_create(space, 0, TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
+ mtr, true);
- fil_space_release_free_extents(space, n_reserved);
+ space->release_free_extents(n_reserved);
if (block == NULL) {
- /* No space left */
-
- return(DB_OUT_OF_FILE_SPACE);
+ *err = DB_OUT_OF_FILE_SPACE;
+ return NULL;
}
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- *undo_page = buf_block_get_frame(block);
-
- page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
+ trx_undo_page_init(block, mtr);
- trx_undo_page_init(*undo_page, type, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
+ mlog_write_ulint(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + block->frame,
TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
MLOG_2BYTES, mtr);
- mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
+ mlog_write_ulint(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG + block->frame,
+ 0, MLOG_2BYTES, mtr);
- flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
+ flst_init(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame, mtr);
- flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
- page_hdr + TRX_UNDO_PAGE_NODE, mtr);
+ flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame,
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + block->frame,
+ mtr);
- trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
- page_get_page_no(*undo_page), mtr);
*id = slot_no;
+ trx_rsegf_set_nth_undo(rseg_hdr, slot_no, block->page.id.page_no(),
+ mtr);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
- return(err);
+ *err = DB_SUCCESS;
+ return block;
}
/**********************************************************************//**
@@ -537,7 +613,7 @@ trx_undo_header_create(
new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
+ ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < srv_page_size - 100);
mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
@@ -559,7 +635,7 @@ trx_undo_header_create(
log_hdr = undo_page + free;
- mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
+ mach_write_to_2(log_hdr + TRX_UNDO_NEEDS_PURGE, 1);
mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
@@ -607,10 +683,7 @@ trx_undo_write_xid(
Read X/Open XA Transaction Identification (XID) from undo log header */
static
void
-trx_undo_read_xid(
-/*==============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- XID* xid) /*!< out: X/Open XA Transaction Identification */
+trx_undo_read_xid(const trx_ulogf_t* log_hdr, XID* xid)
{
xid->formatID=static_cast<long>(mach_read_from_4(
log_hdr + TRX_UNDO_XA_FORMAT));
@@ -662,23 +735,7 @@ trx_undo_header_add_space_for_xid(
MLOG_2BYTES, mtr);
}
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header reuse. */
-UNIV_INLINE
-void
-trx_undo_insert_header_reuse_log(
-/*=============================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
-
- mlog_catenate_ull_compressed(mtr, trx_id);
-}
-
-/** Parse the redo log entry of an undo log page header create or reuse.
-@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+/** Parse the redo log entry of an undo log page header create.
@param[in] ptr redo log record
@param[in] end_ptr end of log buffer
@param[in,out] page page frame or NULL
@@ -686,7 +743,6 @@ trx_undo_insert_header_reuse_log(
@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
- mlog_id_t type,
const byte* ptr,
const byte* end_ptr,
page_t* page,
@@ -695,93 +751,20 @@ trx_undo_parse_page_header(
trx_id_t trx_id = mach_u64_parse_compressed(&ptr, end_ptr);
if (ptr != NULL && page != NULL) {
- switch (type) {
- case MLOG_UNDO_HDR_CREATE:
- trx_undo_header_create(page, trx_id, mtr);
- return(const_cast<byte*>(ptr));
- case MLOG_UNDO_HDR_REUSE:
- trx_undo_insert_header_reuse(page, trx_id, mtr);
- return(const_cast<byte*>(ptr));
- default:
- break;
- }
- ut_ad(0);
+ trx_undo_header_create(page, trx_id, mtr);
+ return(const_cast<byte*>(ptr));
}
return(const_cast<byte*>(ptr));
}
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- /* Insert undo data is not needed after commit: we may free all
- the space on the page */
-
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- log_hdr = undo_page + free;
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- /* Write the log record MLOG_UNDO_HDR_REUSE */
- trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
/** Allocate an undo log page.
-@param[in,out] trx transaction
@param[in,out] undo undo log
@param[in,out] mtr mini-transaction that does not hold any page latch
@return X-latched block if success
@retval NULL on failure */
-buf_block_t*
-trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr)
{
- ut_ad(mutex_own(&trx->undo_mutex));
-
trx_rseg_t* rseg = undo->rseg;
buf_block_t* new_block = NULL;
ulint n_reserved;
@@ -792,14 +775,11 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
counterpart of the tree latch, which is the rseg mutex. */
mutex_enter(&rseg->mutex);
- if (rseg->curr_size == rseg->max_size) {
- goto func_exit;
- }
header_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
- if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1,
+ if (!fsp_reserve_free_extents(&n_reserved, undo->rseg->space, 1,
FSP_UNDO, mtr)) {
goto func_exit;
}
@@ -809,7 +789,7 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+ header_page,
undo->top_page_no + 1, FSP_UP, TRUE, mtr, mtr);
- fil_space_release_free_extents(undo->space, n_reserved);
+ rseg->space->release_free_extents(n_reserved);
if (!new_block) {
goto func_exit;
@@ -819,7 +799,7 @@ trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE);
undo->last_page_no = new_block->page.id.page_no();
- trx_undo_page_init(new_block->frame, undo->type, mtr);
+ trx_undo_page_init(new_block, mtr);
flst_add_last(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ header_page,
@@ -842,9 +822,8 @@ ulint
trx_undo_free_page(
/*===============*/
trx_rseg_t* rseg, /*!< in: rollback segment */
- ibool in_history, /*!< in: TRUE if the undo log is in the history
+ bool in_history, /*!< in: TRUE if the undo log is in the history
list */
- ulint space, /*!< in: space */
ulint hdr_page_no, /*!< in: header page number */
ulint page_no, /*!< in: page number to free: must not be the
header page */
@@ -857,6 +836,7 @@ trx_undo_free_page(
fil_addr_t last_addr;
trx_rsegf_t* rseg_header;
ulint hist_size;
+ const ulint space = rseg->space->id;
ut_a(hdr_page_no != page_no);
ut_ad(mutex_own(&(rseg->mutex)));
@@ -876,7 +856,7 @@ trx_undo_free_page(
rseg->curr_size--;
if (in_history) {
- rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
MLOG_4BYTES, mtr);
@@ -899,40 +879,11 @@ trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr)
ut_ad(undo->size > 0);
undo->last_page_no = trx_undo_free_page(
- undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, undo->last_page_no, mtr);
+ undo->rseg, false, undo->hdr_page_no, undo->last_page_no, mtr);
undo->size--;
}
-/** Empties an undo log header page of undo records for that undo log.
-Other undo logs may still have records on that page, if it is an update
-undo log.
-@param[in] space space
-@param[in] hdr_page_no header page number
-@param[in] hdr_offset header offset
-@param[in,out] mtr mini-transaction */
-static
-void
-trx_undo_empty_header_page(
- ulint space,
- ulint hdr_page_no,
- ulint hdr_offset,
- mtr_t* mtr)
-{
- page_t* header_page;
- trx_ulogf_t* log_hdr;
- ulint end;
-
- header_page = trx_undo_page_get(page_id_t(space, hdr_page_no), mtr);
-
- log_hdr = header_page + hdr_offset;
-
- end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
-}
-
/** Truncate the tail of an undo log during rollback.
@param[in,out] undo undo log
@param[in] limit all undo logs after this limit will be discarded
@@ -952,7 +903,8 @@ trx_undo_truncate_end(trx_undo_t* undo, undo_no_t limit, bool is_temp)
trx_undo_rec_t* trunc_here = NULL;
page_t* undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->last_page_no), &mtr);
+ page_id_t(undo->rseg->space->id, undo->last_page_no),
+ &mtr);
trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
undo_page, undo->hdr_page_no, undo->hdr_offset);
while (rec) {
@@ -974,7 +926,7 @@ function_exit:
if (trunc_here) {
mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page,
+ ulint(trunc_here - undo_page),
MLOG_2BYTES, &mtr);
}
@@ -1044,12 +996,18 @@ loop:
page_no = page_get_page_no(undo_page);
if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(rseg->space,
- hdr_page_no, hdr_offset,
- &mtr);
+ uint16_t end = mach_read_from_2(hdr_offset + TRX_UNDO_NEXT_LOG
+ + undo_page);
+ if (end == 0) {
+ end = mach_read_from_2(TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE
+ + undo_page);
+ }
+
+ mlog_write_ulint(undo_page + hdr_offset + TRX_UNDO_LOG_START,
+ end, MLOG_2BYTES, &mtr);
} else {
- trx_undo_free_page(rseg, TRUE, rseg->space, hdr_page_no,
- page_no, &mtr);
+ trx_undo_free_page(rseg, true, hdr_page_no, page_no, &mtr);
}
mtr_commit(&mtr);
@@ -1085,7 +1043,7 @@ trx_undo_seg_free(
mutex_enter(&(rseg->mutex));
- seg_header = trx_undo_page_get(page_id_t(undo->space,
+ seg_header = trx_undo_page_get(page_id_t(undo->rseg->space->id,
undo->hdr_page_no),
&mtr)
+ TRX_UNDO_SEG_HDR;
@@ -1111,181 +1069,108 @@ trx_undo_seg_free(
/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-/********************************************************************//**
-Creates and initializes an undo log memory object according to the values
-in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create_at_db_start(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint page_no,/*!< in: undo log segment page number */
- mtr_t* mtr) /*!< in: mtr */
+/** Read an undo log when starting up the database.
+@param[in,out] rseg rollback segment
+@param[in] id rollback segment slot
+@param[in] page_no undo log segment page number
+@param[in,out] max_trx_id the largest observed transaction ID
+@return size of the undo log in pages */
+ulint
+trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
+ trx_id_t& max_trx_id)
{
- page_t* undo_page;
- trx_upagef_t* page_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_undo_t* undo;
- ulint type;
- ulint state;
- trx_id_t trx_id;
- ulint offset;
- fil_addr_t last_addr;
- page_t* last_page;
- trx_undo_rec_t* rec;
+ mtr_t mtr;
XID xid;
- ibool xid_exists = FALSE;
-
- ut_a(id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(page_id_t(rseg->space, page_no), mtr);
+ ut_ad(id < TRX_RSEG_N_SLOTS);
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
- mtr);
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
-
- state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
-
- offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
-
- undo_header = undo_page + offset;
+ mtr.start();
+ const page_t* undo_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, page_no), &mtr);
+ const ulint type = mach_read_from_2(
+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + undo_page);
+ ut_ad(type == 0 || type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
- trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
+ uint state = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ + undo_page);
+ uint offset = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG
+ + undo_page);
- xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- MLOG_1BYTE, mtr);
+ const trx_ulogf_t* undo_header = undo_page + offset;
/* Read X/Open XA transaction identification if it exists, or
set it to NULL. */
- xid.null();
- if (xid_exists == TRUE) {
+ if (undo_header[TRX_UNDO_XID_EXISTS]) {
trx_undo_read_xid(undo_header, &xid);
+ } else {
+ xid.null();
}
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid,
- page_no, offset);
- mutex_exit(&(rseg->mutex));
-
- undo->dict_operation = mtr_read_ulint(
- undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr);
-
- undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
- undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST);
-
- /* If the log segment is being freed, the page list is inconsistent! */
- if (state == TRX_UNDO_TO_FREE) {
-
- goto add_to_list;
+ trx_id_t trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
+ if (trx_id > max_trx_id) {
+ max_trx_id = trx_id;
}
- last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- undo->last_page_no = last_addr.page;
- undo->top_page_no = last_addr.page;
-
- last_page = trx_undo_page_get(
- page_id_t(rseg->space, undo->last_page_no), mtr);
+ mutex_enter(&rseg->mutex);
+ trx_undo_t* undo = trx_undo_mem_create(
+ rseg, id, trx_id, &xid, page_no, offset);
+ mutex_exit(&rseg->mutex);
- rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
+ undo->dict_operation = undo_header[TRX_UNDO_DICT_TRANS];
+ undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
+ undo->size = flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ + undo_page);
- if (rec == NULL) {
- undo->empty = TRUE;
+ if (UNIV_UNLIKELY(state == TRX_UNDO_TO_FREE)) {
+ /* This is an old-format insert_undo log segment that
+ is being freed. The page list is inconsistent. */
+ ut_ad(type == TRX_UNDO_INSERT);
+ state = TRX_UNDO_TO_PURGE;
} else {
- undo->empty = FALSE;
- undo->top_offset = rec - last_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
- }
-add_to_list:
- if (type == TRX_UNDO_INSERT) {
- if (state != TRX_UNDO_CACHED) {
+ if (state == TRX_UNDO_TO_PURGE
+ || state == TRX_UNDO_CACHED) {
+ trx_id_t id = mach_read_from_8(TRX_UNDO_TRX_NO
+ + undo_header);
+ if (id > max_trx_id) {
+ max_trx_id = id;
+ }
+ }
- UT_LIST_ADD_LAST(rseg->insert_undo_list, undo);
- } else {
+ fil_addr_t last_addr = flst_get_last(
+ TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + undo_page,
+ &mtr);
- UT_LIST_ADD_LAST(rseg->insert_undo_cached, undo);
+ undo->last_page_no = last_addr.page;
+ undo->top_page_no = last_addr.page;
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- }
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
- if (state != TRX_UNDO_CACHED) {
+ page_t* last_page = trx_undo_page_get(
+ page_id_t(rseg->space->id, undo->last_page_no), &mtr);
- UT_LIST_ADD_LAST(rseg->update_undo_list, undo);
+ if (const trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
+ last_page, page_no, offset)) {
+ undo->top_offset = ulint(rec - last_page);
+ undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
+ ut_ad(!undo->empty());
} else {
-
- UT_LIST_ADD_LAST(rseg->update_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
}
}
- return(undo);
-}
-
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy. This
-function is only called when the database is started or a new rollback
-segment is created.
-@return the combined size of undo log segments in pages */
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg) /*!< in: rollback segment memory object */
-{
- ulint size = 0;
- trx_rsegf_t* rseg_header;
- ulint i;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
- ulint page_no;
-
- page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
-
- /* In forced recovery: try to avoid operations which look
- at database pages; undo logs are rapidly changing data, and
- the probability that they are in an inconsistent state is
- high */
-
- if (page_no != FIL_NULL
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- trx_undo_t* undo;
-
- undo = trx_undo_mem_create_at_db_start(
- rseg, i, page_no, &mtr);
-
- size += undo->size;
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->page_no, &mtr);
+ undo->state = state;
- /* Found a used slot */
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
- }
+ if (state != TRX_UNDO_CACHED) {
+ UT_LIST_ADD_LAST(type == TRX_UNDO_INSERT
+ ? rseg->old_insert_list
+ : rseg->undo_list, undo);
+ } else {
+ UT_LIST_ADD_LAST(rseg->undo_cached, undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
- mtr_commit(&mtr);
-
- return(size);
+ mtr.commit();
+ return undo->size;
}
/********************************************************************//**
@@ -1297,8 +1182,6 @@ trx_undo_mem_create(
/*================*/
trx_rseg_t* rseg, /*!< in: rollback segment memory object */
ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open transaction identification */
@@ -1319,9 +1202,7 @@ trx_undo_mem_create(
}
undo->id = id;
- undo->type = type;
undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
undo->trx_id = trx_id;
undo->xid = *xid;
@@ -1329,16 +1210,16 @@ trx_undo_mem_create(
undo->rseg = rseg;
- undo->space = rseg->space;
undo->hdr_page_no = page_no;
undo->hdr_offset = offset;
undo->last_page_no = page_no;
undo->size = 1;
- undo->empty = TRUE;
+ undo->top_undo_no = IB_ID_MAX;
undo->top_page_no = page_no;
undo->guess_block = NULL;
undo->withdraw_clock = 0;
+ ut_ad(undo->empty());
return(undo);
}
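Note the design change visible here: the separate undo->empty flag is gone, and emptiness is now derived from the sentinel top_undo_no == IB_ID_MAX, checked through undo->empty(). A minimal sketch of that sentinel-instead-of-flag idea, with invented names (UndoSketch, kEmpty):

#include <cassert>
#include <cstdint>
#include <limits>

/* Instead of carrying a separate boolean, store a sentinel in the
   "top undo number" and derive emptiness from it. */
struct UndoSketch {
    static constexpr uint64_t kEmpty =
        std::numeric_limits<uint64_t>::max();   /* IB_ID_MAX analogue */

    uint64_t top_undo_no = kEmpty;

    bool empty() const { return top_undo_no == kEmpty; }
    void push(uint64_t undo_no) { top_undo_no = undo_no; }
};

int main()
{
    UndoSketch undo;
    assert(undo.empty());
    undo.push(5);
    assert(!undo.empty());
    return 0;
}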
@@ -1360,201 +1241,191 @@ trx_undo_mem_init_for_reuse(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
undo->trx_id = trx_id;
undo->xid = *xid;
undo->dict_operation = FALSE;
undo->hdr_offset = offset;
- undo->empty = TRUE;
-}
-
-/********************************************************************//**
-Frees an undo log memory copy. */
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: the undo object to be freed */
-{
- ut_a(undo->id < TRX_RSEG_N_SLOTS);
-
- ut_free(undo);
+ undo->top_undo_no = IB_ID_MAX;
+ ut_ad(undo->empty());
}
-/**********************************************************************//**
-Creates a new undo log.
-@return DB_SUCCESS if successful in creating the new undo lob object,
-possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
-DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
+/** Create an undo log.
+@param[in,out] trx transaction
+@param[in,out] rseg rollback segment
+@param[out] undo undo log object
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return undo log block
+@retval NULL on failure */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-trx_undo_create(
-/*============*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification*/
- trx_undo_t** undo, /*!< out: the new undo log object, undefined
- * if did not succeed */
- mtr_t* mtr) /*!< in: mtr */
+buf_block_t*
+trx_undo_create(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
{
- trx_rsegf_t* rseg_header;
- ulint page_no;
- ulint offset;
ulint id;
- page_t* undo_page;
- dberr_t err;
ut_ad(mutex_own(&(rseg->mutex)));
- if (rseg->curr_size == rseg->max_size) {
+ buf_block_t* block = trx_undo_seg_create(
+ rseg->space,
+ trx_rsegf_get(rseg->space, rseg->page_no, mtr), &id, err, mtr);
- return(DB_OUT_OF_FILE_SPACE);
+ if (!block) {
+ return NULL;
}
rseg->curr_size++;
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+ ulint offset = trx_undo_header_create(block->frame, trx->id, mtr);
- err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
+ trx_undo_header_add_space_for_xid(block->frame, block->frame + offset,
+ mtr);
- if (err != DB_SUCCESS) {
- /* Did not succeed */
-
- rseg->curr_size--;
-
- return(err);
- }
-
- page_no = page_get_page_no(undo_page);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
-
- *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
- page_no, offset);
+ *undo = trx_undo_mem_create(rseg, id, trx->id, trx->xid,
+ block->page.id.page_no(), offset);
if (*undo == NULL) {
+ *err = DB_OUT_OF_MEMORY;
+ /* FIXME: this will not free the undo block to the file */
+ return NULL;
+ } else if (rseg != trx->rsegs.m_redo.rseg) {
+ return block;
+ }
- err = DB_OUT_OF_MEMORY;
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* Do not discard the table on recovery. */
+ trx->table_id = 0;
+ /* fall through */
+ case TRX_DICT_OP_TABLE:
+ (*undo)->table_id = trx->table_id;
+ (*undo)->dict_operation = TRUE;
+ mlog_write_ulint(block->frame + offset + TRX_UNDO_DICT_TRANS,
+ TRUE, MLOG_1BYTE, mtr);
+ mlog_write_ull(block->frame + offset + TRX_UNDO_TABLE_ID,
+ trx->table_id, mtr);
}
- return(err);
+ *err = DB_SUCCESS;
+ return block;
}
/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-/********************************************************************//**
-Reuses a cached undo log.
-@return the undo log memory object, NULL if none cached */
+/** Reuse a cached undo log block.
+@param[in,out] trx transaction
+@param[in,out] rseg rollback segment
+@param[out] pundo the undo log memory object
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL if none cached */
static
-trx_undo_t*
-trx_undo_reuse_cached(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is used */
- const XID* xid, /*!< in: X/Open XA transaction identification */
- mtr_t* mtr) /*!< in: mtr */
+buf_block_t*
+trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo,
+ mtr_t* mtr)
{
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (type == TRX_UNDO_INSERT) {
-
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(rseg->insert_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(rseg->update_undo_cached, undo);
+ ut_ad(mutex_own(&rseg->mutex));
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
+ if (!undo) {
+ return NULL;
}
ut_ad(undo->size == 1);
- ut_a(undo->id < TRX_RSEG_N_SLOTS);
+ ut_ad(undo->id < TRX_RSEG_N_SLOTS);
- undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
-
- if (type == TRX_UNDO_INSERT) {
- offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
+ buf_block_t* block = buf_page_get(page_id_t(undo->rseg->space->id,
+ undo->hdr_page_no),
+ univ_page_size, RW_X_LATCH, mtr);
+ if (!block) {
+ return NULL;
+ }
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- } else {
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
+ *pundo = undo;
- trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
+ ulint offset = trx_undo_header_create(block->frame, trx->id, mtr);
- return(undo);
-}
+ trx_undo_header_add_space_for_xid(block->frame, block->frame + offset,
+ mtr);
-/** Mark that an undo log header belongs to a data dictionary transaction.
-@param[in] trx dictionary transaction
-@param[in,out] undo undo log
-@param[in,out] mtr mini-transaction */
-void trx_undo_mark_as_dict(const trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
-{
- ut_ad(undo == trx->rsegs.m_redo.insert_undo
- || undo == trx->rsegs.m_redo.update_undo);
+ trx_undo_mem_init_for_reuse(undo, trx->id, trx->xid, offset);
- page_t* hdr_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ if (rseg != trx->rsegs.m_redo.rseg) {
+ return block;
+ }
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
- ut_error;
+ return block;
case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */
- undo->table_id = 0;
- break;
+ trx->table_id = 0;
+ /* fall through */
case TRX_DICT_OP_TABLE:
undo->table_id = trx->table_id;
- break;
+ undo->dict_operation = TRUE;
+ mlog_write_ulint(block->frame + offset + TRX_UNDO_DICT_TRANS,
+ TRUE, MLOG_1BYTE, mtr);
+ mlog_write_ull(block->frame + offset + TRX_UNDO_TABLE_ID,
+ trx->table_id, mtr);
}
- mlog_write_ulint(hdr_page + undo->hdr_offset
- + TRX_UNDO_DICT_TRANS,
- TRUE, MLOG_1BYTE, mtr);
+ return block;
+}
+
+/** Assign an undo log for a persistent transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
+{
+ ut_ad(mtr->get_log_mode() == MTR_LOG_ALL);
+
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
+
+ if (undo) {
+ return buf_page_get_gen(
+ page_id_t(undo->rseg->space->id, undo->last_page_no),
+ univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete(undo->withdraw_clock)
+ ? NULL : undo->guess_block,
+ BUF_GET, __FILE__, __LINE__, mtr, err);
+ }
+
+ trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
- mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- undo->table_id, mtr);
+ mutex_enter(&rseg->mutex);
+ buf_block_t* block = trx_undo_reuse_cached(
+ trx, rseg, &trx->rsegs.m_redo.undo, mtr);
+
+ if (!block) {
+ block = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo,
+ err, mtr);
+ ut_ad(!block == (*err != DB_SUCCESS));
+ if (!block) {
+ goto func_exit;
+ }
+ } else {
+ *err = DB_SUCCESS;
+ }
- undo->dict_operation = TRUE;
+ UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo);
+
+func_exit:
+ mutex_exit(&rseg->mutex);
+ return block;
}
/** Assign an undo log for a transaction.
@@ -1562,73 +1433,57 @@ A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
-@retval DB_SUCCESS on success
-@retval DB_TOO_MANY_CONCURRENT_TRXS
-@retval DB_OUT_OF_FILE_SPACE
-@retval DB_READ_ONLY
-@retval DB_OUT_OF_MEMORY */
-dberr_t
-trx_undo_assign_undo(
- trx_t* trx,
- trx_rseg_t* rseg,
- trx_undo_t** undo,
- ulint type)
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
{
- const bool is_temp = rseg == trx->rsegs.m_noredo.rseg;
- mtr_t mtr;
- dberr_t err = DB_SUCCESS;
+ const bool is_temp __attribute__((unused)) = rseg == trx->rsegs.m_noredo.rseg;
- ut_ad(mutex_own(&trx->undo_mutex));
ut_ad(rseg == trx->rsegs.m_redo.rseg
|| rseg == trx->rsegs.m_noredo.rseg);
- ut_ad(type == TRX_UNDO_INSERT || type == TRX_UNDO_UPDATE);
-
- mtr.start();
-
- if (is_temp) {
- mtr.set_log_mode(MTR_LOG_NO_REDO);
- ut_ad(undo == &trx->rsegs.m_noredo.undo);
- } else {
- ut_ad(undo == (type == TRX_UNDO_INSERT
- ? &trx->rsegs.m_redo.insert_undo
- : &trx->rsegs.m_redo.update_undo));
+ ut_ad(undo == (is_temp
+ ? &trx->rsegs.m_noredo.undo
+ : &trx->rsegs.m_redo.undo));
+ ut_ad(mtr->get_log_mode()
+ == (is_temp ? MTR_LOG_NO_REDO : MTR_LOG_ALL));
+
+ if (*undo) {
+ return buf_page_get_gen(
+ page_id_t(rseg->space->id, (*undo)->last_page_no),
+ univ_page_size, RW_X_LATCH,
+ buf_pool_is_obsolete((*undo)->withdraw_clock)
+ ? NULL : (*undo)->guess_block,
+ BUF_GET, __FILE__, __LINE__, mtr, err);
}
- mutex_enter(&rseg->mutex);
-
DBUG_EXECUTE_IF(
"ib_create_table_fail_too_many_trx",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- goto func_exit;
+ *err = DB_TOO_MANY_CONCURRENT_TRXS; return NULL;
);
- *undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, trx->xid,
- &mtr);
- if (*undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, trx->xid,
- undo, &mtr);
- if (err != DB_SUCCESS) {
+ mutex_enter(&rseg->mutex);
+
+ buf_block_t* block = trx_undo_reuse_cached(trx, rseg, undo, mtr);
+
+ if (!block) {
+ block = trx_undo_create(trx, rseg, undo, err, mtr);
+ ut_ad(!block == (*err != DB_SUCCESS));
+ if (!block) {
goto func_exit;
}
- }
-
- if (is_temp) {
- UT_LIST_ADD_FIRST(rseg->insert_undo_list, *undo);
} else {
- UT_LIST_ADD_FIRST(type == TRX_UNDO_INSERT
- ? rseg->insert_undo_list
- : rseg->update_undo_list, *undo);
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_undo_mark_as_dict(trx, *undo, &mtr);
- }
+ *err = DB_SUCCESS;
}
+ UT_LIST_ADD_FIRST(rseg->undo_list, *undo);
+
func_exit:
mutex_exit(&rseg->mutex);
- mtr.commit();
-
- return(err);
+ return block;
}
/******************************************************************//**
@@ -1648,7 +1503,7 @@ trx_undo_set_state_at_finish(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1658,10 +1513,6 @@ trx_undo_set_state_at_finish(
< TRX_UNDO_PAGE_REUSE_LIMIT) {
state = TRX_UNDO_CACHED;
-
- } else if (undo->type == TRX_UNDO_INSERT) {
-
- state = TRX_UNDO_TO_FREE;
} else {
state = TRX_UNDO_TO_PURGE;
}
@@ -1675,7 +1526,7 @@ trx_undo_set_state_at_finish(
/** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
@param[in,out] trx transaction
-@param[in,out] undo insert_undo or update_undo log
+@param[in,out] undo undo log
@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
@param[in,out] mtr mini-transaction
@return undo log segment header page, x-latched */
@@ -1696,7 +1547,7 @@ trx_undo_set_state_at_prepare(
ut_a(undo->id < TRX_RSEG_N_SLOTS);
undo_page = trx_undo_page_get(
- page_id_t(undo->space, undo->hdr_page_no), mtr);
+ page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
@@ -1727,43 +1578,7 @@ trx_undo_set_state_at_prepare(
return(undo_page);
}
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update
- undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo = trx->rsegs.m_redo.update_undo;
- trx_rseg_t* rseg = undo->rseg;
-
- ut_ad(mutex_own(&rseg->mutex));
-
- trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
-
- UT_LIST_REMOVE(rseg->update_undo_list, undo);
-
- trx->rsegs.m_redo.update_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(rseg->update_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- trx_undo_mem_free(undo);
- }
-}
-
-/** Free an insert or temporary undo log after commit or rollback.
+/** Free an old insert or temporary undo log after commit or rollback.
The information is not needed after a commit or rollback, therefore
the data can be discarded.
@param[in,out] undo undo log
@@ -1773,44 +1588,39 @@ trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp)
{
trx_rseg_t* rseg = undo->rseg;
ut_ad(is_temp == !rseg->is_persistent());
+ ut_ad(!is_temp || 0 == UT_LIST_GET_LEN(rseg->old_insert_list));
mutex_enter(&rseg->mutex);
- UT_LIST_REMOVE(rseg->insert_undo_list, undo);
+ UT_LIST_REMOVE(is_temp ? rseg->undo_list : rseg->old_insert_list,
+ undo);
if (undo->state == TRX_UNDO_CACHED) {
- UT_LIST_ADD_FIRST(rseg->insert_undo_cached, undo);
+ UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
- ut_ad(undo->state == TRX_UNDO_TO_FREE);
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
/* Delete first the undo log segment in the file */
mutex_exit(&rseg->mutex);
- if (!srv_read_only_mode) {
- trx_undo_seg_free(undo, is_temp);
- }
+ trx_undo_seg_free(undo, true);
mutex_enter(&rseg->mutex);
ut_ad(rseg->curr_size > undo->size);
rseg->curr_size -= undo->size;
- trx_undo_mem_free(undo);
+ ut_free(undo);
}
mutex_exit(&rseg->mutex);
}
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
+/** At shutdown, frees the undo logs of a transaction. */
void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
+trx_undo_free_at_shutdown(trx_t *trx)
{
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
-
- if (trx->rsegs.m_redo.update_undo) {
- switch (trx->rsegs.m_redo.update_undo->state) {
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) {
+ switch (undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -1821,10 +1631,7 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false and
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
- also for transactions that we faked
- to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
ut_a(!srv_was_started
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
@@ -1834,15 +1641,13 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->update_undo_list,
- trx->rsegs.m_redo.update_undo);
- trx_undo_mem_free(trx->rsegs.m_redo.update_undo);
-
- trx->rsegs.m_redo.update_undo = NULL;
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list, undo);
+ ut_free(undo);
+ undo = NULL;
}
- if (trx->rsegs.m_redo.insert_undo) {
- switch (trx->rsegs.m_redo.insert_undo->state) {
+ if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) {
+ switch (undo->state) {
case TRX_UNDO_PREPARED:
break;
case TRX_UNDO_CACHED:
@@ -1853,10 +1658,7 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false and
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
- also for transactions that we faked
- to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
ut_a(!srv_was_started
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
@@ -1866,19 +1668,16 @@ trx_undo_free_prepared(
ut_error;
}
- UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->insert_undo_list,
- trx->rsegs.m_redo.insert_undo);
- trx_undo_mem_free(trx->rsegs.m_redo.insert_undo);
-
- trx->rsegs.m_redo.insert_undo = NULL;
+ UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list, undo);
+ ut_free(undo);
+ undo = NULL;
}
if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
ut_a(undo->state == TRX_UNDO_PREPARED);
- UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->insert_undo_list,
- undo);
- trx_undo_mem_free(undo);
+ UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list, undo);
+ ut_free(undo);
undo = NULL;
}
}
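
The trx0undo.cc hunks above replace the old trx_undo_assign_undo() and
trx_undo_mark_as_dict() pair with trx_undo_assign() and trx_undo_assign_low(),
which return the x-latched undo page and report failure through a dberr_t
out-parameter instead of a return value. A minimal caller sketch of the new
shape follows; it assumes only the signatures shown in this diff, the
record-append step is a placeholder, and it is illustrative rather than
compilable outside the InnoDB source tree.

/* Illustrative only: drive the reworked trx_undo_assign() interface.
The mtr is assumed to be an already started, redo-logged mini-transaction
(trx_undo_assign() asserts MTR_LOG_ALL); the undo-record append is left
as a placeholder. */
static dberr_t undo_append_sketch(trx_t* trx, mtr_t* mtr)
{
	dberr_t		err	= DB_SUCCESS;
	buf_block_t*	block	= trx_undo_assign(trx, &err, mtr);

	if (block == NULL) {
		/* e.g. DB_TOO_MANY_CONCURRENT_TRXS or
		DB_OUT_OF_FILE_SPACE */
		return(err);
	}

	/* ... append the undo log record to the x-latched undo page
	in 'block' within the same mini-transaction ... */
	return(DB_SUCCESS);
}
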
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index cbb571e8f47..5d95202b7c9 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -89,20 +89,6 @@ mysys/my_perf.c, contributed by Facebook under the following license.
#include <intrin.h>
#endif
-/** Pointer to CRC32 calculation function. */
-ut_crc32_func_t ut_crc32;
-
-/** Pointer to CRC32 calculation function, which uses big-endian byte order
-when converting byte strings to integers internally. */
-ut_crc32_func_t ut_crc32_legacy_big_endian;
-
-/** Pointer to CRC32-byte-by-byte calculation function (byte order agnostic,
-but very slow). */
-ut_crc32_func_t ut_crc32_byte_by_byte;
-
-/** Text description of CRC32 implementation */
-const char* ut_crc32_implementation;
-
/** Swap the byte order of an 8 byte integer.
@param[in] i 8-byte integer
@return 8-byte integer */
@@ -136,6 +122,13 @@ ut_crc32_power8(
{
return crc32c_vpmsum(0, buf, len);
}
+
+ut_crc32_func_t ut_crc32 = ut_crc32_power8;
+const char* ut_crc32_implementation = "Using POWER8 crc32 instructions";
+#else
+uint32_t ut_crc32_sw(const byte* buf, ulint len);
+ut_crc32_func_t ut_crc32 = ut_crc32_sw;
+const char* ut_crc32_implementation = "Using generic crc32 instructions";
#endif
#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
@@ -278,37 +271,6 @@ ut_crc32_64_hw(
*len -= 8;
}
-/** Calculate CRC32 over 64-bit byte string using a hardware/CPU instruction.
-The byte string is converted to a 64-bit integer using big endian byte order.
-@param[in,out] crc crc32 checksum so far when this function is called,
-when the function ends it will contain the new checksum
-@param[in,out] data data to be checksummed, the pointer will be advanced
-with 8 bytes
-@param[in,out] len remaining bytes, it will be decremented with 8 */
-inline
-void
-ut_crc32_64_legacy_big_endian_hw(
- uint32_t* crc,
- const byte** data,
- ulint* len)
-{
- uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
-
-#ifndef WORDS_BIGENDIAN
- data_int = ut_crc32_swap_byteorder(data_int);
-#else
- /* Currently we only support x86_64 (little endian) CPUs. In case
- some big endian CPU supports a CRC32 instruction, then maybe we will
- NOT need a byte order swap here. */
-#error Dont know how to handle big endian CPUs
-#endif /* WORDS_BIGENDIAN */
-
- *crc = ut_crc32_64_low_hw(*crc, data_int);
-
- *data += 8;
- *len -= 8;
-}
-
/** Calculates CRC32 using hardware/CPU instructions.
@param[in] buf data over which to calculate CRC32
@param[in] len data length
@@ -395,76 +357,6 @@ ut_crc32_hw(
return(~crc);
}
-
-/** Calculates CRC32 using hardware/CPU instructions.
-This function uses big endian byte ordering when converting byte sequence to
-integers.
-@param[in] buf data over which to calculate CRC32
-@param[in] len data length
-@return CRC-32C (polynomial 0x11EDC6F41) */
-uint32_t
-ut_crc32_legacy_big_endian_hw(
- const byte* buf,
- ulint len)
-{
- uint32_t crc = 0xFFFFFFFFU;
-
- /* Calculate byte-by-byte up to an 8-byte aligned address. After
- this consume the input 8-bytes at a time. */
- while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
- ut_crc32_8_hw(&crc, &buf, &len);
- }
-
- while (len >= 128) {
- /* This call is repeated 16 times. 16 * 8 = 128. */
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- }
-
- while (len >= 8) {
- ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
- }
-
- while (len > 0) {
- ut_crc32_8_hw(&crc, &buf, &len);
- }
-
- return(~crc);
-}
-
-/** Calculates CRC32 using hardware/CPU instructions.
-This function processes one byte at a time (very slow) and thus it does
-not depend on the byte order of the machine.
-@param[in] buf data over which to calculate CRC32
-@param[in] len data length
-@return CRC-32C (polynomial 0x11EDC6F41) */
-uint32_t
-ut_crc32_byte_by_byte_hw(
- const byte* buf,
- ulint len)
-{
- uint32_t crc = 0xFFFFFFFFU;
-
- while (len > 0) {
- ut_crc32_8_hw(&crc, &buf, &len);
- }
-
- return(~crc);
-}
#endif /* defined(__GNUC__) && defined(__x86_64__) || (_WIN64) */
/* CRC32 software implementation. */
@@ -660,7 +552,7 @@ integers.
@param[in] len data length
@return CRC-32C (polynomial 0x11EDC6F41) */
uint32_t
-ut_crc32_legacy_big_endian_sw(
+ut_crc32_legacy_big_endian(
const byte* buf,
ulint len)
{
@@ -705,28 +597,6 @@ ut_crc32_legacy_big_endian_sw(
return(~crc);
}
-/** Calculates CRC32 in software, without using CPU instructions.
-This function processes one byte at a time (very slow) and thus it does
-not depend on the byte order of the machine.
-@param[in] buf data over which to calculate CRC32
-@param[in] len data length
-@return CRC-32C (polynomial 0x11EDC6F41) */
-uint32_t
-ut_crc32_byte_by_byte_sw(
- const byte* buf,
- ulint len)
-{
- uint32_t crc = 0xFFFFFFFFU;
-
- ut_a(ut_crc32_slice8_table_initialized);
-
- while (len > 0) {
- ut_crc32_8_sw(&crc, &buf, &len);
- }
-
- return(~crc);
-}
-
/********************************************************************//**
Initializes the data structures used by ut_crc32*(). Does not do any
allocations, would not hurt if called twice, but would be pointless. */
@@ -735,10 +605,6 @@ ut_crc32_init()
/*===========*/
{
ut_crc32_slice8_table_init();
- ut_crc32 = ut_crc32_sw;
- ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_sw;
- ut_crc32_byte_by_byte = ut_crc32_byte_by_byte_sw;
- ut_crc32_implementation = "Using generic crc32 instructions";
#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
uint32_t vend[3];
@@ -770,14 +636,7 @@ ut_crc32_init()
if (features_ecx & 1 << 20) {
ut_crc32 = ut_crc32_hw;
- ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_hw;
- ut_crc32_byte_by_byte = ut_crc32_byte_by_byte_hw;
ut_crc32_implementation = "Using SSE2 crc32 instructions";
}
-
-#elif defined(HAVE_CRC32_VPMSUM)
- ut_crc32 = ut_crc32_power8;
- ut_crc32_implementation = "Using POWER8 crc32 instructions";
#endif
-
}
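
For reference, ut_crc32_sw, ut_crc32_hw and ut_crc32_power8 all compute plain
CRC-32C (Castagnoli): the polynomial 0x11EDC6F41 cited in the comments above,
whose reflected constant is 0x82F63B78. A standalone bit-by-bit sketch (slow,
but useful for cross-checking the table-driven and hardware paths) might look
like this; the function name is ours, not part of the patch.

#include <stddef.h>
#include <stdint.h>

/* Bit-by-bit CRC-32C (Castagnoli). 0x82F63B78 is the reflected form of
the polynomial 0x11EDC6F41; init and final XOR are both 0xFFFFFFFF. */
static uint32_t crc32c_reference(const unsigned char* buf, size_t len)
{
	uint32_t	crc = 0xFFFFFFFFU;

	while (len--) {
		crc ^= *buf++;
		for (int bit = 0; bit < 8; bit++) {
			crc = (crc >> 1)
				^ (0x82F63B78U & (0U - (crc & 1U)));
		}
	}

	return(~crc);
}
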
diff --git a/storage/innobase/ut/ut0new.cc b/storage/innobase/ut/ut0new.cc
index 8aad921278c..14f2748218c 100644
--- a/storage/innobase/ut/ut0new.cc
+++ b/storage/innobase/ut/ut0new.cc
@@ -41,7 +41,6 @@ PSI_memory_key mem_key_other;
PSI_memory_key mem_key_row_log_buf;
PSI_memory_key mem_key_row_merge_sort;
PSI_memory_key mem_key_std;
-PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
PSI_memory_key mem_key_partitioning;
#ifdef UNIV_PFS_MEMORY
@@ -70,7 +69,6 @@ static PSI_memory_info pfs_info[] = {
{&mem_key_row_log_buf, "row_log_buf", 0},
{&mem_key_row_merge_sort, "row_merge_sort", 0},
{&mem_key_std, "std", 0},
- {&mem_key_trx_sys_t_rw_trx_ids, "trx_sys_t::rw_trx_ids", 0},
{&mem_key_partitioning, "partitioning", 0},
};
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index baae817f217..39fb037aa28 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -37,6 +37,7 @@ Created 5/11/1994 Heikki Tuuri
#include "trx0trx.h"
#include <string>
#include "log.h"
+#include "my_cpu.h"
#ifdef _WIN32
typedef VOID(WINAPI *time_fn)(LPFILETIME);
@@ -159,7 +160,7 @@ ut_time_us(
ut_gettimeofday(&tv, NULL);
- us = static_cast<uintmax_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ us = uintmax_t(tv.tv_sec) * 1000000 + uintmax_t(tv.tv_usec);
if (tloc != NULL) {
*tloc = us;
@@ -181,7 +182,7 @@ ut_time_ms(void)
ut_gettimeofday(&tv, NULL);
- return((ulint) tv.tv_sec * 1000 + tv.tv_usec / 1000);
+ return(ulint(tv.tv_sec) * 1000 + ulint(tv.tv_usec / 1000));
}
/**********************************************************//**
@@ -293,14 +294,14 @@ ut_delay(
{
ulint i;
- UT_LOW_PRIORITY_CPU();
+ HMT_low();
for (i = 0; i < delay * 50; i++) {
- UT_RELAX_CPU();
+ MY_RELAX_CPU();
UT_COMPILER_BARRIER();
}
- UT_RESUME_PRIORITY_CPU();
+ HMT_medium();
}
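
The ut_delay() hunk above swaps the UT_* wrappers for HMT_low()/HMT_medium()
and MY_RELAX_CPU() from the newly included my_cpu.h; in either spelling the
point is to issue a CPU pause/relax hint on every iteration of the busy loop
so a sibling hardware thread is not starved. A self-contained sketch of the
same pattern, x86 only and with a name of our own choosing:

#include <stddef.h>
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
# include <emmintrin.h>	/* _mm_pause() */
#endif

/* Busy-wait roughly proportional to 'delay', issuing a pause hint each
iteration so the pipeline and any sibling hyper-thread get some relief. */
static void spin_delay(size_t delay)
{
	for (size_t i = 0; i < delay * 50; i++) {
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
		_mm_pause();
#else
		/* other platforms would use their own relax hint here */
#endif
	}
}
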
/*************************************************************//**
@@ -428,7 +429,7 @@ ut_get_name(
name, strlen(name),
trx ? trx->mysql_thd : NULL);
buf[bufend - buf] = '\0';
- return(std::string(buf, 0, bufend - buf));
+ return(std::string(buf, 0, size_t(bufend - buf)));
}
/**********************************************************************//**
@@ -452,7 +453,7 @@ ut_print_name(
name, strlen(name),
trx ? trx->mysql_thd : NULL);
- if (fwrite(buf, 1, bufend - buf, f) != (size_t) (bufend - buf)) {
+ if (fwrite(buf, 1, size_t(bufend - buf), f) != size_t(bufend - buf)) {
perror("fwrite");
}
}
@@ -529,32 +530,6 @@ ut_copy_file(
The returned string is static and should not be freed or modified.
@param[in] num InnoDB internal error number
@return string, describing the error */
-std::string
-ut_get_name(
-/*=========*/
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /*!< in: name to print */
-{
- /* 2 * NAME_LEN for database and table name,
- and some slack for the #mysql50# prefix and quotes */
- char buf[3 * NAME_LEN];
- const char* bufend;
- ulint namelen = strlen(name);
-
- bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL);
- buf[bufend-buf]='\0';
- std::string str(buf);
- return str;
-}
-
-/** Convert an error number to a human readable text message.
-The returned string is static and should not be freed or modified.
-@param[in] num InnoDB internal error number
-@return string, describing the error */
const char*
ut_strerr(
dberr_t num)