139 files changed, 4449 insertions, 5858 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 2fccdfc431c..54f13a17c4c 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
 
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,9 +37,15 @@ Created 6/2/1994 Heikki Tuuri
 #include "btr0types.h"
 #include "gis0type.h"
 
+#define BTR_MAX_NODE_LEVEL	50	/*!< Maximum B-tree page level
+					(not really a hard limit).
+					Used in debug assertions
+					in btr_page_set_level and
+					btr_page_get_level */
+
 /** Maximum record size which can be stored on a page, without using the
 special big record storage structure */
-#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
+#define	BTR_PAGE_MAX_REC_SIZE	(srv_page_size / 2 - 200)
 
 /** @brief Maximum depth of a B-tree in InnoDB.
 
@@ -151,23 +157,23 @@ free the pages of externally stored fields. */
 record is in spatial index */
 #define BTR_RTREE_DELETE_MARK	524288U
 
-#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode)			\
-	((latch_mode) & btr_latch_mode(~(BTR_INSERT			\
-					 | BTR_DELETE_MARK		\
-					 | BTR_RTREE_UNDO_INS		\
-					 | BTR_RTREE_DELETE_MARK	\
-					 | BTR_DELETE			\
-					 | BTR_ESTIMATE			\
-					 | BTR_IGNORE_SEC_UNIQUE	\
-					 | BTR_ALREADY_S_LATCHED	\
-					 | BTR_LATCH_FOR_INSERT		\
-					 | BTR_LATCH_FOR_DELETE		\
-					 | BTR_MODIFY_EXTERNAL)))
-
-#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode)			\
-	((latch_mode) & btr_latch_mode(~(BTR_LATCH_FOR_INSERT		\
-					 | BTR_LATCH_FOR_DELETE		\
-					 | BTR_MODIFY_EXTERNAL)))
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode)		\
+	((latch_mode) & ulint(~(BTR_INSERT			\
+				| BTR_DELETE_MARK		\
+				| BTR_RTREE_UNDO_INS		\
+				| BTR_RTREE_DELETE_MARK		\
+				| BTR_DELETE			\
+				| BTR_ESTIMATE			\
+				| BTR_IGNORE_SEC_UNIQUE		\
+				| BTR_ALREADY_S_LATCHED		\
+				| BTR_LATCH_FOR_INSERT		\
+				| BTR_LATCH_FOR_DELETE		\
+				| BTR_MODIFY_EXTERNAL)))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode)		\
+	((latch_mode) & ulint(~(BTR_LATCH_FOR_INSERT		\
+				| BTR_LATCH_FOR_DELETE		\
+				| BTR_MODIFY_EXTERNAL)))
 
 /**************************************************************//**
 Report that an index page is corrupted. */
@@ -289,14 +295,22 @@ btr_page_get_index_id(
 	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************//**
 Gets the node level field in an index page.
+@param[in]	page	index page
 @return level, leaf level == 0 */
 UNIV_INLINE
 ulint
-btr_page_get_level_low(
-/*===================*/
-	const page_t*	page)	/*!< in: index page */
-	MY_ATTRIBUTE((warn_unused_result));
-#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
+MY_ATTRIBUTE((warn_unused_result))
+btr_page_get_level(const page_t* page)
+{
+	ut_ad(page);
+
+	/* PAGE_LEVEL is read as a 2-byte field of the page header. */
+	const ulint level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+	/* Debug-only sanity check; BTR_MAX_NODE_LEVEL is not a hard limit. */
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+	return(level);
+}
 /********************************************************//**
 Gets the next index page number.
 @return next page number */
@@ -345,8 +359,7 @@ btr_node_ptr_get_child_page_no(
 
 /** Create the root node for a new index tree.
 @param[in]	type			type of the index
-@param[in]	space			space where created
-@param[in]	page_size		page size
+@param[in,out]	space			tablespace where created
 @param[in]	index_id		index id
 @param[in]	index			index, or NULL when applying TRUNCATE
 log record during recovery
@@ -357,8 +370,7 @@ record during recovery
 ulint
 btr_create(
 	ulint			type,
-	ulint			space,
-	const page_size_t&	page_size,
+	fil_space_t*		space,
 	index_id_t		index_id,
 	dict_index_t*		index,
 	const btr_create_t*	btr_redo_create_info,
@@ -684,6 +696,20 @@ btr_page_free(
 	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
 	mtr_t*		mtr)	/*!< in: mtr */
 	MY_ATTRIBUTE((nonnull));
+/** Empty an index page (possibly the root page). @see btr_page_create().
+@param[in,out]	block		page to be emptied
+@param[in,out]	page_zip	compressed page frame, or NULL
+@param[in]	index		index of the page
+@param[in]	level		B-tree level of the page (0=leaf)
+@param[in,out]	mtr		mini-transaction */
+void
+btr_page_empty(
+	buf_block_t*	block,
+	page_zip_des_t*	page_zip,
+	dict_index_t*	index,
+	ulint		level,
+	mtr_t*		mtr)
+	MY_ATTRIBUTE((nonnull(1, 3, 5)));
 /**************************************************************//**
 Creates a new index page (not the root, and also not
 used in page reorganization).  @see btr_page_empty(). */
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index bd4f2a40267..d24458beace 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -29,12 +29,6 @@ Created 6/2/1994 Heikki Tuuri
 #include "mtr0log.h"
 #include "page0zip.h"
 
-#define BTR_MAX_NODE_LEVEL	50	/*!< Maximum B-tree page level
-					(not really a hard limit).
-					Used in debug assertions
-					in btr_page_set_level and
-					btr_page_get_level_low */
-
 /** Gets a buffer page and declares its latching order level.
 @param[in]	page_id	page id
 @param[in]	mode	latch mode
@@ -144,26 +138,6 @@ btr_page_get_index_id(
 }
 
 /********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
-	const page_t*	page)	/*!< in: index page */
-{
-	ulint	level;
-
-	ut_ad(page);
-
-	level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
-	ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
-	return(level);
-}
-
-/********************************************************//**
 Sets the node level field in an index page. */
 UNIV_INLINE
 void
diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h
index edf34bd5ae4..5047dce38b4 100644
--- a/storage/innobase/include/btr0bulk.h
+++ b/storage/innobase/include/btr0bulk.h
@@ -33,7 +33,7 @@ Created 03/11/2014 Shaohua Wang
 #include <vector>
 
 /** Innodb B-tree index fill factor for bulk load. */
-extern	long	innobase_fill_factor;
+extern	uint	innobase_fill_factor;
 /** whether to reduce redo logging during ALTER TABLE */
 extern	my_bool	innodb_log_optimize_ddl;
 
@@ -86,7 +86,7 @@ public:
 		m_err(DB_SUCCESS)
 	{
 		ut_ad(!dict_index_is_spatial(m_index));
-		ut_ad(!dict_table_is_temporary(m_index->table));
+		ut_ad(!m_index->table->is_temporary());
 	}
 
 	/** Deconstructor */
@@ -287,7 +287,8 @@ public:
 	{
 #ifdef UNIV_DEBUG
 		if (m_flush_observer)
-		fil_space_inc_redo_skipped_count(m_index->space);
+		my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+				  1);
 #endif /* UNIV_DEBUG */
 	}
 
@@ -296,7 +297,8 @@ public:
 	{
 #ifdef UNIV_DEBUG
 		if (m_flush_observer)
-		fil_space_dec_redo_skipped_count(m_index->space);
+		my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+				  ulint(-1));
 #endif /* UNIV_DEBUG */
 	}
 
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 1df382bb995..f87370be70c 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 10/16/1994 Heikki Tuuri
 #define btr0cur_h
 
 #include "univ.i"
+#include "my_base.h"
 #include "dict0dict.h"
 #include "page0cur.h"
 #include "btr0types.h"
@@ -42,6 +43,11 @@ enum {
 	/** sys fields will be found in the update vector or inserted
 	entry */
 	BTR_KEEP_SYS_FLAG = 4,
+
+	/** no rollback: combines the no-undo-log, no-locking, keep-sys flags */
+	BTR_NO_ROLLBACK = BTR_NO_UNDO_LOG_FLAG
+		| BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG,
+
 	/** btr_cur_pessimistic_update() must keep cursor position
 	when moving columns to big_rec */
 	BTR_KEEP_POS_FLAG = 8,
@@ -127,6 +133,24 @@ btr_cur_position(
 	buf_block_t*	block,	/*!< in: buffer block of rec */
 	btr_cur_t*	cursor);/*!< in: cursor */
 
+/** Load the instant ALTER TABLE metadata from the clustered index
+when loading a table definition.
+@param[in,out]	table	table definition from the data dictionary
+@return	error code
+@retval	DB_SUCCESS	if no error occurred */
+dberr_t
+btr_cur_instant_init(dict_table_t* table)
+	ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Initialize the n_core_null_bytes on first access to a clustered
+index root page.
+@param[in]	index	clustered index that is on its first access
+@param[in]	page	clustered index root page
+@return	whether the page is corrupted */
+bool
+btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
+	ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /** Optimistically latches the leaf page or pages requested.
 @param[in]	block		guessed buffer block
 @param[in]	modify_clock	modify clock value
@@ -154,8 +178,7 @@ Note that if mode is PAGE_CUR_LE, which is used in inserts, then
 cursor->up_match and cursor->low_match both will have sensible values.
 If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
 dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
+btr_cur_search_to_nth_level_func(
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: the tree level of search */
 	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
@@ -174,23 +197,29 @@ btr_cur_search_to_nth_level(
 				cursor->left_block is used to store a pointer
 				to the left neighbor page, in the cases
 				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
-				NOTE that if has_search_latch
-				is != 0, we maybe do not have a latch set
-				on the cursor page, we assume
-				the caller uses his search latch
-				to protect the record! */
+				NOTE that if ahi_latch is not NULL, we might
+				not hold a cursor page latch; we assume that
+				ahi_latch protects the record! */
 	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
 				s- or x-latched, but see also above! */
-	ulint		has_search_latch,
-				/*!< in: latch mode the caller
-				currently has on search system:
-				RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+	rw_lock_t*	ahi_latch,
+				/*!< in: currently held btr_search_latch
+				(in RW_S_LATCH mode), or NULL */
+#endif /* BTR_CUR_HASH_ADAPT */
 	const char*	file,	/*!< in: file name */
 	unsigned	line,	/*!< in: line where called */
 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
 	ib_uint64_t	autoinc = 0);
 				/*!< in: PAGE_ROOT_AUTO_INC to be written
 				(0 if none) */
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+	btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,a,fi,li,mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+	btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,fi,li,mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
 
 /*****************************************************************//**
 Opens a cursor at either end of an index.
@@ -572,7 +601,7 @@ btr_cur_parse_del_mark_set_sec_rec(
 @param[in]	tuple2	range end, may also be empty tuple
 @param[in]	mode2	search mode for range end
 @return estimated number of rows */
-int64_t
+ha_rows
 btr_estimate_n_rows_in_range(
 	dict_index_t*	index,
 	const dtuple_t*	tuple1,
@@ -806,7 +835,7 @@ btr_cur_latch_leaves(
 /** In the pessimistic delete, if the page data size drops below this
 limit, merging it to a neighbor is tried */
 #define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
-	((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
+	((srv_page_size * (ulint)((index)->merge_threshold)) / 100)
 
 /** A slot in the path array. We store here info on a search path down the
 tree. Each slot contains data on a single level of the tree. */
@@ -974,11 +1003,11 @@ We store locally a long enough prefix of each column so that we can determine
 the ordering parts of each index record without looking into the externally
 stored part. */
 /*-------------------------------------- @{ */
-#define BTR_EXTERN_SPACE_ID		0	/*!< space id where stored */
-#define BTR_EXTERN_PAGE_NO		4	/*!< page no where stored */
-#define BTR_EXTERN_OFFSET		8	/*!< offset of BLOB header
+#define BTR_EXTERN_SPACE_ID		0U	/*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO		4U	/*!< page no where stored */
+#define BTR_EXTERN_OFFSET		8U	/*!< offset of BLOB header
 						on that page */
-#define BTR_EXTERN_LEN			12	/*!< 8 bytes containing the
+#define BTR_EXTERN_LEN			12U	/*!< 8 bytes containing the
 						length of the externally
 						stored part of the BLOB.
 						The 2 highest bits are
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index b1e59651a1d..adcd92e2fc8 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +29,7 @@ Created 10/16/1994 Heikki Tuuri
 #ifdef UNIV_DEBUG
 # define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
 if (btr_cur_limit_optimistic_insert_debug > 1\
-    && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+    && (NREC) >= btr_cur_limit_optimistic_insert_debug) {\
         CODE;\
 }
 #else
@@ -128,19 +129,17 @@ btr_cur_compress_recommendation(
 {
 	const page_t*	page;
 
-	ut_ad(mtr_is_block_fix(
-		mtr, btr_cur_get_block(cursor),
-		MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+			       MTR_MEMO_PAGE_X_FIX));
 
 	page = btr_cur_get_page(cursor);
 
-	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2U,
 				      return(FALSE));
 
-	if ((page_get_data_size(page)
-	     < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
-	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
-		&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+	if (page_get_data_size(page)
+	    < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+	    || !page_has_siblings(page)) {
 
 		/* The page fillfactor has dropped below a predefined
 		minimum value OR the level in the B-tree contains just
@@ -173,11 +172,9 @@ btr_cur_can_delete_without_compress(
 
 	page = btr_cur_get_page(cursor);
 
-	if ((page_get_data_size(page) - rec_size
-	     < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
-	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
-		&& (btr_page_get_prev(page, mtr) == FIL_NULL))
-	    || (page_get_n_recs(page) < 2)) {
+	if (page_get_data_size(page) - rec_size
+	    < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+	    || !page_has_siblings(page) || page_get_n_recs(page) < 2) {
 
 		/* The page fillfactor will drop below a predefined
 		minimum value, OR the level in the B-tree contains just
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index b84d9840a28..747ad676e33 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -136,20 +136,25 @@ btr_pcur_open_with_no_init_func(
 				may end up on the previous page of the
 				record! */
 	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
-				NOTE that if has_search_latch != 0 then
-				we maybe do not acquire a latch on the cursor
-				page, but assume that the caller uses his
-				btr search latch to protect the record! */
+				NOTE that if ahi_latch is not NULL, we
+				might not acquire a cursor page latch, but
+				assume that ahi_latch protects the record! */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
-	ulint		has_search_latch,
-				/*!< in: latch mode the caller
-				currently has on search system:
-				RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+	rw_lock_t*	ahi_latch,
+				/*!< in: adaptive hash index latch held
+				by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
 	const char*	file,	/*!< in: file name */
 	unsigned	line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
-#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m)			\
-	btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m)		\
+	btr_pcur_open_with_no_init_func(ix,t,md,l,cur,ahi,__FILE__,__LINE__,m)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m)		\
+	btr_pcur_open_with_no_init_func(ix,t,md,l,cur,__FILE__,__LINE__,m)
+#endif /* BTR_CUR_HASH_ADAPT */
 
 /*****************************************************************//**
 Opens a persistent cursor at either end of an index. */
@@ -436,21 +441,11 @@ btr_pcur_is_before_first_on_page(
 /*********************************************************//**
 Checks if the persistent cursor is before the first user record in
 the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
-	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
-	mtr_t*		mtr);	/*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor);
 /*********************************************************//**
 Checks if the persistent cursor is after the last user record in
 the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
-	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
-	mtr_t*		mtr);	/*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor);
 /*********************************************************//**
 Moves the persistent cursor to the next record on the same page. */
 UNIV_INLINE
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index b2a85def63d..6e38bf61701 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -209,43 +209,25 @@ btr_pcur_is_on_user_rec(
 /*********************************************************//**
 Checks if the persistent cursor is before the first user record in
 the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
-	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
-	mtr_t*		mtr)	/*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor)
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
-		return(FALSE);
-	}
-
-	return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+	return !page_has_prev(btr_pcur_get_page(cursor))
+		&& page_cur_is_before_first(btr_pcur_get_page_cur(cursor));
 }
 
 /*********************************************************//**
 Checks if the persistent cursor is after the last user record in
 the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
-	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
-	mtr_t*		mtr)	/*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor)
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
-		return(FALSE);
-	}
-
-	return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+	return !page_has_next(btr_pcur_get_page(cursor))
+		&& page_cur_is_after_last(btr_pcur_get_page_cur(cursor));
 }
 
 /*********************************************************//**
@@ -315,9 +297,7 @@ btr_pcur_move_to_next_user_rec(
 	cursor->old_stored = false;
 loop:
 	if (btr_pcur_is_after_last_on_page(cursor)) {
-
-		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+		if (btr_pcur_is_after_last_in_tree(cursor)) {
 			return(FALSE);
 		}
 
@@ -352,19 +332,15 @@ btr_pcur_move_to_next(
 	cursor->old_stored = false;
 
 	if (btr_pcur_is_after_last_on_page(cursor)) {
-
-		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+		if (btr_pcur_is_after_last_in_tree(cursor)) {
 			return(FALSE);
 		}
 
 		btr_pcur_move_to_next_page(cursor, mtr);
-
 		return(TRUE);
 	}
 
 	btr_pcur_move_to_next_on_page(cursor);
-
 	return(TRUE);
 }
 
@@ -480,9 +456,12 @@ btr_pcur_open_low(
 
 	ut_ad(!dict_index_is_spatial(index));
 
-	err = btr_cur_search_to_nth_level(
-		index, level, tuple, mode, latch_mode,
-		btr_cursor, 0, file, line, mtr, autoinc);
+	err = btr_cur_search_to_nth_level_func(
+		index, level, tuple, mode, latch_mode, btr_cursor,
+#ifdef BTR_CUR_HASH_ADAPT
+		NULL,
+#endif /* BTR_CUR_HASH_ADAPT */
+		file, line, mtr, autoinc);
 
 	if (err != DB_SUCCESS) {
 		ib::warn() << " Error code: " << err
@@ -517,15 +496,15 @@ btr_pcur_open_with_no_init_func(
 				may end up on the previous page of the
 				record! */
 	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
-				NOTE that if has_search_latch != 0 then
-				we maybe do not acquire a latch on the cursor
-				page, but assume that the caller uses his
-				btr search latch to protect the record! */
+				NOTE that if ahi_latch is not NULL, we
+				might not acquire a cursor page latch, but
+				assume that ahi_latch protects the record! */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
-	ulint		has_search_latch,
-				/*!< in: latch mode the caller
-				currently has on search system:
-				RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+	rw_lock_t*	ahi_latch,
+				/*!< in: adaptive hash index latch held
+				by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
 	const char*	file,	/*!< in: file name */
 	unsigned	line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
@@ -540,9 +519,12 @@ btr_pcur_open_with_no_init_func(
 
 	btr_cursor = btr_pcur_get_btr_cur(cursor);
 
-	err = btr_cur_search_to_nth_level(
+	err = btr_cur_search_to_nth_level_func(
 		index, 0, tuple, mode, latch_mode, btr_cursor,
-		has_search_latch, file, line, mtr);
+#ifdef BTR_CUR_HASH_ADAPT
+		ahi_latch,
+#endif /* BTR_CUR_HASH_ADAPT */
+		file, line, mtr);
 
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index e6983cacffb..4aaf3fb835e 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,26 +38,20 @@ Created 2/17/1996 Heikki Tuuri
 
 /** Creates and initializes the adaptive search system at a database start.
 @param[in]	hash_size	hash table size. */
-void
-btr_search_sys_create(ulint hash_size);
+void btr_search_sys_create(ulint hash_size);
 
 /** Resize hash index hash table.
 @param[in]	hash_size	hash index hash table size */
-void
-btr_search_sys_resize(ulint hash_size);
+void btr_search_sys_resize(ulint hash_size);
 
 /** Frees the adaptive search system at a database shutdown. */
-void
-btr_search_sys_free();
+void btr_search_sys_free();
 
 /** Disable the adaptive hash search system and empty the index.
 @param  need_mutex      need to acquire dict_sys->mutex */
-void
-btr_search_disable(
-	bool	need_mutex);
+void btr_search_disable(bool need_mutex);
 /** Enable the adaptive hash search system. */
-void
-btr_search_enable();
+void btr_search_enable();
 
 /** Returns the value of ref_count. The value is protected by latch.
 @param[in]	info		search info
@@ -91,12 +85,11 @@ both have sensible values.
 				we assume the caller uses his search latch
 				to protect the record!
 @param[out]	cursor		tree cursor
-@param[in]	has_search_latch
-				latch mode the caller currently has on
-				search system: RW_S/X_LATCH or 0
+@param[in]	ahi_latch	the adaptive hash index latch being held,
+				or NULL
 @param[in]	mtr		mini transaction
-@return TRUE if succeeded */
-ibool
+@return whether the search succeeded */
+bool
 btr_search_guess_on_hash(
 	dict_index_t*	index,
 	btr_search_t*	info,
@@ -104,22 +97,19 @@ btr_search_guess_on_hash(
 	ulint		mode,
 	ulint		latch_mode,
 	btr_cur_t*	cursor,
-	ulint		has_search_latch,
+	rw_lock_t*	ahi_latch,
 	mtr_t*		mtr);
 
-/** Moves or deletes hash entries for moved records. If new_page is already
-hashed, then the hash index for page, if any, is dropped. If new_page is not
-hashed, and page is hashed, then a new hash index is built to new_page with the
-same parameters as page (this often happens when a page is split).
-@param[in,out]	new_block	records are copied to this page.
-@param[in,out]	block		index page from which record are copied, and the
-				copied records will be deleted from this page.
-@param[in,out]	index		record descriptor */
+/** Move or delete hash entries for moved records, usually in a page split.
+If new_block is already hashed, then any hash index for block is dropped.
+If new_block is not hashed, and block is hashed, then a new hash index is
+built to new_block with the same parameters as block.
+@param[in,out]	new_block	destination page
+@param[in,out]	block		source page (subject to deletion later) */
 void
 btr_search_move_or_delete_hash_entries(
 	buf_block_t*	new_block,
-	buf_block_t*	block,
-	dict_index_t*	index);
+	buf_block_t*	block);
 
 /** Drop any adaptive hash index entries that point to an index page.
 @param[in,out]	block	block containing index page, s- or x-latched, or an
@@ -127,8 +117,7 @@ btr_search_move_or_delete_hash_entries(
 			block->buf_fix_count == 0 or it is an index page which
 			has already been removed from the buf_pool->page_hash
 			i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
-void
-btr_search_drop_page_hash_index(buf_block_t* block);
+void btr_search_drop_page_hash_index(buf_block_t* block);
 
 /** Drop possible adaptive hash index entries when a page is evicted
 from the buffer pool or freed in a file, or the index is being dropped.
@@ -138,118 +127,78 @@ void btr_search_drop_page_hash_when_freed(const page_id_t& page_id);
 /** Updates the page hash index when a single record is inserted on a page.
 @param[in]	cursor	cursor which was positioned to the place to insert
 			using btr_cur_search_, and the new record has been
-			inserted next to the cursor. */
+			inserted next to the cursor.
+@param[in]	ahi_latch	the adaptive hash index latch */
 void
-btr_search_update_hash_node_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
 
 /** Updates the page hash index when a single record is inserted on a page.
-@param[in]	cursor		cursor which was positioned to the
+@param[in,out]	cursor		cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
-				to the cursor */
+				to the cursor
+@param[in]	ahi_latch	the adaptive hash index latch */
 void
-btr_search_update_hash_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
 
 /** Updates the page hash index when a single record is deleted from a page.
 @param[in]	cursor	cursor which was positioned on the record to delete
 			using btr_cur_search_, the record is not yet deleted.*/
-void
-btr_search_update_hash_on_delete(btr_cur_t* cursor);
+void btr_search_update_hash_on_delete(btr_cur_t* cursor);
 
 /** Validates the search system.
 @return true if ok */
-bool
-btr_search_validate();
-
-/** X-Lock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index);
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index);
+bool btr_search_validate();
 
 /** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all();
+static inline void btr_search_x_lock_all();
 
 /** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all();
-
-/** S-Lock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index);
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index);
+static inline void btr_search_x_unlock_all();
 
 /** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all();
+static inline void btr_search_s_lock_all();
 
 #ifdef UNIV_DEBUG
 /** Check if thread owns all the search latches.
 @param[in]	mode	lock mode check
 @retval true if owns all of them
 @retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode);
+static inline bool btr_search_own_all(ulint mode);
 
 /** Check if thread owns any of the search latches.
 @param[in]	mode	lock mode check
 @retval true if owns any of them
 @retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode);
+static inline bool btr_search_own_any(ulint mode);
+
+/** @return whether this thread holds any of the search latches */
+static inline bool btr_search_own_any();
 #endif /* UNIV_DEBUG */
 
 /** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all();
+static inline void btr_search_s_unlock_all();
 
 /** Get the latch based on index attributes.
 A latch is selected from an array of latches using pair of index-id, space-id.
 @param[in]	index	index handler
 @return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index);
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index);
 
 /** Get the hash-table based on index attributes.
 A table is selected from an array of tables using pair of index-id, space-id.
 @param[in]	index	index handler
 @return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index);
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index);
 #else /* BTR_CUR_HASH_ADAPT */
 # define btr_search_sys_create(size)
+# define btr_search_sys_free()
 # define btr_search_drop_page_hash_index(block)
-# define btr_search_s_lock(index)
-# define btr_search_s_unlock(index)
 # define btr_search_s_lock_all(index)
 # define btr_search_s_unlock_all(index)
-# define btr_search_x_lock(index)
-# define btr_search_x_unlock(index)
 # define btr_search_info_update(index, cursor)
-# define btr_search_move_or_delete_hash_entries(new_block, block, index)
-# define btr_search_update_hash_on_insert(cursor)
+# define btr_search_move_or_delete_hash_entries(new_block, block)
+# define btr_search_update_hash_on_insert(cursor, ahi_latch)
 # define btr_search_update_hash_on_delete(cursor)
 # define btr_search_sys_resize(hash_size)
 #endif /* BTR_CUR_HASH_ADAPT */
@@ -258,15 +207,11 @@ btr_get_search_table(const dict_index_t* index);
 /** Create and initialize search info.
 @param[in,out]	heap		heap where created
 @return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /** @return the search info of an index */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(dict_index_t* index)
+static inline btr_search_t* btr_search_get_info(dict_index_t* index)
 {
 	return(index->search_info);
 }
@@ -310,7 +255,7 @@ struct btr_search_t{
 	ulint	n_bytes;	/*!< recommended prefix: number of bytes in
 				an incomplete field
 				@see BTR_PAGE_MAX_REC_SIZE */
-	ibool	left_side;	/*!< TRUE or FALSE, depending on whether
+	bool	left_side;	/*!< true or false, depending on whether
 				the leftmost record of several records with
 				the same prefix should be indexed in the
 				hash index */
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index b5a7536a2b4..716410e3557 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,9 +31,7 @@ Created 2/17/1996 Heikki Tuuri
 /** Create and initialize search info.
 @param[in,out]	heap		heap where created
 @return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
 {
 	btr_search_t*	info = static_cast<btr_search_t*>(
 		mem_heap_zalloc(heap, sizeof(btr_search_t)));
@@ -45,25 +44,23 @@ btr_search_info_create(mem_heap_t* heap)
 }
 
 #ifdef BTR_CUR_HASH_ADAPT
-/*********************************************************************//**
-Updates the search info. */
+/** Updates the search info.
+@param[in,out]	info	search info
+@param[in,out]	cursor	cursor which was just positioned */
 void
-btr_search_info_update_slow(
-/*========================*/
-	btr_search_t*	info,	/*!< in/out: search info */
-	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
+btr_search_info_update_slow(btr_search_t* info, btr_cur_t* cursor);
 
 /*********************************************************************//**
 Updates the search info. */
-UNIV_INLINE
+static inline
 void
 btr_search_info_update(
 /*===================*/
 	dict_index_t*	index,	/*!< in: index of the cursor */
 	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
 {
-	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
-	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+	ut_ad(!btr_search_own_any(RW_LOCK_S));
+	ut_ad(!btr_search_own_any(RW_LOCK_X));
 
 	if (dict_index_is_spatial(index) || !btr_search_enabled) {
 		return;
@@ -87,28 +84,8 @@ btr_search_info_update(
 	btr_search_info_update_slow(info, cursor);
 }
 
-/** X-Lock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index)
-{
-	rw_lock_x_lock(btr_get_search_latch(index));
-}
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index)
-{
-	rw_lock_x_unlock(btr_get_search_latch(index));
-}
-
 /** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all()
+static inline void btr_search_x_lock_all()
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		rw_lock_x_lock(btr_search_latches[i]);
@@ -116,37 +93,15 @@ btr_search_x_lock_all()
 }
 
 /** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all()
+static inline void btr_search_x_unlock_all()
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		rw_lock_x_unlock(btr_search_latches[i]);
 	}
 }
 
-/** S-Lock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index)
-{
-	rw_lock_s_lock(btr_get_search_latch(index));
-}
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in]	index	index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index)
-{
-	rw_lock_s_unlock(btr_get_search_latch(index));
-}
-
 /** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all()
+static inline void btr_search_s_lock_all()
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		rw_lock_s_lock(btr_search_latches[i]);
@@ -154,9 +109,7 @@ btr_search_s_lock_all()
 }
 
 /** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all()
+static inline void btr_search_s_unlock_all()
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		rw_lock_s_unlock(btr_search_latches[i]);
@@ -168,9 +121,7 @@ btr_search_s_unlock_all()
 @param[in]	mode	lock mode check
 @retval true if owns all of them
 @retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode)
+static inline bool btr_search_own_all(ulint mode)
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		if (!rw_lock_own(btr_search_latches[i], mode)) {
@@ -184,9 +135,7 @@ btr_search_own_all(ulint mode)
 @param[in]	mode	lock mode check
 @retval true if owns any of them
 @retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode)
+static inline bool btr_search_own_any(ulint mode)
 {
 	for (ulint i = 0; i < btr_ahi_parts; ++i) {
 		if (rw_lock_own(btr_search_latches[i], mode)) {
@@ -195,19 +144,31 @@ btr_search_own_any(ulint mode)
 	}
 	return(false);
 }
+
+/** @return whether this thread holds any search latch in S or X mode */
+static inline bool btr_search_own_any()
+{
+	for (ulint part = 0; part < btr_ahi_parts; ++part) {
+		if (rw_lock_own_flagged(btr_search_latches[part],
+					RW_LOCK_FLAG_X | RW_LOCK_FLAG_S)) {
+			return true;
+		}
+	}
+	return false;
+}
 #endif /* UNIV_DEBUG */
 
 /** Get the adaptive hash search index latch for a b-tree.
 @param[in]	index	b-tree index
 @return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index)
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index)
 {
 	ut_ad(index != NULL);
+	ut_ad(!index->table->space
+	      || index->table->space->id == index->table->space_id);
 
-	ulint	ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
-					   static_cast<ulint>(index->space));
+	ulint	ifold = ut_fold_ulint_pair(ulint(index->id),
+					   index->table->space_id);
 
 	return(btr_search_latches[ifold % btr_ahi_parts]);
 }
@@ -216,14 +177,13 @@ btr_get_search_latch(const dict_index_t* index)
 A table is selected from an array of tables using pair of index-id, space-id.
 @param[in]	index	index handler
 @return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index)
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index)
 {
 	ut_ad(index != NULL);
+	ut_ad(index->table->space->id == index->table->space_id);
 
-	ulint	ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
-					   static_cast<ulint>(index->space));
+	ulint	ifold = ut_fold_ulint_pair(ulint(index->id),
+					   index->table->space_id);
 
 	return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]);
 }
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index f56ac2e5e70..8befc038f23 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -48,9 +48,9 @@ buf_buddy_alloc(
 					the page resides */
 	ulint		size,		/*!< in: compressed page size
 					(between UNIV_ZIP_SIZE_MIN and
-					UNIV_PAGE_SIZE) */
-	ibool*		lru)		/*!< in: pointer to a variable
-					that will be assigned TRUE if
+					srv_page_size) */
+	bool*		lru)		/*!< in: pointer to a variable
+					that will be assigned true if
 				       	storage was allocated from the
 				       	LRU list and buf_pool->mutex was
 				       	temporarily released */
@@ -67,14 +67,14 @@ buf_buddy_free(
 	void*		buf,		/*!< in: block to be freed, must not
 					be pointed to by the buffer pool */
 	ulint		size)		/*!< in: block size,
-					up to UNIV_PAGE_SIZE */
+					up to srv_page_size */
 	MY_ATTRIBUTE((nonnull));
 
 /** Reallocate a block.
 @param[in]	buf_pool	buffer pool instance
 @param[in]	buf		block to be reallocated, must be pointed
 to by the buffer pool
-@param[in]	size		block size, up to UNIV_PAGE_SIZE
+@param[in]	size		block size, up to srv_page_size
 @retval false	if failed because of no free blocks. */
 bool
 buf_buddy_realloc(
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index 2b6d76df009..d166ab8441c 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -42,8 +42,8 @@ buf_buddy_alloc_low(
 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
 	ulint		i,		/*!< in: index of buf_pool->zip_free[],
 					or BUF_BUDDY_SIZES */
-	ibool*		lru)		/*!< in: pointer to a variable that
-					will be assigned TRUE if storage was
+	bool*		lru)		/*!< in: pointer to a variable that
+					will be assigned true if storage was
 					allocated from the LRU list and
 					buf_pool->mutex was temporarily
 					released */
@@ -96,9 +96,9 @@ buf_buddy_alloc(
 					the page resides */
 	ulint		size,		/*!< in: compressed page size
 					(between UNIV_ZIP_SIZE_MIN and
-					UNIV_PAGE_SIZE) */
-	ibool*		lru)		/*!< in: pointer to a variable
-					that will be assigned TRUE if
+					srv_page_size) */
+	bool*		lru)		/*!< in: pointer to a variable
+					that will be assigned true if
 				       	storage was allocated from the
 				       	LRU list and buf_pool->mutex was
 				       	temporarily released */
@@ -106,7 +106,7 @@ buf_buddy_alloc(
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(ut_is_2pow(size));
 	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
-	ut_ad(size <= UNIV_PAGE_SIZE);
+	ut_ad(size <= srv_page_size);
 
 	return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
 					   lru));
@@ -123,12 +123,12 @@ buf_buddy_free(
 	void*		buf,		/*!< in: block to be freed, must not
 					be pointed to by the buffer pool */
 	ulint		size)		/*!< in: block size,
-					up to UNIV_PAGE_SIZE */
+					up to srv_page_size */
 {
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(ut_is_2pow(size));
 	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
-	ut_ad(size <= UNIV_PAGE_SIZE);
+	ut_ad(size <= srv_page_size);
 
 	buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
 }
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index a79b39235f3..33612f85ed6 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -268,18 +268,8 @@ public:
 		m_fold = src.fold();
 	}
 
-	/** Reset the values from a (space, page_no).
-	@param[in]	space	tablespace id
-	@param[in]	page_no	page number */
-	inline void reset(ulint space, ulint page_no)
-	{
-		m_space = static_cast<ib_uint32_t>(space);
-		m_page_no = static_cast<ib_uint32_t>(page_no);
-		m_fold = ULINT_UNDEFINED;
-
-		ut_ad(space <= 0xFFFFFFFFU);
-		ut_ad(page_no <= 0xFFFFFFFFU);
-	}
+	/** Reset: space id and page number to ~0U, fold to ULINT_UNDEFINED. */
+	void reset() { m_space= ~0U; m_page_no= ~0U; m_fold= ULINT_UNDEFINED; }
 
 	/** Reset the page number only.
 	@param[in]	page_no	page number */
@@ -1605,7 +1595,7 @@ public:
 	bool            encrypted;	/*!< page is still encrypted */
 
 	ulint           real_size;	/*!< Real size of the page
-					Normal pages == UNIV_PAGE_SIZE
+					Normal pages == srv_page_size
 					page compressed pages, payload
 					size alligned to sector boundary.
 					*/
@@ -1740,9 +1730,9 @@ struct buf_block_t{
 					buf_pool->page_hash can point
 					to buf_page_t or buf_block_t */
 	byte*		frame;		/*!< pointer to buffer frame which
-					is of size UNIV_PAGE_SIZE, and
+					is of size srv_page_size, and
 					aligned to an address divisible by
-					UNIV_PAGE_SIZE */
+					srv_page_size */
 	BPageLock	lock;		/*!< read-write lock of the buffer
 					frame */
 	UT_LIST_NODE_T(buf_block_t) unzip_LRU;
@@ -1756,7 +1746,7 @@ struct buf_block_t{
 					used in debugging */
 	ibool		in_withdraw_list;
 #endif /* UNIV_DEBUG */
-	unsigned	lock_hash_val:32;/*!< hashed value of the page address
+	uint32_t	lock_hash_val;	/*!< hashed value of the page address
 					in the record lock hash table;
 					protected by buf_block_t::lock
 					(or buf_block_t::mutex, buf_pool->mutex
@@ -1902,7 +1892,7 @@ struct buf_block_t{
 /**********************************************************************//**
 Compute the hash fold value for blocks in buf_pool->zip_hash. */
 /* @{ */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
 /* @} */
@@ -2369,8 +2359,12 @@ Use these instead of accessing buf_pool->mutex directly. */
 
 
 /** Get appropriate page_hash_lock. */
-# define buf_page_hash_lock_get(buf_pool, page_id)	\
-	hash_get_lock((buf_pool)->page_hash, (page_id).fold())
+UNIV_INLINE
+rw_lock_t*
+buf_page_hash_lock_get(const buf_pool_t* buf_pool, const page_id_t& page_id)
+{
+	return hash_get_lock(buf_pool->page_hash, page_id.fold());
+}
 
 /** If not appropriate page_hash_lock, relock until appropriate. */
 # define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 38c52d5e608..8314797e78d 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -2,7 +2,7 @@
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -115,7 +115,7 @@ ulint
 buf_pool_get_n_pages(void)
 /*======================*/
 {
-	return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+	return buf_pool_get_curr_size() >> srv_page_size_shift;
 }
 
 /********************************************************************//**
@@ -761,7 +761,7 @@ buf_frame_align(
 
         ut_ad(ptr);
 
-        frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+        frame = (buf_frame_t*) ut_align_down(ptr, srv_page_size);
 
         return(frame);
 }
@@ -778,11 +778,11 @@ buf_ptr_get_fsp_addr(
 	fil_addr_t*	addr)	/*!< out: page offset and byte offset */
 {
 	const page_t*	page = (const page_t*) ut_align_down(ptr,
-							     UNIV_PAGE_SIZE);
+							     srv_page_size);
 
 	*space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 	addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
-	addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
+	addr->boffset = ut_align_offset(ptr, srv_page_size);
 }
 
 /**********************************************************************//**
@@ -867,7 +867,7 @@ buf_frame_copy(
 {
 	ut_ad(buf && frame);
 
-	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+	ut_memcpy(buf, frame, srv_page_size);
 
 	return(buf);
 }
@@ -955,7 +955,7 @@ ulint
 buf_block_fix(
 	buf_page_t*	bpage)
 {
-	return(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
+	return uint32(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
 }
 
 /** Increments the bufferfix count.
@@ -1003,9 +1003,10 @@ ulint
 buf_block_unfix(
 	buf_page_t*	bpage)
 {
-	ulint	count = my_atomic_add32((int32*) &bpage->buf_fix_count, -1) - 1;
-	ut_ad(count + 1 != 0);
-	return(count);
+	uint32	count = uint32(my_atomic_add32((int32*) &bpage->buf_fix_count,
+					       -1));
+	ut_ad(count != 0);
+	return count - 1;
 }
 
 /** Decrements the bufferfix count.
@@ -1424,8 +1425,8 @@ bool
 buf_pool_is_obsolete(
 	ulint	withdraw_clock)
 {
-	return(buf_pool_withdrawing
-	       || buf_withdraw_clock != withdraw_clock);
+	return(UNIV_UNLIKELY(buf_pool_withdrawing
+			     || buf_withdraw_clock != withdraw_clock));
 }
 
 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
index 20955a5b2e6..dc0dbafa4c4 100644
--- a/storage/innobase/include/buf0checksum.h
+++ b/storage/innobase/include/buf0checksum.h
@@ -36,7 +36,7 @@ when it is written to a file and also checked for a match when reading from
 the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
 variants. Note that we must be careful to calculate the same value on 32-bit
 and 64-bit architectures.
-@param[in]	page			buffer page (UNIV_PAGE_SIZE bytes)
+@param[in]	page			buffer page (srv_page_size bytes)
 @param[in]	use_legacy_big_endian	if true then use big endian
 byteorder when converting byte strings to integers
 @return checksum */
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 598609e2be4..5d2e5e9fdf7 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -131,7 +131,7 @@ struct buf_dblwr_t{
 				doublewrite block (64 pages) */
 	ulint		block2;	/*!< page number of the second block */
 	ulint		first_free;/*!< first free position in write_buf
-				measured in units of UNIV_PAGE_SIZE */
+				measured in units of srv_page_size */
 	ulint		b_reserved;/*!< number of slots currently reserved
 				for batch flush. */
 	os_event_t	b_event;/*!< event where threads wait for a
@@ -150,7 +150,7 @@ struct buf_dblwr_t{
 				buffer. */
 	byte*		write_buf;/*!< write buffer used in writing to the
 				doublewrite buffer, aligned to an
-				address divisible by UNIV_PAGE_SIZE
+				address divisible by srv_page_size
 				(which is required by Windows aio) */
 	byte*		write_buf_unaligned;/*!< pointer to write_buf,
 				but unaligned */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 5c1dddd9a3b..741cb1dbca3 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -217,16 +217,10 @@ buf_flush_ready_for_replace(
 #ifdef UNIV_DEBUG
 /** Disables page cleaner threads (coordinator and workers).
 It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
-@param[in]	thd		thread handle
-@param[in]	var		pointer to system variable
-@param[out]	var_ptr		where the formal string goes
 @param[in]	save		immediate result from check function */
-void
-buf_flush_page_cleaner_disabled_debug_update(
-	THD*				thd,
-	struct st_mysql_sys_var*	var,
-	void*				var_ptr,
-	const void*			save);
+void buf_flush_page_cleaner_disabled_debug_update(THD*,
+						  st_mysql_sys_var*, void*,
+						  const void* save);
 #endif /* UNIV_DEBUG */
 
 /******************************************************************//**
@@ -239,6 +233,12 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
 /*===============================================*/
 	void*	arg);		/*!< in: a dummy parameter required by
 				os_thread_create */
+
+/** Adjust thread count for page cleaner workers.
+@param[in]	new_cnt		Number of threads to be used */
+void
+buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt);
+
 /******************************************************************//**
 Worker thread of page_cleaner.
 @return a dummy parameter */
@@ -339,12 +339,12 @@ flushed to disk before any redo logged operations go to the index. */
 class FlushObserver {
 public:
 	/** Constructor
-	@param[in]	space_id	table space id
+	@param[in,out]	space		tablespace
 	@param[in]	trx		trx instance
 	@param[in]	stage		performance schema accounting object,
 	used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
 	for accounting. */
-	FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
+	FlushObserver(fil_space_t* space, trx_t* trx, ut_stage_alter_t* stage);
 
 	/** Deconstructor */
 	~FlushObserver();
@@ -390,8 +390,8 @@ public:
 		buf_pool_t*	buf_pool,
 		buf_page_t*	bpage);
 private:
-	/** Table space id */
-	const ulint		m_space_id;
+	/** Tablespace */
+	fil_space_t*		m_space;
 
 	/** Trx instance */
 	const trx_t* const	m_trx;
@@ -413,57 +413,6 @@ private:
 	bool			m_interrupted;
 };
 
-/******************************************************************//**
-Start a buffer flush batch for LRU or flush list */
-ibool
-buf_flush_start(
-/*============*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_flush_t	flush_type);	/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-/******************************************************************//**
-End a buffer flush batch for LRU or flush list */
-void
-buf_flush_end(
-/*==========*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_flush_t	flush_type);	/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
-void
-buf_flush_common(
-/*=============*/
-	buf_flush_t	flush_type,	/*!< in: type of flush */
-	ulint		page_count);	/*!< in: number of pages flushed */
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-__attribute__((nonnull))
-void
-buf_flush_batch(
-/*============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
-					then the caller must not own any
-					latches on pages */
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in: in the case of BUF_FLUSH_LIST
-					all blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	flush_counters_t*	n);	/*!< out: flushed/evicted page
-					counts  */
-
-
 #include "buf0flu.ic"
 
 #endif
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 10dcdb27eb0..d3e953ad9c7 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
 
 // Forward declaration
 struct trx_t;
+struct fil_space_t;
 
 /******************************************************************//**
 Returns TRUE if less than 25 % of the buffer pool is available. This can be
diff --git a/storage/innobase/include/buf0mtflu.h b/storage/innobase/include/buf0mtflu.h
deleted file mode 100644
index 0475335bbf5..00000000000
--- a/storage/innobase/include/buf0mtflu.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
-Copyright (C) 2014 Fusion-io. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/buf0mtflu.h
-Multi-threadef flush method interface function prototypes
-
-Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
-		   Dhananjoy Das DDas@fusionio.com
-***********************************************************************/
-
-#ifndef buf0mtflu_h
-#define buf0mtflu_h
-
-/******************************************************************//**
-Add exit work item to work queue to signal multi-threded flush
-threads that they should exit.
-*/
-void
-buf_mtflu_io_thread_exit(void);
-/*===========================*/
-
-/******************************************************************//**
-Initialize multi-threaded flush thread syncronization data.
-@return Initialized multi-threaded flush thread syncroniztion data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
-	ulint n_threads,	/*!< in: Number of threads to create */
-	ulint wrk_cnt);		/*!< in: Number of work items */
-
-/******************************************************************//**
-Return true if multi-threaded flush is initialized
-@return true if initialized, false if not */
-bool
-buf_mtflu_init_done(void);
-/*======================*/
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void);
-/*===========================*/
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list
-*/
-bool
-buf_mtflu_flush_list(
-/*=================*/
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
-					blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	ulint*		n_processed);	/*!< out: the number of pages
-					which were processed is passed
-					back to caller. Ignored if NULL */
-
-/*********************************************************************//**
-Set correct thread identifiers to io thread array based on
-information we have. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
-	ulint n_threads,		/*!<in: Number of threads to fill */
-	void* ctx,		        /*!<in: thread context */
-	os_thread_id_t* thread_ids);	/*!<in: thread id array */
-
-#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 719699f5ee2..2847e328515 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -113,7 +113,7 @@ is_checksum_strict(ulint algo)
 #define BUF_BUDDY_LOW		(1U << BUF_BUDDY_LOW_SHIFT)
 
 /** Actual number of buddy sizes based on current page size */
-#define BUF_BUDDY_SIZES		(UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+#define BUF_BUDDY_SIZES		(srv_page_size_shift - BUF_BUDDY_LOW_SHIFT)
 
 /** Maximum number of buddy sizes based on the max page size */
 #define BUF_BUDDY_SIZES_MAX	(UNIV_PAGE_SIZE_SHIFT_MAX	\
@@ -121,7 +121,7 @@ is_checksum_strict(ulint algo)
 
 /** twice the maximum block size of the buddy system;
 the underlying memory is aligned by this amount:
-this must be equal to UNIV_PAGE_SIZE */
+this must be equal to srv_page_size */
 #define BUF_BUDDY_HIGH	(BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
 /* @} */
 
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index b6187d46025..d3361ad8b3b 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -591,6 +591,22 @@ struct dfield_t{
 	@param[in,out]	heap	memory heap in which the clone will be created
 	@return	the cloned object */
 	dfield_t* clone(mem_heap_t* heap) const;
+
+	/** Check whether this row-end system versioning field marks a
+	history row, i.e. its data differs from the *_max_bytes constant.
+	@return whether the field indicates a history row */
+	bool vers_history_row() const
+	{
+		ut_ad(type.vers_sys_end());
+		if (type.mtype == DATA_FIXBINARY) {
+			ut_ad(len == sizeof timestamp_max_bytes);
+			return 0 != memcmp(data, timestamp_max_bytes, len);
+		}
+
+		ut_ad(type.mtype == DATA_INT);
+		ut_ad(len == sizeof trx_id_max_bytes);
+		return 0 != memcmp(data, trx_id_max_bytes, len);
+	}
 };
 
 /** Structure for an SQL data tuple of fields (logical record) */
@@ -619,6 +635,15 @@ struct dtuple_t {
 /** Value of dtuple_t::magic_n */
 # define		DATA_TUPLE_MAGIC_N	65478679
 #endif /* UNIV_DEBUG */
+
+	/** Trim the tail of an index tuple before insert or update.
+	After instant ADD COLUMN, if the last fields of a clustered index tuple
+	match the 'default row', there will be no need to store them.
+	NOTE: A page latch in the index must be held, so that the index
+	may not lose 'instantness' before the trimmed tuple has been
+	inserted or updated.
+	@param[in]	index	index possibly with instantly added columns */
+	void trim(const dict_index_t& index);
 };
 
 /** A slot for a field in a big rec vector */
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 81788885aa5..310902f5166 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -94,6 +94,7 @@ dfield_get_len(
 	ut_ad(field);
 	ut_ad((field->len == UNIV_SQL_NULL)
 	      || (field->data != &data_error));
+	ut_ad(field->len != UNIV_SQL_DEFAULT);
 
 	return(field->len);
 }
@@ -108,6 +109,7 @@ dfield_set_len(
 	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
 {
 	ut_ad(field);
+	ut_ad(len != UNIV_SQL_DEFAULT);
 #ifdef UNIV_VALGRIND_DEBUG
 	if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
 #endif /* UNIV_VALGRIND_DEBUG */
@@ -326,6 +328,7 @@ dfield_data_is_binary_equal(
 	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
 	const byte*	data)	/*!< in: data */
 {
+	ut_ad(len != UNIV_SQL_DEFAULT);
 	return(len == dfield_get_len(field)
 	       && (len == UNIV_SQL_NULL
 		   || !memcmp(dfield_get_data(field), data, len)));
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index c4521d0723b..b999106fee0 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -29,6 +29,12 @@ Created 1/16/1996 Heikki Tuuri
 
 #include "univ.i"
 
+/** Special length indicating a missing instantly added column */
+#define UNIV_SQL_DEFAULT (UNIV_SQL_NULL - 1)
+
+/** @return whether a length is actually stored in a field */
+#define len_is_stored(len) ((len) != UNIV_SQL_NULL && (len) != UNIV_SQL_DEFAULT)
+
 extern ulint	data_mysql_default_charset_coll;
 #define DATA_MYSQL_BINARY_CHARSET_COLL 63
 
@@ -183,8 +189,12 @@ be less than 256 */
 				for shorter VARCHARs MySQL uses only 1 byte */
 #define	DATA_VIRTUAL	8192U	/* Virtual column */
 
-/** Get the number of system columns in a table. */
-#define dict_table_get_n_sys_cols(table) DATA_N_SYS_COLS
+/** System Versioning */
+#define DATA_VERS_START	16384U	/* start system field */
+#define DATA_VERS_END	32768U	/* end system field */
+/** system-versioned user data column */
+#define DATA_VERSIONED (DATA_VERS_START|DATA_VERS_END)
+
 /** Check whether locking is disabled (never). */
 #define dict_table_is_locking_disabled(table) false
 
@@ -355,9 +365,9 @@ dtype_form_prtype(ulint old_prtype, ulint charset_coll)
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
 codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
 UNIV_INLINE
-ibool
+bool
 dtype_is_utf8(
 /*==========*/
 	ulint	prtype);/*!< in: precise data type */
@@ -531,8 +541,24 @@ struct dtype_t{
 					in bytes */
 	unsigned	mbmaxlen:3;	/*!< maximum length of a character,
 					in bytes */
+
+	/** @return whether this is system versioned user field */
+	bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
+	/** @return whether this is the system field start */
+	bool vers_sys_start() const
+	{
+		return (prtype & DATA_VERSIONED) == DATA_VERS_START;
+	}
+	/** @return whether this is the system field end */
+	bool vers_sys_end() const
+	{
+		return (prtype & DATA_VERSIONED) == DATA_VERS_END;
+	}
 };
 
+/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
+extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
+
 #include "data0type.ic"
 
 #endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index 59f8c75fd65..56a588562ee 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -43,9 +43,9 @@ dtype_get_charset_coll(
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
 codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
 UNIV_INLINE
-ibool
+bool
 dtype_is_utf8(
 /*==========*/
 	ulint	prtype)	/*!< in: precise data type */
@@ -58,10 +58,10 @@ dtype_is_utf8(
 	case 33: /* utf8_general_ci */
 	case 83: /* utf8_bin */
 	case 254: /* utf8_general_cs */
-			return(TRUE);
+		return true;
 	}
 
-	return(FALSE);
+	return false;
 }
 
 /*********************************************************************//**
@@ -235,9 +235,8 @@ dtype_new_store_for_order_and_null_size(
 	ulint		prefix_len)/*!< in: prefix length to
 				replace type->len, or 0 */
 {
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+	compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
 	ulint	len;
 
 	ut_ad(type);
@@ -280,10 +279,7 @@ dtype_read_for_order_and_null_size(
 	dtype_t*	type,	/*!< in: type struct */
 	const byte*	buf)	/*!< in: buffer for stored type order info */
 {
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
+	compile_time_assert(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
 	type->mtype = buf[0] & 63;
 	type->prtype = buf[1];
 
@@ -309,11 +305,7 @@ dtype_new_read_for_order_and_null_size(
 	dtype_t*	type,	/*!< in: type struct */
 	const byte*	buf)	/*!< in: buffer for stored type order info */
 {
-	ulint	charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+	compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
 
 	type->mtype = buf[0] & 63;
 	type->prtype = buf[1];
@@ -328,7 +320,7 @@ dtype_new_read_for_order_and_null_size(
 
 	type->len = mach_read_from_2(buf + 2);
 
-	charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
+	ulint charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
 
 	if (dtype_is_string_type(type->mtype)) {
 		ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index d6de7dcf71b..25aced44b2e 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -122,7 +122,7 @@ dict_is_sys_table(
 /* The ids for the basic system tables and their indexes */
 #define DICT_TABLES_ID		1
 #define DICT_COLUMNS_ID		2
-#define DICT_INDEXES_ID		3
+#define DICT_INDEXES_ID		dict_index_t::DICT_INDEXES_ID /* 3 */
 #define DICT_FIELDS_ID		4
 /* The following is a secondary index on SYS_TABLES */
 #define DICT_TABLE_IDS_ID	5
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index e40c3f844e3..845a0a3888d 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -58,10 +58,7 @@ dict_sys_read_row_id(
 /*=================*/
 	const byte*	field)	/*!< in: record field */
 {
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+	compile_time_assert(DATA_ROW_ID_LEN == 6);
 	return(mach_read_from_6(field));
 }
 
@@ -74,10 +71,7 @@ dict_sys_write_row_id(
 	byte*		field,	/*!< in: record field */
 	row_id_t	row_id)	/*!< in: row id */
 {
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+	compile_time_assert(DATA_ROW_ID_LEN == 6);
 	mach_write_to_6(field, row_id);
 }
 
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 12c78862261..dc48aa59809 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -50,6 +50,7 @@ tab_create_graph_create(
 
 /** Creates an index create graph.
 @param[in]	index	index to create, built as a memory data structure
+@param[in]	table	table name
 @param[in,out]	heap	heap where created
 @param[in]	add_v	new virtual columns added in the same clause with
 			add index
@@ -57,8 +58,9 @@ tab_create_graph_create(
 ind_node_t*
 ind_create_graph_create(
 	dict_index_t*		index,
+	const char*		table,
 	mem_heap_t*		heap,
-	const dict_add_v_col_t*	add_v);
+	const dict_add_v_col_t*	add_v = NULL);
 
 /***********************************************************//**
 Creates a table. This is a high-level function used in SQL execution graphs.
@@ -68,15 +70,6 @@ dict_create_table_step(
 /*===================*/
 	que_thr_t*	thr);		/*!< in: query thread */
 
-/** Builds a tablespace to contain a table, using file-per-table=1.
-@param[in,out]	table	Table to build in its own tablespace.
-@param[in]	node	Table create node
-@return DB_SUCCESS or error code */
-dberr_t
-dict_build_tablespace_for_table(
-	dict_table_t*	table,
-	tab_node_t*	node);
-
 /** Assign a new table ID and put it into the table cache and the transaction.
 @param[in,out]	table	Table that needs an ID
 @param[in,out]	trx	Transaction */
@@ -151,14 +144,6 @@ dict_create_index_tree_in_mem(
 	dict_index_t*	index,		/*!< in/out: index */
 	const trx_t*	trx);		/*!< in: InnoDB transaction handle */
 
-/*******************************************************************//**
-Drops the index tree but don't update SYS_INDEXES table. */
-void
-dict_drop_index_tree_in_mem(
-/*========================*/
-	const dict_index_t*	index,	/*!< in: index */
-	ulint			page_no);/*!< in: index page-no */
-
 /****************************************************************//**
 Creates the foreign key constraints system tables inside InnoDB
 at server bootstrap or server start if they are not found or are
@@ -317,6 +302,7 @@ struct ind_node_t{
 	dict_index_t*	index;		/*!< index to create, built as a
 					memory data structure with
 					dict_mem_... functions */
+	const char*	table_name;	/*!< table name */
 	ins_node_t*	ind_def;	/*!< child node which does the insert of
 					the index definition; the row to be
 					inserted is built by the parent node  */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index b5f2b108959..3dcf290a276 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -192,7 +192,7 @@ dict_col_copy_type(
 
 /**********************************************************************//**
 Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
 needs to be stored in the undo log.
 @return bytes of column prefix to be stored in the undo log */
 UNIV_INLINE
@@ -375,15 +375,6 @@ dict_table_add_system_columns(
 	mem_heap_t*	heap)	/*!< in: temporary heap */
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
-Adds a table object to the dictionary cache. */
-void
-dict_table_add_to_cache(
-/*====================*/
-	dict_table_t*	table,		/*!< in: table */
-	bool		can_be_evicted,	/*!< in: whether can be evicted*/
-	mem_heap_t*	heap)		/*!< in: temporary heap */
-	MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
 Removes a table object from the dictionary cache. */
 void
 dict_table_remove_from_cache(
@@ -577,16 +568,6 @@ dict_foreign_find_index(
 					happened */
 
 	MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-const char*
-dict_table_get_col_name(
-/*====================*/
-	const dict_table_t*	table,	/*!< in: table */
-	ulint			col_nr)	/*!< in: column number */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /** Returns a virtual column's name.
 @param[in]	table		table object
@@ -889,14 +870,25 @@ dict_table_get_sys_col(
 	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 #else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos)	\
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys)	\
-((table)->cols + (table)->n_cols + (sys)	\
- - (dict_table_get_n_sys_cols(table)))
+#define dict_table_get_nth_col(table, pos)				\
+	(&(table)->cols[pos])
+#define dict_table_get_sys_col(table, sys)				\
+	(&(table)->cols[(table)->n_cols + (sys) - DATA_N_SYS_COLS])
 /* Get nth virtual columns */
-#define dict_table_get_nth_v_col(table, pos)	((table)->v_cols + (pos))
+#define dict_table_get_nth_v_col(table, pos)	(&(table)->v_cols[pos])
 #endif /* UNIV_DEBUG */
+/** Get the name of a table column; wrapper around dict_col_t::name().
+@see dict_col_t::name()
+@param[in]	table	table that the column is looked up in
+@param[in]	col_nr	column number, as passed to dict_table_get_nth_col()
+@return	column name, as returned by dict_col_t::name() */
+inline
+const char*
+dict_table_get_col_name(const dict_table_t* table, ulint col_nr)
+{
+	return(dict_table_get_nth_col(table, col_nr)->name(*table));
+}
+
 /********************************************************************//**
 Gets the given system column number of a table.
 @return column number */
@@ -921,30 +913,21 @@ dict_index_get_min_size(
 Check whether the table uses the compact page format.
 @return TRUE if table uses the compact page format */
 UNIV_INLINE
-ibool
+bool
 dict_table_is_comp(
 /*===============*/
 	const dict_table_t*	table)	/*!< in: table */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
-	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
-	ulint		flags)		/*!< in: dict_table_t::flags */
-	MY_ATTRIBUTE((warn_unused_result));
+/** Determine if a table uses atomic BLOBs (no locally stored prefix).
+@param[in]	table	InnoDB table
+@return whether BLOBs are atomic */
+inline
+bool
+dict_table_has_atomic_blobs(const dict_table_t* table)
+{
+	return(DICT_TF_HAS_ATOMIC_BLOBS(table->flags));
+}
 
 /** Set the various values in a dict_table_t::flags pointer.
 @param[in,out]	flags,		Pointer to a 4 byte Table Flags
@@ -952,8 +935,7 @@ dict_tf_get_format(
 @param[in]	zip_ssize	Zip Shift Size
 @param[in]	use_data_dir	Table uses DATA DIRECTORY
 @param[in]	page_compressed Table uses page compression
-@param[in]	page_compression_level Page compression level
-@param[in]	not_used        For future */
+@param[in]	page_compression_level Page compression level */
 UNIV_INLINE
 void
 dict_tf_set(
@@ -962,8 +944,7 @@ dict_tf_set(
 	ulint		zip_ssize,
 	bool		use_data_dir,
 	bool		page_compressed,
-	ulint		page_compression_level,
-	ulint		not_used);
+	ulint		page_compression_level);
 
 /** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
 Fsp Flags are written into the tablespace header at the offset
@@ -999,14 +980,8 @@ ulint
 dict_table_extent_size(
 	const dict_table_t*	table);
 
-/** Get the table page size.
-@param[in]	table	table
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
-	const dict_table_t*	table)
-	MY_ATTRIBUTE((warn_unused_result));
+/** Get the table page size, constructed from the flags of table->space. */
+#define dict_table_page_size(table) page_size_t((table)->space->flags)
 
 /*********************************************************************//**
 Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -1098,51 +1073,32 @@ dict_make_room_in_cache(
 	ulint		max_tables,	/*!< in: max tables allowed in cache */
 	ulint		pct_check);	/*!< in: max percent to check */
 
-#define BIG_ROW_SIZE	1024
-
-/** Adds an index to the dictionary cache.
-@param[in]	table	table on which the index is
-@param[in]	index	index; NOTE! The index memory
-			object is freed in this function!
-@param[in]	page_no	root page number of the index
-@param[in]	strict	TRUE=refuse to create the index
-			if records could be too big to fit in
-			an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache(
-	dict_table_t*	table,
-	dict_index_t*	index,
-	ulint		page_no,
-	ibool		strict)
-	MY_ATTRIBUTE((warn_unused_result));
-
 /** Clears the virtual column's index list before index is being freed.
 @param[in]  index   Index being freed */
-void
-dict_index_remove_from_v_col_list(
-	dict_index_t* index);
+void dict_index_remove_from_v_col_list(dict_index_t* index);
 
 /** Adds an index to the dictionary cache, with possible indexing newly
 added column.
-@param[in]	table	table on which the index is
 @param[in]	index	index; NOTE! The index memory
 			object is freed in this function!
-@param[in]	add_v	new virtual column that being added along with
-			an add index call
 @param[in]	page_no	root page number of the index
-@param[in]	strict	TRUE=refuse to create the index
+@param[in]	strict	true=refuse to create the index
 			if records could be too big to fit in
 			an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache_w_vcol(
-	dict_table_t*		table,
+@param[out]	err	DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION
+@param[in]	add_v	new virtual column that is being added along with
+			an add index call
+@return	the added index
+@retval	NULL	on error */
+dict_index_t*
+dict_index_add_to_cache(
 	dict_index_t*		index,
-	const dict_add_v_col_t* add_v,
 	ulint			page_no,
-	ibool			strict)
-	MY_ATTRIBUTE((warn_unused_result));
+	bool			strict = false,
+	dberr_t*		err = NULL,
+	const dict_add_v_col_t* add_v = NULL)
+	MY_ATTRIBUTE((nonnull(1)));
+
 /********************************************************************//**
 Gets the number of fields in the internal representation of an index,
 including fields added by the dictionary system.
@@ -1155,6 +1111,7 @@ dict_index_get_n_fields(
 					representation of index (in
 					the dictionary cache) */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /********************************************************************//**
 Gets the number of fields in the internal representation of an index
 that uniquely determine the position of an index entry in the index, if
@@ -1281,7 +1238,7 @@ Returns TRUE if the index contains a column or a prefix of that column.
 @param[in]	n		column number
 @param[in]	is_virtual	whether it is a virtual col
 @return TRUE if contains the column or its prefix */
-ibool
+bool
 dict_index_contains_col_or_prefix(
 /*==============================*/
 	const dict_index_t*	index,	/*!< in: index */
@@ -1443,42 +1400,15 @@ dict_index_copy_rec_order_prefix(
 @param[in,out]	heap		memory heap for allocation
 @return own: data tuple */
 dtuple_t*
-dict_index_build_data_tuple_func(
+dict_index_build_data_tuple(
 	const rec_t*		rec,
 	const dict_index_t*	index,
-#ifdef UNIV_DEBUG
 	bool			leaf,
-#endif /* UNIV_DEBUG */
 	ulint			n_fields,
 	mem_heap_t*		heap)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap)	\
-	dict_index_build_data_tuple_func(rec, index, leaf, n_fields, heap)
-#else /* UNIV_DEBUG */
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap)	\
-	dict_index_build_data_tuple_func(rec, index, n_fields, heap)
-#endif /* UNIV_DEBUG */
 
 /*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
-	const dict_index_t*	index)	/*!< in: index */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
-	dict_index_t*	index,	/*!< in/out: index */
-	ulint		space)	/*!< in: space id */
-	MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
 Gets the page number of the root of the index tree.
 @return page number */
 UNIV_INLINE
@@ -1860,18 +1790,10 @@ dict_set_corrupted_index_cache_only(
 Flags a table with specified space_id corrupted in the table dictionary
 cache.
 @return TRUE if successful */
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
-	ulint		space_id);	/*!< in: space ID */
+bool dict_set_corrupted_by_space(const fil_space_t* space);
 
-/** Flag a table with specified space_id encrypted in the data dictionary
-cache
-@param[in]	space_id	Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(
-	ulint	space_id);
+/** Flag a table encrypted in the data dictionary cache. */
+void dict_set_encrypted_by_space(const fil_space_t* space);
 
 /** Sets merge_threshold in the SYS_INDEXES
 @param[in,out]	index		index
@@ -1908,18 +1830,6 @@ dict_tf2_is_valid(
 	ulint	flags,
 	ulint	flags2);
 
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
-	const dict_table_t*	table)	/*!< in: table to check */
-	MY_ATTRIBUTE((warn_unused_result));
-
-#define dict_table_is_temporary(table) (table)->is_temporary()
-
 /*********************************************************************//**
 This function should be called whenever a page is successfully
 compressed. Updates the compression padding information. */
@@ -1953,8 +1863,6 @@ dict_tf_to_row_format_string(
 /*=========================*/
 	ulint	table_flag);		/*!< in: row format setting */
 
-#define dict_col_is_virtual(col) (col)->is_virtual()
-
 /** encode number of columns and number of virtual columns in one
 4 bytes value. We could do this because the number of columns in
 InnoDB is limited to 1017
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index fe2f8e32b1a..3bcd1abfbbf 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -270,7 +270,6 @@ dict_index_is_clust(
 	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
 	return(index->type & DICT_CLUSTERED);
 }
 
@@ -312,7 +311,7 @@ dict_index_is_spatial(
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	return(index->type & DICT_SPATIAL);
+	return ulint(UNIV_EXPECT(index->type & DICT_SPATIAL, 0));
 }
 
 /********************************************************************//**
@@ -356,8 +355,10 @@ dict_table_get_n_user_cols(
 	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	return(table->n_cols - dict_table_get_n_sys_cols(table));
+	/* n_cols counts stored columns only. A table may contain
+	virtual columns and no user-specified stored columns at all. */
+	ut_ad(table->n_cols >= DATA_N_SYS_COLS);
+	return unsigned(table->n_cols) - DATA_N_SYS_COLS;
 }
 
 /********************************************************************//**
@@ -489,8 +490,8 @@ dict_table_get_nth_v_col(
 	ut_ad(table);
 	ut_ad(pos < table->n_v_def);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	return(static_cast<dict_v_col_t*>(table->v_cols) + pos);
+	ut_ad(!table->v_cols[pos].m_col.is_instant());
+	return &table->v_cols[pos];
 }
 
 /********************************************************************//**
@@ -504,14 +505,8 @@ dict_table_get_sys_col(
 	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 {
 	dict_col_t*	col;
-
-	ut_ad(table);
-	ut_ad(sys < dict_table_get_n_sys_cols(table));
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	col = dict_table_get_nth_col(table, table->n_cols
-				     - dict_table_get_n_sys_cols(table)
-				     + sys);
+	col = dict_table_get_nth_col(table,
+				     dict_table_get_sys_col_no(table, sys));
 	ut_ad(col->mtype == DATA_SYS);
 	ut_ad(col->prtype == (sys | DATA_NOT_NULL));
 
@@ -530,28 +525,23 @@ dict_table_get_sys_col_no(
 	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 {
 	ut_ad(table);
-	ut_ad(sys < dict_table_get_n_sys_cols(table));
+	ut_ad(sys < DATA_N_SYS_COLS);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-	return(table->n_cols - dict_table_get_n_sys_cols(table) + sys);
+	return unsigned(table->n_cols) + (sys - DATA_N_SYS_COLS);
 }
 
 /********************************************************************//**
 Check whether the table uses the compact page format.
 @return TRUE if table uses the compact page format */
 UNIV_INLINE
-ibool
+bool
 dict_table_is_comp(
 /*===============*/
 	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table);
-
-#if DICT_TF_COMPACT != 1
-#error "DICT_TF_COMPACT must be 1"
-#endif
-
-	return(table->flags & DICT_TF_COMPACT);
+	return (table->flags & DICT_TF_COMPACT) != 0;
 }
 
 /************************************************************************
@@ -586,8 +576,8 @@ dict_tf_is_valid_not_redundant(ulint flags)
 		for the uncompressed page format */
 		return(false);
 	} else if (zip_ssize > PAGE_ZIP_SSIZE_MAX
-		   || zip_ssize > UNIV_PAGE_SIZE_SHIFT
-		   || UNIV_PAGE_SIZE_SHIFT > UNIV_ZIP_SIZE_SHIFT_MAX) {
+		   || zip_ssize > srv_page_size_shift
+		   || srv_page_size_shift > UNIV_ZIP_SIZE_SHIFT_MAX) {
 		/* KEY_BLOCK_SIZE is out of bounds, or
 		ROW_FORMAT=COMPRESSED is not supported with this
 		innodb_page_size (only up to 16KiB) */
@@ -627,7 +617,7 @@ dict_tf_is_valid(
 		bit. For ROW_FORMAT=REDUNDANT, only the DATA_DIR flag
 		(which we cleared above) can be set. If any other flags
 		are set, the flags are invalid. */
-		return(flags == 0);
+		return(flags == 0 || flags == DICT_TF_MASK_NO_ROLLBACK);
 	}
 
 	return(dict_tf_is_valid_not_redundant(flags));
@@ -683,44 +673,13 @@ dict_tf_get_rec_format(
 	return(REC_FORMAT_DYNAMIC);
 }
 
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
-	ulint		flags)	/*!< in: dict_table_t::flags */
-{
-	if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
-		return(UNIV_FORMAT_B);
-	}
-
-	return(UNIV_FORMAT_A);
-}
-
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	ut_ad(table);
-
-	return(dict_tf_get_format(table->flags));
-}
-
 /** Set the various values in a dict_table_t::flags pointer.
 @param[in,out]	flags,		Pointer to a 4 byte Table Flags
 @param[in]	format		File Format
 @param[in]	zip_ssize	Zip Shift Size
 @param[in]	use_data_dir	Table uses DATA DIRECTORY
 @param[in]	page_compressed Table uses page compression
-@param[in]	page_compression_level Page compression level
-@param[in]	not_used        For future */
+@param[in]	page_compression_level Page compression level */
 UNIV_INLINE
 void
 dict_tf_set(
@@ -730,8 +689,7 @@ dict_tf_set(
 	ulint		zip_ssize,
 	bool		use_data_dir,
 	bool		page_compressed,
-	ulint		page_compression_level,
-	ulint		not_used)
+	ulint		page_compression_level)
 {
 	switch (format) {
 	case REC_FORMAT_REDUNDANT:
@@ -848,7 +806,8 @@ dict_tf_to_sys_tables_type(
 			 | DICT_TF_MASK_ATOMIC_BLOBS
 			 | DICT_TF_MASK_DATA_DIR
 			 | DICT_TF_MASK_PAGE_COMPRESSION
-			 | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
+			 | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+			 | DICT_TF_MASK_NO_ROLLBACK);
 
 	return(type);
 }
@@ -872,21 +831,7 @@ dict_tf_get_page_size(
 
 	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
 
-	return(page_size_t(zip_size, univ_page_size.logical(), true));
-}
-
-/** Get the table page size.
-@param[in]	table	table
-@return a structure containing the compressed and uncompressed
-page sizes and a boolean indicating if the page is compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
-	const dict_table_t*	table)
-{
-	ut_ad(table != NULL);
-
-	return(dict_tf_get_page_size(table->flags));
+	return(page_size_t(zip_size, srv_page_size, true));
 }
 
 /*********************************************************************//**
@@ -1177,36 +1122,6 @@ dict_index_get_min_size(
 }
 
 /*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
-	const dict_index_t*	index)	/*!< in: index */
-{
-	ut_ad(index);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
-	return(index->space);
-}
-
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
-	dict_index_t*	index,	/*!< in/out: index */
-	ulint		space)	/*!< in: space id */
-{
-	ut_ad(index);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
-	index->space = unsigned(space);
-}
-
-/*********************************************************************//**
 Gets the page number of the root of the index tree.
 @return page number */
 UNIV_INLINE
@@ -1246,7 +1161,7 @@ ulint
 dict_index_get_space_reserve(void)
 /*==============================*/
 {
-	return(UNIV_PAGE_SIZE / 16);
+	return(srv_page_size / 16);
 }
 
 /********************************************************************//**
@@ -1376,7 +1291,7 @@ dict_table_is_fts_column(
 
 /**********************************************************************//**
 Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
 needs to be stored in the undo log.
 @return bytes of column prefix to be stored in the undo log */
 UNIV_INLINE
@@ -1387,16 +1302,15 @@ dict_max_field_len_store_undo(
 	const dict_col_t*	col)	/*!< in: column which index prefix
 					is based on */
 {
-	ulint	prefix_len = 0;
+	if (!dict_table_has_atomic_blobs(table)) {
+		return(0);
+	}
 
-	if (dict_table_get_format(table) >= UNIV_FORMAT_B)
-	{
-		prefix_len = col->max_prefix
-			? col->max_prefix
-			: DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+	if (col->max_prefix != 0) {
+		return(col->max_prefix);
 	}
 
-	return(prefix_len);
+	return(REC_VERSION_56_MAX_INDEX_COL_LEN);
 }
 
 /** Determine maximum bytes of a virtual column need to be stored
@@ -1416,10 +1330,10 @@ dict_max_v_field_len_store_undo(
 
 	/* This calculation conforms to the non-virtual column
 	maximum log length calculation:
-	1) for UNIV_FORMAT_A, upto REC_ANTELOPE_MAX_INDEX_COL_LEN
-	for UNIV_FORMAT_B, upto col->max_prefix or
-	2) REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
-	if (dict_table_get_format(table) >= UNIV_FORMAT_B) {
+	1) if no atomic BLOB, up to REC_ANTELOPE_MAX_INDEX_COL_LEN
+	2) if atomic BLOB, up to col->max_prefix or
+	REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
+	if (dict_table_has_atomic_blobs(table)) {
 		if (DATA_BIG_COL(col) && col->max_prefix > 0) {
 			max_log_len = col->max_prefix;
 		} else {
@@ -1462,18 +1376,6 @@ dict_table_is_corrupted(
 	return(table->corrupted);
 }
 
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
-	const dict_table_t*	table)	/*!< in: table to check */
-{
-	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
-}
-
 /** Check if the table is found is a file_per_table tablespace.
 This test does not use table flags2 since some REDUNDANT tables in the
 system tablespace may have garbage in the MIX_LEN field where flags2 is
@@ -1495,7 +1397,8 @@ bool
 dict_table_is_file_per_table(
 	const dict_table_t*	table)	/*!< in: table to check */
 {
-	return !is_system_tablespace(table->space);
+	return table->space != fil_system.sys_space
+		&& table->space != fil_system.temp_space;
 }
 
 /** Acquire the table handle. */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 9ba42007568..9b798353afd 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -57,15 +57,6 @@ enum dict_system_id_t {
 	SYS_NUM_SYSTEM_TABLES
 };
 
-/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info_t {
-	DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
-					structure with information from
-					a SYS_TABLES record */
-	DICT_TABLE_LOAD_FROM_CACHE = 1	/*!< Check first whether dict_table_t
-					is in the cache, if so, return it */
-};
-
 /** Check each tablespace found in the data dictionary.
 Look at each table defined in SYS_TABLES that has a space_id > 0.
 If the tablespace is not yet in the fil_system cache, look up the
@@ -201,10 +192,7 @@ dict_process_sys_tables_rec_and_mtr_commit(
 	mem_heap_t*	heap,		/*!< in: temporary memory heap */
 	const rec_t*	rec,		/*!< in: SYS_TABLES record */
 	dict_table_t**	table,		/*!< out: dict_table_t to fill */
-	dict_table_info_t status,	/*!< in: status bit controls
-					options such as whether we shall
-					look for dict_table_t from cache
-					first */
+	bool		cached,		/*!< in: whether to load from cache */
 	mtr_t*		mtr);		/*!< in/out: mini-transaction,
 					will be committed */
 /********************************************************************//**
@@ -245,7 +233,6 @@ information
 @return error message, or NULL on success */
 const char*
 dict_process_sys_virtual_rec(
-	mem_heap_t*	heap,
 	const rec_t*	rec,
 	table_id_t*	table_id,
 	ulint*		pos,
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index fc120149c5f..3e06def55b0 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -49,7 +49,6 @@ Created 1/8/1996 Heikki Tuuri
 #include "os0once.h"
 #include "ut0new.h"
 #include "fil0fil.h"
-#include <my_crypt.h>
 #include "fil0crypt.h"
 #include <set>
 #include <algorithm>
@@ -110,7 +109,7 @@ are described in fsp0fsp.h. */
 /** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
 #define DICT_TF_REDUNDANT		0	/*!< Redundant row format. */
 /** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
-#define DICT_TF_COMPACT			1	/*!< Compact row format. */
+#define DICT_TF_COMPACT			1U	/*!< Compact row format. */
 
 /** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
 the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
@@ -122,9 +121,10 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
 /** Width of the ZIP_SSIZE flag */
 #define DICT_TF_WIDTH_ZIP_SSIZE		4
 
-/** Width of the ATOMIC_BLOBS flag.  The Antelope file formats broke up
-BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Barracuda row formats store the whole blob or text field off-page atomically.
+/** Width of the ATOMIC_BLOBS flag.  The ROW_FORMAT=REDUNDANT and
+ROW_FORMAT=COMPACT broke up BLOB and TEXT fields, storing the first 768 bytes
+in the clustered index. ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED
+store the whole blob or text field off-page atomically.
 Secondary indexes are created from this external data using row_ext_t
 to cache the BLOB prefixes. */
 #define DICT_TF_WIDTH_ATOMIC_BLOBS	1
@@ -142,10 +142,10 @@ Width of the page compression flag
 #define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
 
 /**
-Width of atomic writes flag
-DEFAULT=0, ON = 1, OFF = 2
+The NO_ROLLBACK flag (3=yes; the values 1,2 used to stand for
+ATOMIC_WRITES=ON and ATOMIC_WRITES=OFF between MariaDB 10.1.0 and 10.2.3)
 */
-#define DICT_TF_WIDTH_ATOMIC_WRITES 2
+#define DICT_TF_WIDTH_NO_ROLLBACK 2
 
 /** Width of all the currently known table flags */
 #define DICT_TF_BITS	(DICT_TF_WIDTH_COMPACT			\
@@ -153,7 +153,8 @@ DEFAULT=0, ON = 1, OFF = 2
 			+ DICT_TF_WIDTH_ATOMIC_BLOBS		\
 			+ DICT_TF_WIDTH_DATA_DIR		\
 			+ DICT_TF_WIDTH_PAGE_COMPRESSION	\
-			+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
+			+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL	\
+			+ DICT_TF_WIDTH_NO_ROLLBACK)
 
 /** Zero relative shift position of the COMPACT field */
 #define DICT_TF_POS_COMPACT		0
@@ -172,11 +173,11 @@ DEFAULT=0, ON = 1, OFF = 2
 /** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
 #define DICT_TF_POS_PAGE_COMPRESSION_LEVEL	(DICT_TF_POS_PAGE_COMPRESSION	\
 					+ DICT_TF_WIDTH_PAGE_COMPRESSION)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define DICT_TF_POS_ATOMIC_WRITES	(DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+/** Zero relative shift position of the NO_ROLLBACK field */
+#define DICT_TF_POS_NO_ROLLBACK		(DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
 					+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-#define DICT_TF_POS_UNUSED		(DICT_TF_POS_ATOMIC_WRITES     \
-					+ DICT_TF_WIDTH_ATOMIC_WRITES)
+#define DICT_TF_POS_UNUSED		(DICT_TF_POS_NO_ROLLBACK     \
+					+ DICT_TF_WIDTH_NO_ROLLBACK)
 
 /** Bit mask of the COMPACT field */
 #define DICT_TF_MASK_COMPACT				\
@@ -202,10 +203,10 @@ DEFAULT=0, ON = 1, OFF = 2
 #define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL		\
 		((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
 		<< DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Bit mask of the ATOMIC_WRITES field */
-#define DICT_TF_MASK_ATOMIC_WRITES		\
-		((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \
-		<< DICT_TF_POS_ATOMIC_WRITES)
+/** Bit mask of the NO_ROLLBACK field */
+#define DICT_TF_MASK_NO_ROLLBACK		\
+		((~(~0U << DICT_TF_WIDTH_NO_ROLLBACK)) \
+		<< DICT_TF_POS_NO_ROLLBACK)
 
 /** Return the value of the COMPACT field */
 #define DICT_TF_GET_COMPACT(flags)			\
@@ -231,10 +232,6 @@ DEFAULT=0, ON = 1, OFF = 2
 #define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags)       \
 		((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL)	\
 		>> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Return the value of the ATOMIC_WRITES field */
-#define DICT_TF_GET_ATOMIC_WRITES(flags)       \
-		((flags & DICT_TF_MASK_ATOMIC_WRITES)	\
-		>> DICT_TF_POS_ATOMIC_WRITES)
 
 /* @} */
 
@@ -309,22 +306,13 @@ dict_table_t*
 dict_mem_table_create(
 /*==================*/
 	const char*	name,		/*!< in: table name */
-	ulint		space,		/*!< in: space where the clustered index
-					of the table is placed */
+	fil_space_t*	space,		/*!< in: tablespace */
 	ulint		n_cols,		/*!< in: total number of columns
 					including virtual and non-virtual
 					columns */
 	ulint		n_v_cols,	/*!< in: number of virtual columns */
 	ulint		flags,		/*!< in: table flags */
 	ulint		flags2);	/*!< in: table flags2 */
-/**********************************************************************//**
-Determines if a table belongs to a system database
-@return */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*==================*/
-	char	*name);		/*!< in: table name */
 /****************************************************************//**
 Free a table memory object. */
 void
@@ -408,11 +396,7 @@ dict_mem_fill_index_struct(
 /*=======================*/
 	dict_index_t*	index,		/*!< out: index to be filled */
 	mem_heap_t*	heap,		/*!< in: memory heap */
-	const char*	table_name,	/*!< in: table name */
 	const char*	index_name,	/*!< in: index name */
-	ulint		space,		/*!< in: space where the index tree is
-					placed, ignored if the index is of
-					the clustered type */
 	ulint		type,		/*!< in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
 	ulint		n_fields);	/*!< in: number of fields */
@@ -422,11 +406,8 @@ Creates an index memory object.
 dict_index_t*
 dict_mem_index_create(
 /*==================*/
-	const char*	table_name,	/*!< in: table name */
+	dict_table_t*	table,		/*!< in: table */
 	const char*	index_name,	/*!< in: index name */
-	ulint		space,		/*!< in: space where the index tree is
-					placed, ignored if the index is of
-					the clustered type */
 	ulint		type,		/*!< in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
 	ulint		n_fields);	/*!< in: number of fields */
@@ -563,36 +544,6 @@ private:
 	const char*	m_name;
 };
 
-/** Table name wrapper for pretty-printing */
-struct table_name_t
-{
-	/** The name in internal representation */
-	char*	m_name;
-
-	/** @return the end of the schema name */
-	const char* dbend() const
-	{
-		const char* sep = strchr(m_name, '/');
-		ut_ad(sep);
-		return sep;
-	}
-
-	/** @return the length of the schema name, in bytes */
-	size_t dblen() const { return dbend() - m_name; }
-
-	/** Determine the filename-safe encoded table name.
-	@return	the filename-safe encoded table name */
-	const char* basename() const { return dbend() + 1; }
-
-	/** The start of the table basename suffix for partitioned tables */
-	static const char part_suffix[4];
-
-	/** Determine the partition or subpartition name suffix.
-	@return the partition name
-	@retval	NULL	if the table is not partitioned */
-	const char* part() const { return strstr(basename(), part_suffix); }
-};
-
 /** Data structure for a column in a table */
 struct dict_col_t{
 	/*----------------------*/
@@ -634,14 +585,74 @@ struct dict_col_t{
 					of an index */
 	unsigned	max_prefix:12;	/*!< maximum index prefix length on
 					this column. Our current max limit is
-					3072 for Barracuda table */
-
-	/** @return whether this is a virtual column */
-	bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+					3072 (REC_VERSION_56_MAX_INDEX_COL_LEN)
+					bytes. */
 
 	/** Detach the column from an index.
 	@param[in]	index	index to be detached from */
 	inline void detach(const dict_index_t& index);
+
+	/** Data for instantly added columns */
+	struct def_t {
+		/** original default value of instantly added column */
+		const void*	data;
+		/** len of data, or UNIV_SQL_DEFAULT if unavailable */
+		ulint		len;
+	} def_val;
+
+	/** Retrieve the column name.
+	@param[in]	table	the table of this column */
+	const char* name(const dict_table_t& table) const;
+
+	/** @return whether this is a virtual column */
+	bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+	/** @return whether NULL is an allowed value for this column */
+	bool is_nullable() const { return !(prtype & DATA_NOT_NULL); }
+
+	/** @return whether table of this system field is TRX_ID-based */
+	bool vers_native() const
+	{
+		ut_ad(vers_sys_start() || vers_sys_end());
+		ut_ad(mtype == DATA_INT || mtype == DATA_FIXBINARY);
+		return mtype == DATA_INT;
+	}
+	/** @return whether this is system versioned */
+	bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
+	/** @return whether this is the system version start */
+	bool vers_sys_start() const
+	{
+		return (prtype & DATA_VERSIONED) == DATA_VERS_START;
+	}
+	/** @return whether this is the system version end */
+	bool vers_sys_end() const
+	{
+		return (prtype & DATA_VERSIONED) == DATA_VERS_END;
+	}
+
+	/** @return whether this is an instantly-added column */
+	bool is_instant() const
+	{
+		DBUG_ASSERT(def_val.len != UNIV_SQL_DEFAULT || !def_val.data);
+		return def_val.len != UNIV_SQL_DEFAULT;
+	}
+	/** Get the default value of an instantly-added column.
+	@param[out]	len	value length (in bytes), or UNIV_SQL_NULL
+	@return	default value
+	@retval	NULL	if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+	const byte* instant_value(ulint* len) const
+	{
+		DBUG_ASSERT(is_instant());
+		*len = def_val.len;
+		return static_cast<const byte*>(def_val.data);
+	}
+
+	/** Remove the 'instant ADD' status of the column */
+	void remove_instant()
+	{
+		DBUG_ASSERT(is_instant());
+		def_val.len = UNIV_SQL_DEFAULT;
+		def_val.data = NULL;
+	}
 };
 
 /** Index information put in a list of virtual column structure. Index
@@ -653,6 +664,9 @@ struct dict_v_idx_t {
 
 	/** position in this index */
 	ulint		nth_field;
+
+	dict_v_idx_t(dict_index_t* index, ulint nth_field)
+		: index(index), nth_field(nth_field) {}
 };
 
 /** Index list to put in dict_v_col_t */
@@ -722,17 +736,17 @@ files would be at risk! */
 /** Find out maximum indexed column length by its table format.
 For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
 field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For
-Barracuda row formats COMPRESSED and DYNAMIC, the length could
+ROW_FORMAT=COMPRESSED and ROW_FORMAT=DYNAMIC, the length could
 be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
-#define DICT_MAX_FIELD_LEN_BY_FORMAT(table)				\
-		((dict_table_get_format(table) < UNIV_FORMAT_B)		\
-			? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)		\
-			: REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table)	\
+	(dict_table_has_atomic_blobs(table)	\
+	 ? REC_VERSION_56_MAX_INDEX_COL_LEN	\
+	 : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
 
-#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)			\
-		((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B)	\
-			? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)		\
-			: REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)	\
+	(DICT_TF_HAS_ATOMIC_BLOBS(flags)		\
+	 ? REC_VERSION_56_MAX_INDEX_COL_LEN		\
+	 : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
 
 /** Defines the maximum fixed length column size */
 #define DICT_MAX_FIXED_COL_LEN		DICT_ANTELOPE_MAX_INDEX_COL_LEN
@@ -759,6 +773,15 @@ struct dict_field_t{
 
 	/** Zero-initialize all fields */
 	dict_field_t() : col(NULL), name(NULL), prefix_len(0), fixed_len(0) {}
+
+	/** Check whether two index fields are equivalent.
+	@param[in]	other	the other index field
+	@return	whether the index fields are equivalent */
+	bool same(const dict_field_t& other) const
+	{
+		return(prefix_len == other.prefix_len
+		       && fixed_len == other.fixed_len);
+	}
 };
 
 /**********************************************************************//**
@@ -834,10 +857,7 @@ struct dict_index_t{
 	index_id_t	id;	/*!< id of the index */
 	mem_heap_t*	heap;	/*!< memory heap */
 	id_name_t	name;	/*!< index name */
-	const char*	table_name;/*!< table name */
 	dict_table_t*	table;	/*!< back pointer to table */
-	unsigned	space:32;
-				/*!< space where the index tree is placed */
 	unsigned	page:32;/*!< index tree root page number */
 	unsigned	merge_threshold:6;
 				/*!< In the pessimistic delete, if the page
@@ -853,8 +873,8 @@ struct dict_index_t{
 				in a clustered index record, if the fields
 				before it are known to be of a fixed size,
 				0 otherwise */
-#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
-# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#if (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
 #endif
 	unsigned	n_user_defined_cols:10;
 				/*!< number of columns the user defined to
@@ -877,6 +897,17 @@ struct dict_index_t{
 	unsigned	n_def:10;/*!< number of fields defined so far */
 	unsigned	n_fields:10;/*!< number of fields in the index */
 	unsigned	n_nullable:10;/*!< number of nullable fields */
+	unsigned	n_core_fields:10;/*!< number of fields in the index
+				(before the first instant ADD COLUMN) */
+	/** number of bytes of null bits in ROW_FORMAT!=REDUNDANT node pointer
+	records; usually equal to UT_BITS_IN_BYTES(n_nullable), but
+	can be less in clustered indexes with instant ADD COLUMN */
+	unsigned	n_core_null_bytes:8;
+	/** magic value signalling that n_core_null_bytes was not
+	initialized yet */
+	static const unsigned NO_CORE_NULL_BYTES = 0xff;
+	/** The clustered index ID of the hard-coded SYS_INDEXES table. */
+	static const unsigned DICT_INDEXES_ID = 3;
 	unsigned	cached:1;/*!< TRUE if the index object is in the
 				dictionary cache */
 	unsigned	to_be_dropped:1;
@@ -1000,6 +1031,10 @@ struct dict_index_t{
 		uncommitted = !committed;
 	}
 
+	/** Notify that the index pages are going to be modified.
+	@param[in,out]	mtr	mini-transaction */
+	inline void set_modified(mtr_t& mtr) const;
+
 	/** @return whether this index is readable
 	@retval	true	normally
 	@retval	false	if this is a single-table tablespace
@@ -1007,6 +1042,9 @@ struct dict_index_t{
 			page cannot be read or decrypted */
 	inline bool is_readable() const;
 
+	/** @return whether instant ADD COLUMN is in effect */
+	inline bool is_instant() const;
+
 	/** @return whether the index is the primary key index
 	(not the clustered index of the change buffer) */
 	bool is_primary() const
@@ -1031,6 +1069,69 @@ struct dict_index_t{
 			n_fields = 0;
 		}
 	}
+
+	/** Determine how many fields of a given prefix can be set NULL.
+	@param[in]	n_prefix	number of fields in the prefix
+	@return	number of fields 0..n_prefix-1 that can be set NULL */
+	unsigned get_n_nullable(ulint n_prefix) const
+	{
+		DBUG_ASSERT(n_prefix > 0);
+		DBUG_ASSERT(n_prefix <= n_fields);
+		unsigned n = n_nullable;
+		for (; n_prefix < n_fields; n_prefix++) {
+			const dict_col_t* col = fields[n_prefix].col;
+			DBUG_ASSERT(!col->is_virtual());
+			n -= col->is_nullable();
+		}
+		DBUG_ASSERT(n < n_def);
+		return n;
+	}
+
+	/** Get the default value of an instantly-added clustered index field.
+	@param[in]	n	instantly added field position
+	@param[out]	len	value length (in bytes), or UNIV_SQL_NULL
+	@return	default value
+	@retval	NULL	if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+	const byte* instant_field_value(ulint n, ulint* len) const
+	{
+		DBUG_ASSERT(is_instant() || id == DICT_INDEXES_ID);
+		DBUG_ASSERT(n + (id == DICT_INDEXES_ID) >= n_core_fields);
+		DBUG_ASSERT(n < n_fields);
+		return fields[n].col->instant_value(len);
+	}
+
+	/** Adjust clustered index metadata for instant ADD COLUMN.
+	@param[in]	instant	clustered index definition after instant ADD COLUMN */
+	void instant_add_field(const dict_index_t& instant);
+
+	/** Remove the 'instant ADD' status of a clustered index.
+	Protected by index root page x-latch or table X-lock. */
+	void remove_instant()
+	{
+		DBUG_ASSERT(is_primary());
+		if (!is_instant()) {
+			return;
+		}
+		for (unsigned i = n_core_fields; i < n_fields; i++) {
+			fields[i].col->remove_instant();
+		}
+		n_core_fields = n_fields;
+		n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable));
+	}
+
+	/** Check if record in clustered index is historical row.
+	@param[in]	rec	clustered row
+	@param[in]	offsets	offsets
+	@return true if row is historical */
+	bool
+	vers_history_row(const rec_t* rec, const ulint* offsets);
+
+	/** Check if record in secondary index is historical row.
+	@param[in]	rec	record in a secondary index
+	@param[out]	history_row true if row is historical
+	@return true on error */
+	bool
+	vers_history_row(const rec_t* rec, bool &history_row);
 };
 
 /** Detach a column from an index.
@@ -1396,6 +1497,11 @@ struct dict_table_t {
 	@return	whether the last handle was released */
 	inline bool release();
 
+	/** @return whether the table has the NO_ROLLBACK flag set (value 3) */
+	bool no_rollback() const
+	{
+		return !(~unsigned(flags) & DICT_TF_MASK_NO_ROLLBACK);
+        }
 	/** @return whether this is a temporary table */
 	bool is_temporary() const
 	{
@@ -1409,9 +1515,66 @@ struct dict_table_t {
 			page cannot be read or decrypted */
 	bool is_readable() const
 	{
+		ut_ad(file_unreadable || space);
 		return(UNIV_LIKELY(!file_unreadable));
 	}
 
+	/** @return whether instant ADD COLUMN is in effect */
+	bool is_instant() const
+	{
+		return(UT_LIST_GET_FIRST(indexes)->is_instant());
+	}
+
+	/** @return whether the table supports instant ADD COLUMN */
+	bool supports_instant() const
+	{
+		return(!(flags & DICT_TF_MASK_ZIP_SSIZE));
+	}
+
+	/** Adjust metadata for instant ADD COLUMN.
+	@param[in]	table	table definition after instant ADD COLUMN */
+	void instant_add_column(const dict_table_t& table);
+
+	/** Roll back instant_add_column().
+	@param[in]	old_n_cols	original n_cols
+	@param[in]	old_cols	original cols
+	@param[in]	old_col_names	original col_names */
+	void rollback_instant(
+		unsigned	old_n_cols,
+		dict_col_t*	old_cols,
+		const char*	old_col_names);
+
+	/** Trim the instantly added columns when an insert into SYS_COLUMNS
+	is rolled back during ALTER TABLE or recovery.
+	@param[in]	n	number of surviving non-system columns */
+	void rollback_instant(unsigned n);
+
+	/** Add the table definition to the data dictionary cache */
+	void add_to_cache();
+
+	bool versioned() const { return vers_start || vers_end; }
+	bool versioned_by_id() const
+	{
+		return vers_start && cols[vers_start].mtype == DATA_INT;
+	}
+
+	void inc_fk_checks()
+	{
+#ifdef UNIV_DEBUG
+		lint fk_checks= (lint)
+#endif
+		my_atomic_addlint(&n_foreign_key_checks_running, 1);
+		ut_ad(fk_checks >= 0);
+	}
+	void dec_fk_checks()
+	{
+#ifdef UNIV_DEBUG
+		lint fk_checks= (lint)
+#endif
+		my_atomic_addlint(&n_foreign_key_checks_running, ulint(-1));
+		ut_ad(fk_checks > 0);
+	}
+
 	/** Id of the table. */
 	table_id_t				id;
 
@@ -1432,8 +1595,10 @@ struct dict_table_t {
 	/** NULL or the directory path specified by DATA DIRECTORY. */
 	char*					data_dir_path;
 
-	/** Space where the clustered index of the table is placed. */
-	uint32_t				space;
+	/** The tablespace of the table */
+	fil_space_t*				space;
+	/** Tablespace ID */
+	ulint					space_id;
 
 	/** Stores information about:
 	1 row format (redundant or compact),
@@ -1532,7 +1697,10 @@ struct dict_table_t {
 
 	/** Virtual column names */
 	const char*				v_col_names;
-
+	unsigned	vers_start:10;
+				/*!< System Versioning: row start col index */
+	unsigned	vers_end:10;
+				/*!< System Versioning: row end col index */
 	bool		is_system_db;
 				/*!< True if the table belongs to a system
 				database (mysql, information_schema or
@@ -1749,7 +1917,7 @@ struct dict_table_t {
 	ulong					n_waiting_or_granted_auto_inc_locks;
 
 	/** The transaction that currently holds the the AUTOINC lock on this
-	table. Protected by lock_sys->mutex. */
+	table. Protected by lock_sys.mutex. */
 	const trx_t*				autoinc_trx;
 
 	/* @} */
@@ -1764,7 +1932,7 @@ struct dict_table_t {
 
 	/** Count of the number of record locks on this table. We use this to
 	determine whether we can evict the table from the dictionary cache.
-	It is protected by lock_sys->mutex. */
+	It is protected by lock_sys.mutex. */
 	ulint					n_rec_locks;
 
 private:
@@ -1774,7 +1942,7 @@ private:
 	int32					n_ref_count;
 
 public:
-	/** List of locks on the table. Protected by lock_sys->mutex. */
+	/** List of locks on the table. Protected by lock_sys.mutex. */
 	table_lock_list_t			locks;
 
 	/** Timestamp of the last modification of this table. */
@@ -1792,9 +1960,22 @@ public:
 	dict_vcol_templ_t*			vc_templ;
 };
 
-inline bool dict_index_t::is_readable() const
+inline void dict_index_t::set_modified(mtr_t& mtr) const
+{
+	mtr.set_named_space(table->space);
+}
+
+inline bool dict_index_t::is_readable() const { return table->is_readable(); }
+
+inline bool dict_index_t::is_instant() const
 {
-	return(UNIV_LIKELY(!table->file_unreadable));
+	ut_ad(n_core_fields > 0);
+	ut_ad(n_core_fields <= n_fields);
+	ut_ad(n_core_fields == n_fields
+	      || (type & ~(DICT_UNIQUE | DICT_CORRUPT)) == DICT_CLUSTERED);
+	ut_ad(n_core_fields == n_fields || table->supports_instant());
+	ut_ad(n_core_fields == n_fields || !table->is_temporary());
+	return(n_core_fields != n_fields);
 }
 
 inline bool dict_index_t::is_corrupted() const
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index da2ac629850..70424af7347 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -37,11 +37,7 @@ dict_mem_fill_index_struct(
 /*=======================*/
 	dict_index_t*	index,		/*!< out: index to be filled */
 	mem_heap_t*	heap,		/*!< in: memory heap */
-	const char*	table_name,	/*!< in: table name */
 	const char*	index_name,	/*!< in: index name */
-	ulint		space,		/*!< in: space where the index tree is
-					placed, ignored if the index is of
-					the clustered type */
 	ulint		type,		/*!< in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
 	ulint		n_fields)	/*!< in: number of fields */
@@ -61,11 +57,10 @@ dict_mem_fill_index_struct(
 	/* Assign a ulint to a 4-bit-mapped field.
 	Only the low-order 4 bits are assigned. */
 	index->type = unsigned(type);
-	index->space = (unsigned int) space;
 	index->page = FIL_NULL;
 	index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
-	index->table_name = table_name;
 	index->n_fields = (unsigned int) n_fields;
+	index->n_core_fields = (unsigned int) n_fields;
 	/* The '1 +' above prevents allocation
 	of an empty mem block */
 	index->nulls_equal = false;
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index e66666b66a3..d66afdd4b25 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -116,16 +116,9 @@ dict_stats_thread_deinit();
 #ifdef UNIV_DEBUG
 /** Disables dict stats thread. It's used by:
 	SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
-@param[in]	thd		thread handle
-@param[in]	var		pointer to system variable
-@param[out]	var_ptr		where the formal string goes
 @param[in]	save		immediate result from check function */
-void
-dict_stats_disabled_debug_update(
-	THD*				thd,
-	struct st_mysql_sys_var*	var,
-	void*				var_ptr,
-	const void*			save);
+void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+				      const void* save);
 #endif /* UNIV_DEBUG */
 
 /*****************************************************************//**
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 27b4cc0e694..f2fcae69bd5 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -28,6 +28,7 @@ Created 1/8/1996 Heikki Tuuri
 #define dict0types_h
 
 #include <ut0mutex.h>
+#include <rem0types.h>
 
 struct dict_sys_t;
 struct dict_col_t;
@@ -52,6 +53,13 @@ DICT_IBUF_ID_MIN plus the space id */
 typedef ib_id_t		table_id_t;
 typedef ib_id_t		index_id_t;
 
+/** Maximum transaction identifier */
+#define TRX_ID_MAX	IB_ID_MAX
+
+/** The bit pattern corresponding to TRX_ID_MAX */
+extern const byte trx_id_max_bytes[8];
+extern const byte timestamp_max_bytes[7];
+
 /** Error to ignore when we load table dictionary into memory. However,
 the table and index will be marked as "corrupted", and caller will
 be responsible to deal with corrupted table or index.
@@ -92,6 +100,36 @@ typedef ib_mutex_t DictSysMutex;
 #define TEMP_TABLE_PREFIX                "#sql"
 #define TEMP_TABLE_PATH_PREFIX           "/" TEMP_TABLE_PREFIX
 
+/** Table name wrapper for pretty-printing */
+struct table_name_t
+{
+	/** The name in internal representation */
+	char*	m_name;
+
+	/** @return the end of the schema name */
+	const char* dbend() const
+	{
+		const char* sep = strchr(m_name, '/');
+		ut_ad(sep);
+		return sep;
+	}
+
+	/** @return the length of the schema name, in bytes */
+	size_t dblen() const { return size_t(dbend() - m_name); }
+
+	/** Determine the filename-safe encoded table name.
+	@return	the filename-safe encoded table name */
+	const char* basename() const { return dbend() + 1; }
+
+	/** The start of the table basename suffix for partitioned tables */
+	static const char part_suffix[4];
+
+	/** Determine the partition or subpartition name suffix.
+	@return the partition name
+	@retval	NULL	if the table is not partitioned */
+	const char* part() const { return strstr(basename(), part_suffix); }
+};
+
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 /** Flag to control insert buffer debugging. */
 extern uint		ibuf_debug;
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
index 3126c8e4683..4b6c808b47c 100644
--- a/storage/innobase/include/dyn0buf.h
+++ b/storage/innobase/include/dyn0buf.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -32,14 +33,13 @@ Created 2013-03-16 Sunny Bains
 #include "dyn0types.h"
 
 /** Class that manages dynamic buffers. It uses a UT_LIST of
-dyn_buf_t::block_t instances. We don't use STL containers in
+mtr_buf_t::block_t instances. We don't use STL containers in
 order to avoid the overhead of heap calls. Using a custom memory
 allocator doesn't solve the problem either because we have to get
 the memory from somewhere. We can't use the block_t::m_data as the
 backend for the custom allocator because we would like the data in
 the blocks to be contiguous. */
-template <size_t SIZE = DYN_ARRAY_DATA_SIZE>
-class dyn_buf_t {
+class mtr_buf_t {
 public:
 
 	class block_t;
@@ -47,17 +47,19 @@ public:
 	typedef UT_LIST_NODE_T(block_t) block_node_t;
 	typedef UT_LIST_BASE_NODE_T(block_t) block_list_t;
 
+	/** DYN_ARRAY_DATA_SIZE - sizeof(m_node) + sizeof(m_used) */
+	enum { MAX_DATA_SIZE = DYN_ARRAY_DATA_SIZE
+	       - sizeof(block_node_t) + sizeof(ib_uint32_t) };
+
 	class block_t {
 	public:
 
 		block_t()
 		{
-			ut_ad(MAX_DATA_SIZE <= (2 << 15));
+			compile_time_assert(MAX_DATA_SIZE <= (2 << 15));
 			init();
 		}
 
-		~block_t() { }
-
 		/**
 		Gets the number of used bytes in a block.
 		@return	number of bytes used */
@@ -112,12 +114,12 @@ public:
 		/**
 		@return pointer to start of reserved space */
 		template <typename Type>
-		Type push(ib_uint32_t size)
+		Type push(uint32_t size)
 		{
 			Type	ptr = reinterpret_cast<Type>(end());
 
 			m_used += size;
-			ut_ad(m_used <= static_cast<ib_uint32_t>(MAX_DATA_SIZE));
+			ut_ad(m_used <= uint32_t(MAX_DATA_SIZE));
 
 			return(ptr);
 		}
@@ -131,7 +133,7 @@ public:
 			ut_ad(ptr <= begin() + m_buf_end);
 
 			/* We have done the boundary check above */
-			m_used = static_cast<ib_uint32_t>(ptr - begin());
+			m_used = uint32_t(ptr - begin());
 
 			ut_ad(m_used <= MAX_DATA_SIZE);
 			ut_d(m_buf_end = 0);
@@ -154,13 +156,6 @@ public:
 		ulint		m_magic_n;
 #endif /* UNIV_DEBUG */
 
-		/** SIZE - sizeof(m_node) + sizeof(m_used) */
-		enum {
-			MAX_DATA_SIZE = SIZE
-				      - sizeof(block_node_t)
-				      + sizeof(ib_uint32_t)
-		};
-
 		/** Storage */
 		byte		m_data[MAX_DATA_SIZE];
 
@@ -169,15 +164,13 @@ public:
 
 		/** number of data bytes used in this block;
 		DYN_BLOCK_FULL_FLAG is set when the block becomes full */
-		ib_uint32_t	m_used;
+		uint32_t	m_used;
 
-		friend class dyn_buf_t;
+		friend class mtr_buf_t;
 	};
 
-	enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE};
-
 	/** Default constructor */
-	dyn_buf_t()
+	mtr_buf_t()
 		:
 		m_heap(),
 		m_size()
@@ -187,7 +180,7 @@ public:
 	}
 
 	/** Destructor */
-	~dyn_buf_t()
+	~mtr_buf_t()
 	{
 		erase();
 	}
@@ -252,7 +245,7 @@ public:
 	@param size	in bytes of the element
 	@return	pointer to the element */
 	template <typename Type>
-	Type push(ib_uint32_t size)
+	Type push(uint32_t size)
 	{
 		ut_ad(size > 0);
 		ut_ad(size <= MAX_DATA_SIZE);
@@ -272,17 +265,11 @@ public:
 	Pushes n bytes.
 	@param str	string to write
 	@param len	string length */
-	void push(const byte* ptr, ib_uint32_t len)
+	void push(const byte* ptr, uint32_t len)
 	{
 		while (len > 0) {
-			ib_uint32_t	n_copied;
-
-			if (len >= MAX_DATA_SIZE) {
-				n_copied = MAX_DATA_SIZE;
-			} else {
-				n_copied = len;
-			}
-
+			uint32_t n_copied = std::min(len,
+						     uint32_t(MAX_DATA_SIZE));
 			::memmove(push<byte*>(n_copied), ptr, n_copied);
 
 			ptr += n_copied;
@@ -298,7 +285,7 @@ public:
 	const Type at(ulint pos) const
 	{
 		block_t*	block = const_cast<block_t*>(
-			const_cast<dyn_buf_t*>(this)->find(pos));
+			const_cast<mtr_buf_t*>(this)->find(pos));
 
 		return(reinterpret_cast<Type>(block->begin() + pos));
 	}
@@ -391,8 +378,8 @@ public:
 
 private:
 	// Disable copying
-	dyn_buf_t(const dyn_buf_t&);
-	dyn_buf_t& operator=(const dyn_buf_t&);
+	mtr_buf_t(const mtr_buf_t&);
+	mtr_buf_t& operator=(const mtr_buf_t&);
 
 	/**
 	Add the block to the end of the list*/
@@ -404,7 +391,7 @@ private:
 	}
 
 	/** @return the last block in the list */
-	block_t* back()
+	block_t* back() const
 	{
 		return(UT_LIST_GET_LAST(m_list));
 	}
@@ -484,8 +471,6 @@ private:
 	block_t			m_first_block;
 };
 
-typedef dyn_buf_t<DYN_ARRAY_DATA_SIZE> mtr_buf_t;
-
 /** mtr_buf_t copier */
 struct mtr_buf_copy_t {
 	/** The copied buffer */
diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h
index 13b3ec4e37e..5238213135f 100644
--- a/storage/innobase/include/fil0crypt.h
+++ b/storage/innobase/include/fil0crypt.h
@@ -27,9 +27,9 @@ Created 04/01/2015 Jan Lindström
 #define fil0crypt_h
 
 #ifndef UNIV_INNOCHECKSUM
-
 #include "os0event.h"
 #include "my_crypt.h"
+#include "fil0fil.h"
 #endif /*! UNIV_INNOCHECKSUM */
 
 /**
@@ -296,7 +296,6 @@ fil_space_destroy_crypt_data(
 Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
 @param[in]	ptr		Log entry start
 @param[in]	end_ptr		Log entry end
-@param[in]	block		buffer block
 @param[out]	err		DB_SUCCESS or DB_DECRYPTION_FAILED
 @return position on log buffer */
 UNIV_INTERN
@@ -304,7 +303,6 @@ byte*
 fil_parse_write_crypt_data(
 	byte*			ptr,
 	const byte*		end_ptr,
-	const buf_block_t*	block,
 	dberr_t*		err)
 	MY_ATTRIBUTE((warn_unused_result));
 
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 1307598971b..890684af67e 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -35,17 +35,13 @@ Created 10/25/1995 Heikki Tuuri
 #include "page0size.h"
 #include "ibuf0types.h"
 
-#include <list>
-
 // Forward declaration
-extern ibool srv_use_doublewrite_buf;
+extern my_bool srv_use_doublewrite_buf;
 extern struct buf_dblwr_t* buf_dblwr;
 struct trx_t;
 class page_id_t;
 class truncate_t;
 
-typedef std::list<char*, ut_allocator<char*> >	space_name_list_t;
-
 /** Structure containing encryption specification */
 struct fil_space_crypt_t;
 
@@ -86,7 +82,7 @@ struct fil_space_t {
 				/*!< LSN of the most recent
 				fil_names_write_if_was_clean().
 				Reset to 0 by fil_names_clear().
-				Protected by log_sys->mutex.
+				Protected by log_sys.mutex.
 				If and only if this is nonzero, the
 				tablespace will be in named_spaces. */
 	bool		stop_new_ops;
@@ -107,7 +103,8 @@ struct fil_space_t {
 	ulint		redo_skipped_count;
 				/*!< reference count for operations who want
 				to skip redo log in the file space in order
-				to make fsp_space_modify_check pass. */
+				to make fsp_space_modify_check pass.
+				Uses my_atomic_loadlint() and friends. */
 #endif
 	fil_type_t	purpose;/*!< purpose */
 	UT_LIST_BASE_NODE_T(fil_node_t) chain;
@@ -141,14 +138,14 @@ struct fil_space_t {
 	dropped. An example is change buffer merge.
 	The tablespace cannot be dropped while this is nonzero,
 	or while fil_node_t::n_pending is nonzero.
-	Protected by fil_system->mutex. */
+	Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
 	ulint		n_pending_ops;
 	/** Number of pending block read or write operations
 	(when a write is imminent or a read has recently completed).
 	The tablespace object cannot be freed while this is nonzero,
 	but it can be detached from fil_system.
 	Note that fil_node_t::n_pending tracks actual pending I/O requests.
-	Protected by fil_system->mutex. */
+	Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
 	ulint		n_pending_ios;
 	hash_node_t	hash;	/*!< hash chain node */
 	hash_node_t	name_hash;/*!< hash chain the name_hash table */
@@ -176,10 +173,6 @@ struct fil_space_t {
 	/** True if the device this filespace is on supports atomic writes */
 	bool		atomic_write_supported;
 
-	/** Release the reserved free extents.
-	@param[in]	n_reserved	number of reserved extents */
-	void release_free_extents(ulint n_reserved);
-
 	/** True if file system storing this tablespace supports
 	punch hole */
 	bool		punch_hole;
@@ -195,6 +188,82 @@ struct fil_space_t {
 		return !atomic_write_supported
 			&& srv_use_doublewrite_buf && buf_dblwr;
 	}
+
+	/** Try to reserve free extents by increasing n_reserved_extents.
+	@param[in]	n_free_now	current number of free extents
+	@param[in]	n_to_reserve	number of extents to reserve
+	@return	whether the reservation succeeded */
+	bool reserve_free_extents(ulint n_free_now, ulint n_to_reserve)
+	{
+		ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+		if (n_reserved_extents + n_to_reserve > n_free_now) {
+			return false; /* would exceed n_free_now */
+		}
+		/* The request fits: account for the new reservation. */
+		n_reserved_extents += n_to_reserve;
+		return true;
+	}
+
+	/** Release reserved free extents by decreasing n_reserved_extents.
+	@param[in]	n_reserved	number of reserved extents (0 is a no-op) */
+	void release_free_extents(ulint n_reserved)
+	{
+		if (!n_reserved) return; /* nothing to release */
+		ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+		ut_a(n_reserved_extents >= n_reserved); /* no underflow */
+		n_reserved_extents -= n_reserved;
+	}
+
+	/** Rename a file.
+	@param[in]	name	table name after renaming
+	@param[in]	path	tablespace file name after renaming
+	@param[in]	log	whether to write redo log
+	@return	error code
+	@retval	DB_SUCCESS	on success */
+	dberr_t rename(const char* name, const char* path, bool log);
+
+	/** Note that the tablespace has been imported.
+	Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
+	written while the space ID is being updated in each page. */
+	void set_imported();
+
+	/** Open each file. Only invoked on fil_system.temp_space.
+	@return whether all files were opened */
+	bool open();
+	/** Close each file. Only invoked on fil_system.temp_space. */
+	void close();
+
+	/** Acquire a tablespace reference by incrementing n_pending_ops. */
+	void acquire() { my_atomic_addlint(&n_pending_ops, 1); }
+	/** Release a tablespace reference; the caller must hold one. */
+	void release()
+	{
+		ut_ad(referenced());
+		my_atomic_addlint(&n_pending_ops, ulint(-1));
+	}
+	/** @return whether references are being held (n_pending_ops != 0) */
+	bool referenced() { return my_atomic_loadlint(&n_pending_ops); }
+	/** Const overload. @return whether references are being held */
+	bool referenced() const
+	{
+		return const_cast<fil_space_t*>(this)->referenced();
+	}
+
+	/** Acquire a tablespace reference for I/O: increment n_pending_ios. */
+	void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); }
+	/** Release a tablespace reference for I/O; one must be pending. */
+	void release_for_io()
+	{
+		ut_ad(pending_io());
+		my_atomic_addlint(&n_pending_ios, ulint(-1));
+	}
+	/** @return whether I/O is pending (n_pending_ios is nonzero) */
+	bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
+	/** Const overload. @return whether I/O is pending */
+	bool pending_io() const
+	{
+		return const_cast<fil_space_t*>(this)->pending_io();
+	}
 };
 
 /** Value of fil_space_t::magic_n */
@@ -204,13 +273,13 @@ struct fil_space_t {
 struct fil_node_t {
 	/** tablespace containing this file */
 	fil_space_t*	space;
-	/** file name; protected by fil_system->mutex and log_sys->mutex. */
+	/** file name; protected by fil_system.mutex and log_sys.mutex. */
 	char*		name;
 	/** file handle (valid if is_open) */
 	pfs_os_file_t	handle;
 	/** event that groups and serializes calls to fsync;
 	os_event_set() and os_event_reset() are protected by
-	fil_system_t::mutex */
+	fil_system.mutex */
 	os_event_t	sync_event;
 	/** whether the file actually is a raw device or disk partition */
 	bool		is_raw_disk;
@@ -235,7 +304,7 @@ struct fil_node_t {
 	int64_t		flush_counter;
 	/** link to other files in this tablespace */
 	UT_LIST_NODE_T(fil_node_t) chain;
-	/** link to the fil_system->LRU list (keeping track of open files) */
+	/** link to the fil_system.LRU list (keeping track of open files) */
 	UT_LIST_NODE_T(fil_node_t) LRU;
 
 	/** whether this file could use atomic write (data file) */
@@ -252,6 +321,9 @@ struct fil_node_t {
 	{
 		return(handle != OS_FILE_CLOSED);
 	}
+
+	/** Close the file handle. */
+	void close();
 };
 
 /** Value of fil_node_t::magic_n */
@@ -283,15 +355,15 @@ typedef	byte	fil_faddr_t;	/*!< 'type' definition in C: an address
 #endif /* !UNIV_INNOCHECKSUM */
 
 /** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE	4
+#define FIL_IBD_FILE_INITIAL_SIZE	4U
 
 /** 'null' (undefined) page offset in the context of file spaces */
 #define	FIL_NULL	ULINT32_UNDEFINED
 
 
-#define FIL_ADDR_PAGE	0	/* first in address is the page offset */
-#define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
-#define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
+#define FIL_ADDR_PAGE	0U	/* first in address is the page offset */
+#define	FIL_ADDR_BYTE	4U	/* then comes 2-byte byte offset within page*/
+#define	FIL_ADDR_SIZE	6U	/* address size is 6 bytes */
 
 #ifndef UNIV_INNOCHECKSUM
 
@@ -302,7 +374,7 @@ struct fil_addr_t {
 };
 
 /** The null file address */
-extern fil_addr_t	fil_addr_null;
+extern const fil_addr_t	fil_addr_null;
 
 #endif /* !UNIV_INNOCHECKSUM */
 
@@ -311,15 +383,15 @@ extern fil_addr_t	fil_addr_null;
 					page belongs to (== 0) but in later
 					versions the 'new' checksum of the
 					page */
-#define FIL_PAGE_OFFSET		4	/*!< page offset inside space */
-#define FIL_PAGE_PREV		8	/*!< if there is a 'natural'
+#define FIL_PAGE_OFFSET		4U	/*!< page offset inside space */
+#define FIL_PAGE_PREV		8U	/*!< if there is a 'natural'
 					predecessor of the page, its
 					offset.  Otherwise FIL_NULL.
 					This field is not set on BLOB
 					pages, which are stored as a
 					singly-linked list.  See also
 					FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT		12	/*!< if there is a 'natural' successor
+#define FIL_PAGE_NEXT		12U	/*!< if there is a 'natural' successor
 					of the page, its offset.
 					Otherwise FIL_NULL.
 					B-tree index pages
@@ -329,9 +401,9 @@ extern fil_addr_t	fil_addr_null;
 					FIL_PAGE_PREV and FIL_PAGE_NEXT
 					in the collation order of the
 					smallest user record on each page. */
-#define FIL_PAGE_LSN		16	/*!< lsn of the end of the newest
+#define FIL_PAGE_LSN		16U	/*!< lsn of the end of the newest
 					modification log record to the page */
-#define	FIL_PAGE_TYPE		24	/*!< file page type: FIL_PAGE_INDEX,...,
+#define	FIL_PAGE_TYPE		24U	/*!< file page type: FIL_PAGE_INDEX,...,
 					2 bytes.
 
 					The contents of this field can only
@@ -346,7 +418,7 @@ extern fil_addr_t	fil_addr_null;
 					MySQL/InnoDB 5.1.7 or later, the
 					contents of this field is valid
 					for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page
+#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26U /*!< for the first page
 					in a system tablespace data file
 					(ibdata*, not *.ibd): the file has
 					been flushed to disk at least up
@@ -360,7 +432,7 @@ extern fil_addr_t	fil_addr_null;
 #define	FIL_RTREE_SPLIT_SEQ_NUM	FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
 
 /** starting from 4.1.x this contains the space id of the page */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34U
 
 #define FIL_PAGE_SPACE_ID  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
 
@@ -387,7 +459,7 @@ extern fil_addr_t	fil_addr_null;
 						 then encrypted */
 #define FIL_PAGE_PAGE_COMPRESSED 34354  /*!< page compressed page */
 #define FIL_PAGE_INDEX		17855	/*!< B-tree node */
-#define FIL_PAGE_RTREE		17854	/*!< B-tree node */
+#define FIL_PAGE_RTREE		17854	/*!< R-tree node (SPATIAL INDEX) */
 #define FIL_PAGE_UNDO_LOG	2	/*!< Undo log page */
 #define FIL_PAGE_INODE		3	/*!< Index node */
 #define FIL_PAGE_IBUF_FREE_LIST	4	/*!< Insert buffer free list */
@@ -410,15 +482,26 @@ extern fil_addr_t	fil_addr_null;
 //#define FIL_PAGE_ENCRYPTED	15
 //#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16
 //#define FIL_PAGE_ENCRYPTED_RTREE 17
+/** Clustered index root page after instant ADD COLUMN */
+#define FIL_PAGE_TYPE_INSTANT	18
 
-/** Used by i_s.cc to index into the text description. */
+/** Used by i_s.cc to index into the text description.
+Note: FIL_PAGE_TYPE_INSTANT maps to the same description as FIL_PAGE_INDEX. */
 #define FIL_PAGE_TYPE_LAST	FIL_PAGE_TYPE_UNKNOWN
 					/*!< Last page type */
 /* @} */
 
-/** macro to check whether the page type is index (Btree or Rtree) type */
-#define fil_page_type_is_index(page_type)                          \
-        (page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE)
+/** @return whether the page type is B-tree or R-tree index */
+inline bool fil_page_type_is_index(ulint page_type)
+{
+	switch (page_type) {
+	case FIL_PAGE_TYPE_INSTANT: /* root page after instant ADD COLUMN */
+	case FIL_PAGE_INDEX: /* B-tree node */
+	case FIL_PAGE_RTREE: /* R-tree node (SPATIAL INDEX) */
+		return(true);
+	}
+	return(false); /* any other page type */
+}
 
 /** Check whether the page is index page (either regular Btree index or Rtree
 index */
@@ -453,7 +536,7 @@ The caller should hold an InnoDB table lock or a MDL that prevents
 the tablespace from being dropped during the operation,
 or the caller should be in single-threaded crash recovery mode
 (no user connections that could drop tablespaces).
-If this is not the case, fil_space_acquire() and fil_space_release()
+If this is not the case, fil_space_acquire() and fil_space_t::release()
 should be used instead.
 @param[in]	id	tablespace ID
 @return tablespace, or NULL if not found */
@@ -466,12 +549,42 @@ fil_space_get(
 data space) is stored here; below we talk about tablespaces, but also
 the ib_logfiles form a 'space' and it is handled here */
 struct fil_system_t {
+  /**
+    Constructor: initialise the base nodes of the LRU, space_list,
+    rotation_list, unflushed_spaces and named_spaces lists.
+    Some members may require late initialisation, thus we just mark object as
+    uninitialised. Real initialisation happens in create().
+  */
+  fil_system_t(): m_initialised(false)
+  {
+    UT_LIST_INIT(LRU, &fil_node_t::LRU);
+    UT_LIST_INIT(space_list, &fil_space_t::space_list);
+    UT_LIST_INIT(rotation_list, &fil_space_t::rotation_list);
+    UT_LIST_INIT(unflushed_spaces, &fil_space_t::unflushed_spaces);
+    UT_LIST_INIT(named_spaces, &fil_space_t::named_spaces);
+  }
+
+  bool is_initialised() const { return m_initialised; }
+
+  /**
+    Create the file system interface at database start.
+
+    @param[in] hash_size	hash table size
+  */
+  void create(ulint hash_size);
+
+  /** Close the file system interface at shutdown */
+  void close();
+
+private:
+  bool m_initialised;
+public:
 	ib_mutex_t	mutex;		/*!< The mutex protecting the cache */
+	fil_space_t*	sys_space;	/*!< The innodb_system tablespace */
+	fil_space_t*	temp_space;	/*!< The innodb_temporary tablespace */
 	hash_table_t*	spaces;		/*!< The hash table of spaces in the
 					system; they are hashed on the space
 					id */
-	hash_table_t*	name_hash;	/*!< hash table based on the space
-					name */
 	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
 					/*!< base node for the LRU list of the
 					most recently used open files with no
@@ -490,8 +603,6 @@ struct fil_system_t {
 					at least one file node where
 					modification_counter > flush_counter */
 	ulint		n_open;		/*!< number of files currently open */
-	ulint		max_n_open;	/*!< n_open is not allowed to exceed
-					this */
 	int64_t		modification_counter;/*!< when we write to a file we
 					increment this by one */
 	ulint		max_assigned_id;/*!< maximum space id in the existing
@@ -507,20 +618,19 @@ struct fil_system_t {
 					for which a MLOG_FILE_NAME
 					record has been written since
 					the latest redo log checkpoint.
-					Protected only by log_sys->mutex. */
+					Protected only by log_sys.mutex. */
 	UT_LIST_BASE_NODE_T(fil_space_t) rotation_list;
 					/*!< list of all file spaces needing
 					key rotation.*/
 
-	ibool		space_id_reuse_warned;
-					/* !< TRUE if fil_space_create()
+	bool		space_id_reuse_warned;
+					/*!< whether fil_space_create()
 					has issued a warning about
 					potential space_id reuse */
 };
 
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-extern fil_system_t*	fil_system;
+/** The tablespace memory cache. */
+extern fil_system_t	fil_system;
 
 #include "fil0crypt.h"
 
@@ -533,23 +643,6 @@ fil_space_get_latch(
 	ulint	id,
 	ulint*	flags);
 
-/** Gets the type of a file space.
-@param[in]	id	tablespace identifier
-@return file type */
-fil_type_t
-fil_space_get_type(
-	ulint	id);
-
-/** Note that a tablespace has been imported.
-It is initially marked as FIL_TYPE_IMPORT so that no logging is
-done during the import process when the space ID is stamped to each page.
-Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
-NOTE: temporary tablespaces are never imported.
-@param[in]	id	tablespace identifier */
-void
-fil_space_set_imported(
-	ulint	id);
-
 /** Append a file to the chain of files of a space.
 @param[in]	name		file name of a file that is not open
 @param[in]	size		file size in entire database blocks
@@ -611,16 +704,6 @@ fil_space_free(
 	ulint		id,
 	bool		x_latched);
 
-/** Returns the path from the first fil_node_t found with this space ID.
-The caller is responsible for freeing the memory allocated here for the
-value returned.
-@param[in]	id	Tablespace ID
-@return own: A copy of fil_node_t::path, NULL if space ID is zero
-or not found. */
-char*
-fil_space_get_first_path(
-	ulint		id);
-
 /** Set the recovered size of a tablespace in pages.
 @param id	tablespace ID
 @param size	recovered size in pages */
@@ -644,19 +727,6 @@ fil_space_get_flags(
 /*================*/
 	ulint	id);	/*!< in: space id */
 
-/** Open each fil_node_t of a named fil_space_t if not already open.
-@param[in]	name	Tablespace name
-@return true if all file nodes are opened. */
-bool
-fil_space_open(
-	const char*	name);
-
-/** Close each fil_node_t of a named fil_space_t if open.
-@param[in]	name	Tablespace name */
-void
-fil_space_close(
-	const char*	name);
-
 /** Returns the page size of the space and whether it is compressed or not.
 The tablespace must be cached in the memory cache.
 @param[in]	id	space id
@@ -667,18 +737,6 @@ fil_space_get_page_size(
 	ulint	id,
 	bool*	found);
 
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_init(
-/*=====*/
-	ulint	hash_size,	/*!< in: hash table size */
-	ulint	max_n_open);	/*!< in: max number of open files */
-/*******************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_close(void);
-/*===========*/
 /*******************************************************************//**
 Opens all log files and system tablespace data files. They stay open until the
 database server shutdown. This should be called at a server startup after the
@@ -756,11 +814,6 @@ fil_space_acquire_silent(ulint id)
 	return (fil_space_acquire_low(id, true));
 }
 
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out]	space	tablespace to release  */
-void
-fil_space_release(fil_space_t* space);
-
 /** Acquire a tablespace for reading or writing a block,
 when it could be dropped concurrently.
 @param[in]	id	tablespace ID
@@ -769,17 +822,12 @@ when it could be dropped concurrently.
 fil_space_t*
 fil_space_acquire_for_io(ulint id);
 
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out]	space	tablespace to release  */
-void
-fil_space_release_for_io(fil_space_t* space);
-
 /** Return the next fil_space_t.
 Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
 blocks a concurrent operation from dropping the tablespace.
 @param[in,out]	prev_space	Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
 @return pointer to the next fil_space_t.
 @retval NULL if this was the last  */
 fil_space_t*
@@ -789,10 +837,10 @@ fil_space_next(
 
 /** Return the next fil_space_t from key rotation list.
 Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
 blocks a concurrent operation from dropping the tablespace.
 @param[in,out]	prev_space	Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
 @return pointer to the next fil_space_t.
 @retval NULL if this was the last*/
 fil_space_t*
@@ -800,68 +848,6 @@ fil_space_keyrotate_next(
 	fil_space_t*	prev_space)
 	MY_ATTRIBUTE((warn_unused_result));
 
-/** Wrapper with reference-counting for a fil_space_t. */
-class FilSpace
-{
-public:
-	/** Default constructor: Use this when reference counting
-	is done outside this wrapper. */
-	FilSpace() : m_space(NULL) {}
-
-	/** Constructor: Look up the tablespace and increment the
-	reference count if found.
-	@param[in]	space_id	tablespace ID
-	@param[in]	silent		whether not to display errors */
-	explicit FilSpace(ulint space_id, bool silent = false)
-		: m_space(fil_space_acquire_low(space_id, silent)) {}
-
-	/** Assignment operator: This assumes that fil_space_acquire()
-	has already been done for the fil_space_t. The caller must
-	assign NULL if it calls fil_space_release().
-	@param[in]	space	tablespace to assign */
-	class FilSpace& operator=(fil_space_t* space)
-	{
-		/* fil_space_acquire() must have been invoked. */
-		ut_ad(space == NULL || space->n_pending_ops > 0);
-		m_space = space;
-		return(*this);
-	}
-
-	/** Destructor - Decrement the reference count if a fil_space_t
-	is still assigned. */
-	~FilSpace()
-	{
-		if (m_space != NULL) {
-			fil_space_release(m_space);
-		}
-	}
-
-	/** Implicit type conversion
-	@return the wrapped object */
-	operator const fil_space_t*() const
-	{
-		return(m_space);
-	}
-
-	/** Member accessor
-	@return the wrapped object */
-	const fil_space_t* operator->() const
-	{
-		return(m_space);
-	}
-
-	/** Explicit type conversion
-	@return the wrapped object */
-	const fil_space_t* operator()() const
-	{
-		return(m_space);
-	}
-
-private:
-	/** The wrapped pointer */
-	fil_space_t*	m_space;
-};
-
 /********************************************************//**
 Creates the database directory for a table if it does not exist yet. */
 void
@@ -869,43 +855,6 @@ fil_create_directory_for_tablename(
 /*===============================*/
 	const char*	name);	/*!< in: name in the standard
 				'databasename/tablename' format */
-/** Write redo log for renaming a file.
-@param[in]	space_id	tablespace id
-@param[in]	old_name	tablespace file name
-@param[in]	new_name	tablespace file name after renaming */
-void
-fil_name_write_rename(
-	ulint		space_id,
-	const char*	old_name,
-	const char*	new_name);
-/********************************************************//**
-Recreates table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_table(
-/*===============*/
-	ulint			space_id,	/*!< in: space id */
-	ulint			format_flags,	/*!< in: page format */
-	ulint			flags,		/*!< in: tablespace flags */
-	const char*		name,		/*!< in: table name */
-	truncate_t&		truncate);	/*!< in/out: The information of
-						TRUNCATE log record */
-/********************************************************//**
-Recreates the tablespace and table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_tablespace(
-/*====================*/
-	ulint			space_id,	/*!< in: space id */
-	ulint			format_flags,	/*!< in: page format */
-	ulint			flags,		/*!< in: tablespace flags */
-	const char*		name,		/*!< in: table name */
-	truncate_t&		truncate,	/*!< in/out: The information of
-						TRUNCATE log record */
-	lsn_t			recv_lsn);	/*!< in: the end LSN of
-						the log record */
 /** Replay a file rename operation if possible.
 @param[in]	space_id	tablespace identifier
 @param[in]	first_page_no	first page number in the file
@@ -966,56 +915,6 @@ fil_close_tablespace(
 	ulint	id);	/*!< in: space id */
 
 /*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
-    in DROP TABLE they are only removed gradually in the background;
-
- 3. When the user does IMPORT TABLESPACE, the tablespace will have the
-    same id as it originally had.
-
- 4. Free all the pages in use by the tablespace if rename=true.
-@return DB_SUCCESS or error */
-dberr_t
-fil_discard_tablespace(
-/*===================*/
-	ulint	id)	/*!< in: space id */
-	MY_ATTRIBUTE((warn_unused_result));
-
-/** Test if a tablespace file can be renamed to a new filepath by checking
-if that the old filepath exists and the new filepath does not exist.
-@param[in]	space_id	tablespace id
-@param[in]	old_path	old filepath
-@param[in]	new_path	new filepath
-@param[in]	is_discarded	whether the tablespace is discarded
-@return innodb error code */
-dberr_t
-fil_rename_tablespace_check(
-	ulint		space_id,
-	const char*	old_path,
-	const char*	new_path,
-	bool		is_discarded);
-
-/** Rename a single-table tablespace.
-The tablespace must exist in the memory cache.
-@param[in]	id		tablespace identifier
-@param[in]	old_path	old file name
-@param[in]	new_name	new table name in the
-databasename/tablename format
-@param[in]	new_path_in	new file name,
-or NULL if it is located in the normal data directory
-@return true if success */
-bool
-fil_rename_tablespace(
-	ulint		id,
-	const char*	old_path,
-	const char*	new_name,
-	const char*	new_path_in);
-
-/*******************************************************************//**
 Allocates and builds a file name from a path, a table or tablespace name
 and a suffix. The string must be freed by caller with ut_free().
 @param[in] path NULL or the direcory path or the full path and filename.
@@ -1038,8 +937,10 @@ fil_make_filepath(
 must be >= FIL_IBD_FILE_INITIAL_SIZE
 @param[in]	mode		MariaDB encryption mode
 @param[in]	key_id		MariaDB encryption key_id
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out]	err		DB_SUCCESS or error code
+@return	the created tablespace
+@retval	NULL	on error */
+fil_space_t*
 fil_ibd_create(
 	ulint		space_id,
 	const char*	name,
@@ -1047,16 +948,15 @@ fil_ibd_create(
 	ulint		flags,
 	ulint		size,
 	fil_encryption_t mode,
-	uint32_t	key_id)
-	MY_ATTRIBUTE((nonnull(2), warn_unused_result));
+	uint32_t	key_id,
+	dberr_t*	err)
+	MY_ATTRIBUTE((nonnull(2,8), warn_unused_result));
 
 /** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
 (Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in]	space_id	tablespace ID
+@param[in,out]	space		tablespace
 @param[in]	flags		desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags);
+void fsp_flags_try_adjust(fil_space_t* space, ulint flags);
 
 /********************************************************************//**
 Tries to open a single-table tablespace and optionally checks the space id is
@@ -1083,19 +983,22 @@ statement to update the dictionary tables if they are incorrect.
 @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
 @param[in]	id		tablespace ID
 @param[in]	flags		expected FSP_SPACE_FLAGS
-@param[in]	space_name	tablespace name of the datafile
+@param[in]	tablename	table name
 If file-per-table, it is the table name in the databasename/tablename format
 @param[in]	path_in		expected filepath, usually read from dictionary
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out]	err		DB_SUCCESS or error code; optional (default NULL)
+@return	tablespace
+@retval	NULL	if the tablespace could not be opened */
+fil_space_t*
 fil_ibd_open(
-	bool		validate,
-	bool		fix_dict,
-	fil_type_t	purpose,
-	ulint		id,
-	ulint		flags,
-	const char*	tablename,
-	const char*	path_in)
+	bool			validate,
+	bool			fix_dict,
+	fil_type_t		purpose,
+	ulint			id,
+	ulint			flags,
+	const table_name_t&	tablename,
+	const char*		path_in,
+	dberr_t*		err = NULL)
 	MY_ATTRIBUTE((warn_unused_result));
 
 enum fil_load_status {
@@ -1145,15 +1048,14 @@ startup, there may be many tablespaces which are not yet in the memory cache.
 @param[in]	print_error_if_does_not_exist
 				Print detailed error information to the
 error log if a matching tablespace is not found from memory.
-@param[in]	heap		Heap memory
 @param[in]	table_flags	table flags
-@return true if a matching tablespace exists in the memory cache */
-bool
+@return the tablespace
+@retval	NULL	if no matching tablespace exists in the memory cache */
+fil_space_t*
 fil_space_for_table_exists_in_mem(
 	ulint		id,
 	const char*	name,
 	bool		print_error_if_does_not_exist,
-	mem_heap_t*	heap,
 	ulint		table_flags);
 
 /** Try to extend a tablespace if it is smaller than the specified size.
@@ -1164,29 +1066,6 @@ bool
 fil_space_extend(
 	fil_space_t*	space,
 	ulint		size);
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return true if succeed */
-bool
-fil_space_reserve_free_extents(
-/*===========================*/
-	ulint	id,		/*!< in: space id */
-	ulint	n_free_now,	/*!< in: number of free extents now */
-	ulint	n_to_reserve);	/*!< in: how many one wants to reserve */
-/*******************************************************************//**
-Releases free extents in a file space. */
-void
-fil_space_release_free_extents(
-/*===========================*/
-	ulint	id,		/*!< in: space id */
-	ulint	n_reserved);	/*!< in: how many one reserved */
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
-	ulint	id);		/*!< in: space id */
 
 /** Reads or writes data. This operation could be asynchronous (aio).
 
@@ -1342,20 +1221,6 @@ Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
 #define fil_block_check_type(block, type, mtr)				\
 	fil_page_check_type(block->page.id, block->frame, type, mtr)
 
-#ifdef UNIV_DEBUG
-/** Increase redo skipped of a tablespace.
-@param[in]	id	space id */
-void
-fil_space_inc_redo_skipped_count(
-	ulint		id);
-
-/** Decrease redo skipped of a tablespace.
-@param[in]	id	space id */
-void
-fil_space_dec_redo_skipped_count(
-	ulint		id);
-#endif
-
 /********************************************************************//**
 Delete the tablespace file and any related files like .cfg.
 This should not be called for temporary tables. */
@@ -1384,27 +1249,6 @@ char*
 fil_path_to_space_name(
 	const char*	filename);
 
-/** Returns the space ID based on the tablespace name.
-The tablespace must be found in the tablespace memory cache.
-This call is made from external to this module, so the mutex is not owned.
-@param[in]	tablespace	Tablespace name
-@return space ID if tablespace found, ULINT_UNDEFINED if space not. */
-ulint
-fil_space_get_id_by_name(
-	const char*	tablespace);
-
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-dberr_t
-fil_get_space_names(
-/*================*/
-	space_name_list_t&	space_name_list)
-				/*!< in/out: Vector for collecting the names. */
-	MY_ATTRIBUTE((warn_unused_result));
-
 /** Generate redo log for swapping two .ibd files
 @param[in]	old_table	old table
 @param[in]	new_table	new table
@@ -1420,9 +1264,9 @@ fil_mtr_rename_log(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /** Acquire the fil_system mutex. */
-#define fil_system_enter()	mutex_enter(&fil_system->mutex)
+#define fil_system_enter()	mutex_enter(&fil_system.mutex)
 /** Release the fil_system mutex. */
-#define fil_system_exit()	mutex_exit(&fil_system->mutex)
+#define fil_system_exit()	mutex_exit(&fil_system.mutex)
 
 /*******************************************************************//**
 Returns the table space by a given id, NULL if not found. */
@@ -1431,14 +1275,7 @@ fil_space_get_by_id(
 /*================*/
 	ulint	id);	/*!< in: space id */
 
-/** Look up a tablespace.
-@param[in]	name	tablespace name
-@return	tablespace
-@retval	NULL	if not found */
-fil_space_t*
-fil_space_get_by_name(const char* name);
-
-/*******************************************************************//**
+/** Note that a non-predefined persistent tablespace has been modified
 by redo log.
 @param[in,out]	space	tablespace */
 void
@@ -1473,8 +1310,8 @@ fil_names_write_if_was_clean(
 	}
 
 	const bool	was_clean = space->max_lsn == 0;
-	ut_ad(space->max_lsn <= log_sys->lsn);
-	space->max_lsn = log_sys->lsn;
+	ut_ad(space->max_lsn <= log_sys.lsn);
+	space->max_lsn = log_sys.lsn;
 
 	if (was_clean) {
 		fil_names_dirty_and_write(space, mtr);
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index 023a48a5066..2a7d06e243f 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -39,6 +39,7 @@ fil_get_page_type_name(
 		return "PAGE_COMPRESSED_ENRYPTED";
 	case FIL_PAGE_PAGE_COMPRESSED:
 		return "PAGE_COMPRESSED";
+	case FIL_PAGE_TYPE_INSTANT:
 	case FIL_PAGE_INDEX:
 		return "INDEX";
 	case FIL_PAGE_RTREE:
@@ -89,6 +90,7 @@ fil_page_type_validate(
 	if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
 		page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
 		page_type == FIL_PAGE_INDEX ||
+		page_type == FIL_PAGE_TYPE_INSTANT ||
 		page_type == FIL_PAGE_RTREE ||
 		page_type == FIL_PAGE_UNDO_LOG ||
 		page_type == FIL_PAGE_INODE ||
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
index 68e9f687fcd..794d44373e8 100644
--- a/storage/innobase/include/fsp0file.h
+++ b/storage/innobase/include/fsp0file.h
@@ -417,7 +417,8 @@ private:
 	/** Flags to use for opening the data file */
 	os_file_create_t	m_open_flags;
 
-	/** size in database pages */
+	/** size in megabytes or pages; converted from megabytes to
+	pages in SysTablespace::normalize_size() */
 	ulint			m_size;
 
 	/** ordinal position of this datafile in the tablespace */
@@ -480,7 +481,7 @@ public:
 		/* No op - base constructor is called. */
 	}
 
-	RemoteDatafile(const char* name, ulint size, ulint order)
+	RemoteDatafile(const char*, ulint, ulint)
 		:
 		m_link_filepath()
 	{
@@ -502,12 +503,6 @@ public:
 		return(m_link_filepath);
 	}
 
-	/** Set the link filepath. Use default datadir, the base name of
-	the path provided without its suffix, plus DOT_ISL.
-	@param[in]	path	filepath which contains a basename to use.
-				If NULL, use m_name as the basename. */
-	void set_link_filepath(const char* path);
-
 	/** Create a link filename based on the contents of m_name,
 	open that file, and read the contents into m_filepath.
 	@retval DB_SUCCESS if remote linked tablespace file is opened and read.
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 368f0daa201..3222f1c761a 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -45,8 +45,8 @@ Created 12/18/1995 Heikki Tuuri
 
 /** @return the PAGE_SSIZE flags for the current innodb_page_size */
 #define FSP_FLAGS_PAGE_SSIZE()						\
-	((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ?			\
-	 0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)	\
+	((srv_page_size == UNIV_PAGE_SIZE_ORIG) ?			\
+	 0U : (srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1)	\
 	 << FSP_FLAGS_POS_PAGE_SSIZE)
 
 /* @defgroup Compatibility macros for MariaDB 10.1.0 through 10.1.20;
@@ -294,22 +294,6 @@ the extent are free and which contain old tuple version to clean. */
 #ifndef UNIV_INNOCHECKSUM
 /* @} */
 
-/**********************************************************************//**
-Initializes the file space system. */
-void
-fsp_init(void);
-/*==========*/
-
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header.  If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files.  If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-ulint
-fsp_header_get_tablespace_size(void);
-/*================================*/
-
 /** Calculate the number of pages to extend a datafile.
 We extend single-table tablespaces first one extent at a time,
 but 4 at a time for bigger tablespaces. It is not enough to extend always
@@ -334,7 +318,7 @@ UNIV_INLINE
 ulint
 fsp_get_extent_size_in_pages(const page_size_t&	page_size)
 {
-	return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical());
+	return (FSP_EXTENT_SIZE << srv_page_size_shift) / page_size.physical();
 }
 
 /**********************************************************************//**
@@ -397,56 +381,33 @@ fsp_header_init_fields(
 	ulint	flags);		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
 				0, or table->flags if newer than COMPACT */
 /** Initialize a tablespace header.
-@param[in]	space_id	space id
-@param[in]	size		current size in blocks
-@param[in,out]	mtr		mini-transaction */
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr);
+@param[in,out]	space	tablespace
+@param[in]	size	current size in blocks
+@param[in,out]	mtr	mini-transaction */
+void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
+	MY_ATTRIBUTE((nonnull));
 
 /**********************************************************************//**
-Increases the space size field of a space. */
-void
-fsp_header_inc_size(
-/*================*/
-	ulint	space_id,	/*!< in: space id */
-	ulint	size_inc,	/*!< in: size increment in pages */
-	mtr_t*	mtr);		/*!< in/out: mini-transaction */
-/**********************************************************************//**
 Creates a new segment.
 @return the block where the segment header is placed, x-latched, NULL
 if could not create segment because of lack of space */
 buf_block_t*
 fseg_create(
-/*========*/
-	ulint	space_id,/*!< in: space id */
-	ulint	page,	/*!< in: page where the segment header is placed: if
-			this is != 0, the page must belong to another segment,
-			if this is 0, a new page will be allocated and it
-			will belong to the created segment */
-	ulint	byte_offset, /*!< in: byte offset of the created segment header
-			on the page */
-	mtr_t*	mtr);	/*!< in/out: mini-transaction */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-buf_block_t*
-fseg_create_general(
-/*================*/
-	ulint	space_id,/*!< in: space id */
+	fil_space_t* space, /*!< in,out: tablespace */
 	ulint	page,	/*!< in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
 			will belong to the created segment */
 	ulint	byte_offset, /*!< in: byte offset of the created segment header
 			on the page */
-	ibool	has_done_reservation, /*!< in: TRUE if the caller has already
-			done the reservation for the pages with
+	mtr_t*	mtr,
+   	bool	has_done_reservation = false); /*!< in: whether the caller
+			has already done the reservation for the pages with
 			fsp_reserve_free_extents (at least 2 extents: one for
 			the inode and the other for the segment) then there is
 			no need to do the check for this individual
 			operation */
-	mtr_t*	mtr);	/*!< in/out: mini-transaction */
+
 /**********************************************************************//**
 Calculates the number of pages reserved by a segment, and how many pages are
 currently used.
@@ -508,7 +469,7 @@ fseg_alloc_free_page_general(
 use several pages from the tablespace should call this function beforehand
 and reserve enough free extents so that they certainly will be able
 to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
+must be released with function fil_space_t::release_free_extents()!
 
 The alloc_type below has the following meaning: FSP_NORMAL means an
 operation which will probably result in more space usage, like an
@@ -534,7 +495,7 @@ free pages available.
 				return true and the tablespace size is <
 				FSP_EXTENT_SIZE pages, then this can be 0,
 				otherwise it is n_ext
-@param[in]	space_id	tablespace identifier
+@param[in,out]	space		tablespace
 @param[in]	n_ext		number of extents to reserve
 @param[in]	alloc_type	page reservation type (FSP_BLOB, etc)
 @param[in,out]	mtr		the mini transaction
@@ -545,30 +506,12 @@ free pages available.
 bool
 fsp_reserve_free_extents(
 	ulint*		n_reserved,
-	ulint		space_id,
+	fil_space_t*	space,
 	ulint		n_ext,
 	fsp_reserve_t	alloc_type,
 	mtr_t*		mtr,
 	ulint		n_pages = 2);
 
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space.
-@param[in]	space_id	tablespace ID
-@return available space in KiB
-@retval UINTMAX_MAX if unknown */
-uintmax_t
-fsp_get_available_space_in_free_extents(
-	ulint		space_id);
-
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space. Start with a space object that has
-been acquired by the caller who holds it for the calculation,
-@param[in]	space		tablespace object from fil_space_acquire()
-@return available space in KiB */
-uintmax_t
-fsp_get_available_space_in_free_extents(
-	const fil_space_t*	space);
-
 /**********************************************************************//**
 Frees a single page of a segment. */
 void
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 2da3320eef7..38d890fd2f3 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -92,21 +92,15 @@ xdes_calc_descriptor_page(
 	const page_size_t&	page_size,
 	ulint			offset)
 {
-#ifndef DOXYGEN /* Doxygen gets confused by these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET				\
-			   + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX)	\
-			   * XDES_SIZE_MAX
-#  error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET				\
-			  + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN)	\
-			  * XDES_SIZE_MIN
-#  error
-# endif
-#endif /* !DOXYGEN */
-
-	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
-	      + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+	compile_time_assert(UNIV_PAGE_SIZE_MAX > XDES_ARR_OFFSET
+			    + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX)
+			    * XDES_SIZE_MAX);
+	compile_time_assert(UNIV_PAGE_SIZE_MIN > XDES_ARR_OFFSET
+			    + (UNIV_PAGE_SIZE_MIN / FSP_EXTENT_SIZE_MIN)
+			    * XDES_SIZE_MIN);
+
+	ut_ad(srv_page_size > XDES_ARR_OFFSET
+	      + (srv_page_size / FSP_EXTENT_SIZE)
 	      * XDES_SIZE);
 	ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
 	      + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
index efbd4fc3f24..80b006f2dd7 100644
--- a/storage/innobase/include/fsp0sysspace.h
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -33,14 +33,6 @@ Created 2013-7-26 by Kevin Lewis
 at a time. We have to make this public because it is a config variable. */
 extern ulong sys_tablespace_auto_extend_increment;
 
-#ifdef UNIV_DEBUG
-/** Control if extra debug checks need to be done for temporary tablespace.
-Default = true that is disable such checks.
-This variable is not exposed to end-user but still kept as variable for
-developer to enable it during debug. */
-extern bool srv_skip_temp_table_checks_debug;
-#endif /* UNIV_DEBUG */
-
 /** Data structure that contains the information about shared tablespaces.
 Currently this can be the system tablespace or a temporary table tablespace */
 class SysTablespace : public Tablespace
@@ -111,7 +103,7 @@ public:
 	void shutdown();
 
 	/** Normalize the file size, convert to extents. */
-	void normalize();
+	void normalize_size();
 
 	/**
 	@return true if a new raw device was created. */
@@ -147,8 +139,8 @@ public:
 	@return the autoextend increment in pages. */
 	ulint get_autoextend_increment() const
 	{
-		return(sys_tablespace_auto_extend_increment
-		       * ((1024 * 1024) / UNIV_PAGE_SIZE));
+		return sys_tablespace_auto_extend_increment
+			<< (20 - srv_page_size_shift);
 	}
 
 	/**
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index c6dbe52def4..f7a5befa6ae 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -59,11 +59,8 @@ page size | file space extent size
   32 KiB  |  64 pages = 2 MiB
   64 KiB  |  64 pages = 4 MiB
 */
-#define FSP_EXTENT_SIZE         ((UNIV_PAGE_SIZE <= (16384) ?	\
-				(1048576 / UNIV_PAGE_SIZE) :	\
-				((UNIV_PAGE_SIZE <= (32768)) ?	\
-				(2097152 / UNIV_PAGE_SIZE) :	\
-				(4194304 / UNIV_PAGE_SIZE))))
+#define FSP_EXTENT_SIZE         (srv_page_size_shift < 14 ?	\
+				 (1048576U >> srv_page_size_shift) : 64U)
 
 /** File space extent size (four megabyte) in pages for MAX page size */
 #define	FSP_EXTENT_SIZE_MAX	(4194304 / UNIV_PAGE_SIZE_MAX)
@@ -151,38 +148,38 @@ enum fsp_reserve_t {
 /* Number of pages described in a single descriptor page: currently each page
 description takes less than 1 byte; a descriptor page is repeated every
 this many file pages */
-/* #define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE */
-/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
+/* #define XDES_DESCRIBED_PER_PAGE		srv_page_size */
+/* This has been replaced with either srv_page_size or page_zip->size. */
 
 /** @name The space low address page map
 The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
 every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
 /* @{ */
 /*--------------------------------------*/
-#define FSP_XDES_OFFSET			0	/* !< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET		1	/* !< insert buffer bitmap */
+#define FSP_XDES_OFFSET			0U	/* !< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET		1U	/* !< insert buffer bitmap */
 				/* The ibuf bitmap pages are the ones whose
 				page number is the number above plus a
 				multiple of XDES_DESCRIBED_PER_PAGE */
 
-#define FSP_FIRST_INODE_PAGE_NO		2	/*!< in every tablespace */
+#define FSP_FIRST_INODE_PAGE_NO		2U	/*!< in every tablespace */
 				/* The following pages exist
 				in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO		3	/*!< insert buffer
+#define FSP_IBUF_HEADER_PAGE_NO		3U	/*!< insert buffer
 						header page, in
 						tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO	4	/*!< insert buffer
+#define FSP_IBUF_TREE_ROOT_PAGE_NO	4U	/*!< insert buffer
 						B-tree root page in
 						tablespace 0 */
 				/* The ibuf tree root page number in
 				tablespace 0; its fseg inode is on the page
 				number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO		5	/*!< transaction
+#define FSP_TRX_SYS_PAGE_NO		5U	/*!< transaction
 						system header, in
 						tablespace 0 */
-#define	FSP_FIRST_RSEG_PAGE_NO		6	/*!< first rollback segment
+#define	FSP_FIRST_RSEG_PAGE_NO		6U	/*!< first rollback segment
 						page, in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO		7	/*!< data dictionary header
+#define FSP_DICT_HDR_PAGE_NO		7U	/*!< data dictionary header
 						page, in tablespace 0 */
 /*--------------------------------------*/
 /* @} */
@@ -196,17 +193,6 @@ fsp_is_system_temporary(ulint	space_id)
 {
 	return(space_id == SRV_TMP_SPACE_ID);
 }
-
-#ifdef UNIV_DEBUG
-/** Skip some of the sanity checks that are time consuming even in debug mode
-and can affect frequent verification runs that are done to ensure stability of
-the product.
-@return true if check should be skipped for given space. */
-bool
-fsp_skip_sanity_check(
-	ulint	space_id);
-#endif /* UNIV_DEBUG */
-
 #endif /* !UNIV_INNOCHECKSUM */
 
 /* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
@@ -217,7 +203,7 @@ fsp_skip_sanity_check(
 #define FSP_FLAGS_WIDTH_ZIP_SSIZE	4
 /** Width of the ATOMIC_BLOBS flag.  The ability to break up a long
 column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+to ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. */
 #define FSP_FLAGS_WIDTH_ATOMIC_BLOBS	1
 /** Number of flag bits used to indicate the tablespace page size */
 #define FSP_FLAGS_WIDTH_PAGE_SSIZE	4
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index cad9ef37560..068720c1947 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -488,47 +488,49 @@ fts_trx_free(
 /*=========*/
 	fts_trx_t*	fts_trx);		/*!< in, own: FTS trx */
 
-/******************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
-on the given table. row_mysql_lock_data_dictionary must have been
-called before this.
-@return DB_SUCCESS or error code */
+/** Creates the common auxiliary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been called
+before this.
+The following tables are created.
+CREATE TABLE $FTS_PREFIX_DELETED
+	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_DELETED_CACHE
+	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED
+	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
+	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_CONFIG
+	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
+@param[in,out]	trx			transaction
+@param[in]	table			table with FTS index
+@param[in]	skip_doc_id_index	Skip index on doc id
+@return DB_SUCCESS on success, otherwise an error code */
 dberr_t
 fts_create_common_tables(
-/*=====================*/
-	trx_t*		trx,			/*!< in: transaction handle */
-	const dict_table_t*
-			table,			/*!< in: table with one FTS
-						index */
-	const char*	name,			/*!< in: table name */
-	bool		skip_doc_id_index)	/*!< in: Skip index on doc id */
-	MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return DB_SUCCESS or error code */
-dberr_t
-fts_create_index_tables(
-/*====================*/
-	trx_t*			trx,		/*!< in: transaction handle */
-	const dict_index_t*	index)		/*!< in: the FTS index
-						instance */
-	MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
+	trx_t*		trx,
+	dict_table_t*	table,
+	bool		skip_doc_id_index)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Creates the column specific ancillary tables needed for supporting an
 FTS index on the given table. row_mysql_lock_data_dictionary must have
 been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+	word		VARCHAR(FTS_MAX_WORD_LEN),
+	first_doc_id	INT NOT NULL,
+	last_doc_id	UNSIGNED NOT NULL,
+	doc_count	UNSIGNED INT NOT NULL,
+	ilist		VARBINARY NOT NULL,
+	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out]	trx	dictionary transaction
+@param[in]	index	fulltext index
+@param[in]	id	table id
 @return DB_SUCCESS or error code */
 dberr_t
-fts_create_index_tables_low(
-/*========================*/
-	trx_t*		trx,			/*!< in: transaction handle */
-	const dict_index_t*
-			index,			/*!< in: the FTS index
-						instance */
-	const char*	table_name,		/*!< in: the table name */
-	table_id_t	table_id)		/*!< in: the table id */
-	MY_ATTRIBUTE((warn_unused_result));
+fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /******************************************************************//**
 Add the FTS document id hidden column. */
 void
@@ -559,7 +561,7 @@ fts_commit(
 	MY_ATTRIBUTE((warn_unused_result));
 
 /** FTS Query entry point.
-@param[in]	trx		transaction
+@param[in,out]	trx		transaction
 @param[in]	index		fts index to search
 @param[in]	flags		FTS search mode
 @param[in]	query_str	FTS query
@@ -740,7 +742,6 @@ Take a FTS savepoint. */
 void
 fts_savepoint_take(
 /*===============*/
-	trx_t*		trx,			/*!< in: transaction */
 	fts_trx_t*	fts_trx,		/*!< in: fts transaction */
 	const char*	name);			/*!< in: savepoint name */
 
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index fca22bdc7d6..d045c9d3c72 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -319,7 +319,6 @@ the dict mutex
 que_t*
 fts_parse_sql_no_dict_lock(
 /*=======================*/
-	fts_table_t*	fts_table,	/*!< in: table with FTS index */
 	pars_info_t*	info,		/*!< in: parser info */
 	const char*	sql)		/*!< in: SQL string to evaluate */
 	MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h
index 15726aea1de..909d2ce07ba 100644
--- a/storage/innobase/include/fts0tokenize.h
+++ b/storage/innobase/include/fts0tokenize.h
@@ -144,7 +144,7 @@ fts_get_word(
 				}
 			}
 
-			info->prev = *doc;
+			info->prev = char(*doc);
 			info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
 			info->weight_adjust = info->wasign = 0;
 		}
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index a8712751412..487e7c33b63 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -172,7 +172,6 @@ fts_select_index_by_hash(
 	const byte*		str,
 	ulint			len)
 {
-	int	char_len;
 	ulong	nr1 = 1;
 	ulong	nr2 = 4;
 
@@ -187,9 +186,9 @@ fts_select_index_by_hash(
 	char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
 				    reinterpret_cast<const char*>(str + len));
 	*/
-	char_len = cs->cset->charlen(cs, str, str+len);
+	size_t char_len = size_t(cs->cset->charlen(cs, str, str + len));
 
-	ut_ad(static_cast<ulint>(char_len) <= len);
+	ut_ad(char_len <= len);
 
 	/* Get collation hash code */
 	cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index 6fe031876e6..56be971f233 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -48,7 +48,7 @@ fut_get_ptr(
 	buf_block_t*	block;
 	byte*		ptr = NULL;
 
-	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+	ut_ad(addr.boffset < srv_page_size);
 	ut_ad((rw_latch == RW_S_LATCH)
 	      || (rw_latch == RW_X_LATCH)
 	      || (rw_latch == RW_SX_LATCH));
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index 128dc77ed92..5c9a9ca94c1 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -58,7 +58,7 @@ flst_write_addr(
 					     MTR_MEMO_PAGE_X_FIX
 					     | MTR_MEMO_PAGE_SX_FIX));
 	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
-	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+	ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
 
 	mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
 	mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
@@ -83,7 +83,7 @@ flst_read_addr(
 	addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
 				      mtr);
 	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
-	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+	ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
 	return(addr);
 }
 
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index 65a53ec1d39..461d2816653 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 2013/03/27 Jimmy Yang and Allen Lai
 #define gis0rtree_h
 
 #include "univ.i"
+#include "my_base.h"
 
 #include "data0type.h"
 #include "data0types.h"
@@ -87,10 +88,8 @@ rtr_index_build_node_ptr(
 					pointer */
 	ulint			page_no,/*!< in: page number to put in node
 					pointer */
-	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+	mem_heap_t*		heap);	/*!< in: memory heap where pointer
 					created */
-	ulint			level);	/*!< in: level of rec in tree:
-					0 means leaf level */
 
 /*************************************************************//**
 Splits an R-tree index page to halves and inserts the tuple. It is assumed
@@ -179,7 +178,6 @@ dberr_t
 rtr_ins_enlarge_mbr(
 /*=================*/
 	btr_cur_t*		cursor,	/*!< in: btr cursor */
-	que_thr_t*		thr,	/*!< in: query thread */
 	mtr_t*			mtr);	/*!< in: mtr */
 
 /********************************************************************//**
@@ -438,9 +436,6 @@ rtr_merge_and_update_mbr(
 	ulint*			offsets,	/*!< in: rec offsets */
 	ulint*			offsets2,	/*!< in: rec offsets */
 	page_t*			child_page,	/*!< in: the child page. */
-	buf_block_t*		merge_block,	/*!< in: page to merge */
-	buf_block_t*		block,		/*!< in: page be merged */
-	dict_index_t*		index,		/*!< in: index */
 	mtr_t*			mtr);		/*!< in: mtr */
 
 /*************************************************************//**
@@ -448,10 +443,8 @@ Deletes on the upper level the node pointer to a page. */
 void
 rtr_node_ptr_delete(
 /*================*/
-	dict_index_t*	index,	/*!< in: index tree */
-	btr_cur_t*	sea_cur,/*!< in: search cursor, contains information
+	btr_cur_t*	cursor,	/*!< in: search cursor, contains information
 				about parent nodes in search */
-	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
 	mtr_t*		mtr);	/*!< in: mtr */
 
 /****************************************************************//**
@@ -463,10 +456,7 @@ rtr_merge_mbr_changed(
 	btr_cur_t*	cursor2,	/*!< in: the other cursor */
 	ulint*		offsets,	/*!< in: rec offsets */
 	ulint*		offsets2,	/*!< in: rec offsets */
-	rtr_mbr_t*	new_mbr,	/*!< out: MBR to update */
-	buf_block_t*	merge_block,	/*!< in: page to merge */
-	buf_block_t*	block,		/*!< in: page be merged */
-	dict_index_t*	index);		/*!< in: index */
+	rtr_mbr_t*	new_mbr);	/*!< out: MBR to update */
 
 
 /**************************************************************//**
@@ -543,7 +533,7 @@ rtr_info_reinit_in_cursor(
 @param[in]	tuple	range tuple containing mbr, may also be empty tuple
 @param[in]	mode	search mode
 @return estimated number of rows */
-int64_t
+ha_rows
 rtr_estimate_n_rows_in_range(
 	dict_index_t*	index,
 	const dtuple_t*	tuple,
diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic
index e852ebd8028..4dd05d3b251 100644
--- a/storage/innobase/include/gis0rtree.ic
+++ b/storage/innobase/include/gis0rtree.ic
@@ -38,7 +38,7 @@ rtr_page_cal_mbr(
 {
 	page_t*		page;
 	rec_t*		rec;
-	byte*		field;
+	const byte*	field;
 	ulint		len;
 	ulint*		offsets = NULL;
 	double		bmin, bmax;
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 86defe9b166..1313705f119 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -70,13 +70,11 @@ innobase_invalidate_query_cache(
 /*============================*/
 	trx_t*		trx,		/*!< in: transaction which
 					modifies the table */
-	const char*	full_name,	/*!< in: concatenation of
+	const char*	full_name);	/*!< in: concatenation of
 					database name, path separator,
 					table name, null char NUL;
 					NOTE that in Windows this is
 					always in LOWER CASE! */
-	ulint		full_name_len);	/*!< in: full name length where
-					also the null chars count */
 
 /** Quote a standard SQL identifier like tablespace, index or column name.
 @param[in]	file	output stream
@@ -158,7 +156,6 @@ UNIV_INTERN
 void
 innobase_mysql_log_notify(
 /*======================*/
-	ib_uint64_t	write_lsn,	/*!< in: LSN written to log file */
 	ib_uint64_t	flush_lsn);	/*!< in: LSN flushed to disk */
 
 /** Converts a MySQL type to an InnoDB type. Note that this function returns
@@ -240,7 +237,7 @@ wsrep_innobase_kill_one_trx(void * const thd_ptr,
                             const trx_t * const bf_trx,
                             trx_t *victim_trx,
                             ibool signal);
-int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
+ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
                              unsigned char* str, unsigned int str_length,
                              unsigned int buf_length);
 #endif /* WITH_WSREP */
@@ -309,14 +306,6 @@ thd_lock_wait_timeout(
 /*==================*/
 	THD*	thd);	/*!< in: thread handle, or NULL to query
 			the global innodb_lock_wait_timeout */
-/******************************************************************//**
-Add up the time waited for the lock for the current query. */
-void
-thd_set_lock_wait_time(
-/*===================*/
-	THD*	thd,	/*!< in/out: thread handle */
-	ulint	value);	/*!< in: time waited for the lock */
-
 /** Get status of innodb_tmpdir.
 @param[in]	thd	thread handle, or NULL to query
 			the global innodb_tmpdir.
@@ -453,14 +442,6 @@ const char*
 server_get_hostname();
 /*=================*/
 
-/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-const char*
-innobase_get_err_msg(
-/*=================*/
-	int	error_code);	/*!< in: MySQL error code */
-
 /*********************************************************************//**
 Compute the next autoinc value.
 
@@ -533,7 +514,7 @@ UNIV_INTERN
 void
 ib_push_warning(
 	trx_t*		trx,	/*!< in: trx */
-	ulint		error,	/*!< in: error code to push as warning */
+	dberr_t		error,	/*!< in: error code to push as warning */
 	const char	*format,/*!< in: warning message */
 	...);
 
@@ -543,7 +524,7 @@ UNIV_INTERN
 void
 ib_push_warning(
 	void*		ithd,	/*!< in: thd */
-	ulint		error,	/*!< in: error code to push as warning */
+	dberr_t		error,	/*!< in: error code to push as warning */
 	const char	*format,/*!< in: warning message */
 	...);
 
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 1c690839449..81c0fd18a29 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -53,14 +53,6 @@ innobase_row_to_mysql(
 	const dtuple_t*		row)	/*!< in: InnoDB row */
 	MY_ATTRIBUTE((nonnull));
 
-/*************************************************************//**
-Resets table->record[0]. */
-void
-innobase_rec_reset(
-/*===============*/
-	struct TABLE*		table)		/*!< in/out: MySQL table */
-	MY_ATTRIBUTE((nonnull));
-
 /** Generate the next autoinc based on a snapshot of the session
 auto_increment_increment and auto_increment_offset variables. */
 struct ib_sequence_t {
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
index 7b289c7a98c..eaf391be09b 100644
--- a/storage/innobase/include/ib0mutex.h
+++ b/storage/innobase/include/ib0mutex.h
@@ -53,15 +53,8 @@ struct OSTrackMutex {
 		ut_ad(!m_destroy_at_exit || !m_locked);
 	}
 
-	/** Initialise the mutex.
-	@param[in]	id              Mutex ID
-	@param[in]	filename	File where mutex was created
-	@param[in]	line		Line in filename */
-	void init(
-		latch_id_t	id,
-		const char*	filename,
-		uint32_t	line)
-		UNIV_NOTHROW
+	/** Initialise the mutex. */
+	void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
 	{
 		ut_ad(m_freed);
 		ut_ad(!m_locked);
@@ -92,16 +85,8 @@ struct OSTrackMutex {
 		m_mutex.exit();
 	}
 
-	/** Acquire the mutex.
-	@param[in]	max_spins	max number of spins
-	@param[in]	max_delay	max delay per spin
-	@param[in]	filename	from where called
-	@param[in]	line		within filename */
-	void enter(
-		uint32_t	max_spins,
-		uint32_t	max_delay,
-		const char*	filename,
-		uint32_t	line)
+	/** Acquire the mutex. */
+	void enter(uint32_t, uint32_t, const char*, uint32_t)
 		UNIV_NOTHROW
 	{
 		ut_ad(!m_freed);
@@ -186,15 +171,8 @@ struct TTASFutexMutex {
 	}
 
 	/** Called when the mutex is "created". Note: Not from the constructor
-	but when the mutex is initialised.
-	@param[in]	id		Mutex ID
-	@param[in]	filename	File where mutex was created
-	@param[in]	line		Line in filename */
-	void init(
-		latch_id_t	id,
-		const char*	filename,
-		uint32_t	line)
-		UNIV_NOTHROW
+	but when the mutex is initialised. */
+	void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
 	{
 		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
 	}
@@ -208,14 +186,9 @@ struct TTASFutexMutex {
 
 	/** Acquire the mutex.
 	@param[in]	max_spins	max number of spins
-	@param[in]	max_delay	max delay per spin
-	@param[in]	filename	from where called
-	@param[in]	line		within filename */
-	void enter(
-		uint32_t	max_spins,
-		uint32_t	max_delay,
-		const char*	filename,
-		uint32_t	line) UNIV_NOTHROW
+	@param[in]	max_delay	max delay per spin */
+	void enter(uint32_t max_spins, uint32_t max_delay,
+		   const char*, uint32_t) UNIV_NOTHROW
 	{
 		uint32_t n_spins, n_waits;
 
@@ -225,7 +198,7 @@ struct TTASFutexMutex {
 				return;
 			}
 
-			ut_delay(ut_rnd_interval(0, max_delay));
+			ut_delay(max_delay);
 		}
 
 		for (n_waits= 0;; n_waits++) {
@@ -308,15 +281,8 @@ struct TTASMutex {
 	}
 
 	/** Called when the mutex is "created". Note: Not from the constructor
-	but when the mutex is initialised.
-	@param[in]	id		Mutex ID
-	@param[in]	filename	File where mutex was created
-	@param[in]	line		Line in filename */
-	void init(
-		latch_id_t	id,
-		const char*	filename,
-		uint32_t	line)
-		UNIV_NOTHROW
+	but when the mutex is initialised. */
+	void init(latch_id_t) UNIV_NOTHROW
 	{
 		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
 	}
@@ -349,20 +315,15 @@ struct TTASMutex {
 
 	/** Acquire the mutex.
 	@param max_spins	max number of spins
-	@param max_delay	max delay per spin
-	@param filename		from where called
-	@param line		within filename */
-	void enter(
-		uint32_t	max_spins,
-		uint32_t	max_delay,
-		const char*	filename,
-		uint32_t	line) UNIV_NOTHROW
+	@param max_delay	max delay per spin */
+	void enter(uint32_t max_spins, uint32_t max_delay,
+		   const char*, uint32_t) UNIV_NOTHROW
 	{
 		const uint32_t	step = max_spins;
 		uint32_t n_spins = 0;
 
 		while (!try_lock()) {
-			ut_delay(ut_rnd_interval(0, max_delay));
+			ut_delay(max_delay);
 			if (++n_spins == max_spins) {
 				os_thread_yield();
 				max_spins+= step;
@@ -420,14 +381,8 @@ struct TTASEventMutex {
 
 	/** Called when the mutex is "created". Note: Not from the constructor
 	but when the mutex is initialised.
-	@param[in]	id		Mutex ID
-	@param[in]	filename	File where mutex was created
-	@param[in]	line		Line in filename */
-	void init(
-		latch_id_t	id,
-		const char*	filename,
-		uint32_t	line)
-		UNIV_NOTHROW
+	@param[in]	id		Mutex ID */
+	void init(latch_id_t id, const char*, uint32_t) UNIV_NOTHROW
 	{
 		ut_a(m_event == 0);
 		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
@@ -516,7 +471,7 @@ struct TTASEventMutex {
 					sync_array_wait_event(sync_arr, cell);
 				}
 			} else {
-				ut_delay(ut_rnd_interval(0, max_delay));
+				ut_delay(max_delay);
 			}
 		}
 
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 6cff26635bd..8233a536abc 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -49,22 +49,19 @@ typedef enum {
 	IBUF_OP_COUNT = 3
 } ibuf_op_t;
 
-/** Combinations of operations that can be buffered.  Because the enum
-values are used for indexing innobase_change_buffering_values[], they
-should start at 0 and there should not be any gaps. */
-typedef enum {
+/** Combinations of operations that can be buffered.
+@see innodb_change_buffering_names */
+enum ibuf_use_t {
 	IBUF_USE_NONE = 0,
 	IBUF_USE_INSERT,	/* insert */
 	IBUF_USE_DELETE_MARK,	/* delete */
 	IBUF_USE_INSERT_DELETE_MARK,	/* insert+delete */
 	IBUF_USE_DELETE,	/* delete+purge */
-	IBUF_USE_ALL,		/* insert+delete+purge */
-
-	IBUF_USE_COUNT		/* number of entries in ibuf_use_t */
-} ibuf_use_t;
+	IBUF_USE_ALL		/* insert+delete+purge */
+};
 
 /** Operations that can currently be buffered. */
-extern ibuf_use_t	ibuf_use;
+extern ulong		innodb_change_buffering;
 
 /** The insert buffer control structure */
 extern ibuf_t*		ibuf;
@@ -421,14 +418,11 @@ void
 ibuf_close(void);
 /*============*/
 
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
+@param[in]	trx	transaction
+@param[in,out]	space	tablespace being imported
 @return DB_SUCCESS or error code */
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
-	const trx_t*	trx,		/*!< in: transaction */
-	ulint		space_id)	/*!< in: tablespace identifier */
+dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /** Updates free bits and buffered bits for bulk loaded page.
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 09070c14059..355fad62f24 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -28,7 +28,7 @@ Created 7/19/1997 Heikki Tuuri
 #include "fsp0types.h"
 #include "buf0lru.h"
 
-/** An index page must contain at least UNIV_PAGE_SIZE /
+/** An index page must contain at least srv_page_size /
 IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
 buffer inserts to this page.  If there is this much of free space, the
 corresponding bits are set in the ibuf bitmap. */
@@ -124,7 +124,7 @@ ibuf_should_try(
 						a secondary index when we
 						decide */
 {
-	return(ibuf_use != IBUF_USE_NONE
+	return(innodb_change_buffering
 	       && ibuf->max_size != 0
 	       && !dict_index_is_clust(index)
 	       && !dict_index_is_spatial(index)
@@ -314,9 +314,7 @@ ibuf_update_free_bits_if_full(
 		block->page.size.physical(), max_ins_size);
 
 	if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
-#endif
+		compile_time_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX);
 		after = ibuf_index_page_calc_free_bits(
 			block->page.size.physical(), max_ins_size - increase);
 #ifdef UNIV_IBUF_DEBUG
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 45f69cad9a5..0f6fe158264 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -65,23 +65,6 @@ ulint
 lock_get_size(void);
 /*===============*/
 /*********************************************************************//**
-Creates the lock system at database start. */
-void
-lock_sys_create(
-/*============*/
-	ulint	n_cells);	/*!< in: number of slots in lock hash table */
-/** Resize the lock hash table.
-@param[in]	n_cells	number of slots in lock hash table */
-void
-lock_sys_resize(
-	ulint	n_cells);
-
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-void
-lock_sys_close(void);
-/*================*/
-/*********************************************************************//**
 Gets the heap_no of the smallest user record on a page.
 @return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
 UNIV_INLINE
@@ -296,7 +279,7 @@ lock_rec_insert_check_and_lock(
 	dict_index_t*	index,	/*!< in: index */
 	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	ibool*		inherit)/*!< out: set to TRUE if the new
+	bool*		inherit)/*!< out: set to true if the new
 				inserted record maybe should inherit
 				LOCK_GAP type locks from the successor
 				record */
@@ -509,18 +492,6 @@ void
 lock_trx_release_locks(
 /*===================*/
 	trx_t*	trx);	/*!< in/out: transaction */
-/*********************************************************************//**
-Removes locks on a table to be dropped or discarded.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-void
-lock_remove_all_on_table(
-/*=====================*/
-	dict_table_t*	table,			/*!< in: table to be dropped
-						or discarded */
-	ibool		remove_also_table_sx_locks);/*!< in: also removes
-						table S and X locks */
 
 /*********************************************************************//**
 Calculates the fold value of a page file address: used in inserting or
@@ -565,8 +536,8 @@ lock_rec_find_set_bit(
 
 /*********************************************************************//**
 Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-ibool
+@return whether lock1 has to wait for lock2 to be removed */
+bool
 lock_has_to_wait(
 /*=============*/
 	const lock_t*	lock1,	/*!< in: waiting lock */
@@ -583,7 +554,7 @@ lock_report_trx_id_insanity(
 	const rec_t*	rec,		/*!< in: user record */
 	dict_index_t*	index,		/*!< in: index */
 	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
-	trx_id_t	max_trx_id);	/*!< in: trx_sys_get_max_trx_id() */
+	trx_id_t	max_trx_id);	/*!< in: trx_sys.get_max_trx_id() */
 /*********************************************************************//**
 Prints info of locks for all transactions.
 @return FALSE if not able to obtain lock mutex and exits without
@@ -615,7 +586,7 @@ lock_print_info_all_transactions(
 Return approximate number or record locks (bits set in the bitmap) for
 this transaction. Since delete-marked records may be removed, the
 record count will not be precise.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
 ulint
 lock_number_of_rows_locked(
 /*=======================*/
@@ -624,7 +595,7 @@ lock_number_of_rows_locked(
 
 /*********************************************************************//**
 Return the number of table locks for a transaction.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
 ulint
 lock_number_of_tables_locked(
 /*=========================*/
@@ -799,7 +770,6 @@ Set the lock system timeout event. */
 void
 lock_set_timeout_event();
 /*====================*/
-#ifdef UNIV_DEBUG
 /*********************************************************************//**
 Checks that a transaction id is sensible, i.e., not in the future.
 @return true if ok */
@@ -809,8 +779,8 @@ lock_check_trx_id_sanity(
 	trx_id_t	trx_id,		/*!< in: trx id */
 	const rec_t*	rec,		/*!< in: user record */
 	dict_index_t*	index,		/*!< in: index */
-	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
-	MY_ATTRIBUTE((warn_unused_result));
+	const ulint*	offsets);	/*!< in: rec_get_offsets(rec, index) */
+#ifdef UNIV_DEBUG
 /*******************************************************************//**
 Check if the transaction holds any locks on the sys tables
 or its records.
@@ -819,19 +789,21 @@ const lock_t*
 lock_trx_has_sys_table_locks(
 /*=========================*/
 	const trx_t*	trx)	/*!< in: transaction to check */
-	MY_ATTRIBUTE((warn_unused_result));
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/*******************************************************************//**
-Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
+/** Check if the transaction holds an explicit exclusive lock on a record.
+@param[in]	trx	transaction
+@param[in]	table	table
+@param[in]	block	leaf page
+@param[in]	heap_no	heap number identifying the record
+@return whether an explicit X-lock is held */
 bool
-lock_trx_has_rec_x_lock(
-/*====================*/
+lock_trx_has_expl_x_lock(
 	const trx_t*		trx,	/*!< in: transaction to check */
 	const dict_table_t*	table,	/*!< in: table to check */
 	const buf_block_t*	block,	/*!< in: buffer block of the record */
 	ulint			heap_no)/*!< in: record heap number */
-	MY_ATTRIBUTE((warn_unused_result));
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
 #endif /* UNIV_DEBUG */
 
 /** Lock operation struct */
@@ -843,11 +815,12 @@ struct lock_op_t{
 typedef ib_mutex_t LockMutex;
 
 /** The lock system struct */
-struct lock_sys_t{
-	char		pad1[CACHE_LINE_SIZE];	/*!< padding to prevent other
-						memory update hotspots from
-						residing on the same memory
-						cache line */
+class lock_sys_t
+{
+  bool m_initialised;
+
+public:
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	LockMutex	mutex;			/*!< Mutex protecting the
 						locks */
 	hash_table_t*	rec_hash;		/*!< hash table of the record
@@ -857,13 +830,13 @@ struct lock_sys_t{
 	hash_table_t*	prdt_page_hash;		/*!< hash table of the page
 						lock */
 
-	char		pad2[CACHE_LINE_SIZE];	/*!< Padding */
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	LockMutex	wait_mutex;		/*!< Mutex protecting the
 						next two fields */
 	srv_slot_t*	waiting_threads;	/*!< Array  of user threads
 						suspended while waiting for
 						locks within InnoDB, protected
-						by the lock_sys->wait_mutex;
+						by the lock_sys.wait_mutex;
 						os_event_set() and
 						os_event_reset() on
 						waiting_threads[]->event
@@ -872,12 +845,7 @@ struct lock_sys_t{
 	srv_slot_t*	last_slot;		/*!< highest slot ever used
 						in the waiting_threads array,
 						protected by
-						lock_sys->wait_mutex */
-	ibool		rollback_complete;
-						/*!< TRUE if rollback of all
-						recovered transactions is
-						complete. Protected by
-						lock_sys->mutex */
+						lock_sys.wait_mutex */
 
 	ulint		n_lock_max_wait_time;	/*!< Max wait time */
 
@@ -889,6 +857,38 @@ struct lock_sys_t{
 
 	bool		timeout_thread_active;	/*!< True if the timeout thread
 						is running */
+
+
+  /**
+    Constructor.
+
+    Some members may require late initialisation, so the constructor only
+    marks the object as uninitialised. Real initialisation happens in create().
+  */
+  lock_sys_t(): m_initialised(false) {}
+
+
+  bool is_initialised() { return m_initialised; }
+
+
+  /**
+    Creates the lock system at database start.
+
+    @param[in] n_cells number of slots in lock hash table
+  */
+  void create(ulint n_cells);
+
+
+  /**
+    Resize the lock hash table.
+
+    @param[in] n_cells number of slots in lock hash table
+  */
+  void resize(ulint n_cells);
+
+
+  /** Closes the lock system at database shutdown. */
+  void close();
 };
 
 /*********************************************************************//**
@@ -1002,36 +1002,36 @@ lock_rec_free_all_from_discard_page(
 	const buf_block_t*	block);		/*!< in: page to be discarded */
 
 /** The lock system */
-extern lock_sys_t*	lock_sys;
+extern lock_sys_t lock_sys;
 
-/** Test if lock_sys->mutex can be acquired without waiting. */
+/** Test if lock_sys.mutex can be acquired without waiting. */
 #define lock_mutex_enter_nowait() 		\
-	(lock_sys->mutex.trylock(__FILE__, __LINE__))
+	(lock_sys.mutex.trylock(__FILE__, __LINE__))
 
-/** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() (lock_sys->mutex.is_owned())
+/** Test if lock_sys.mutex is owned. */
+#define lock_mutex_own() (lock_sys.mutex.is_owned())
 
-/** Acquire the lock_sys->mutex. */
+/** Acquire the lock_sys.mutex. */
 #define lock_mutex_enter() do {			\
-	mutex_enter(&lock_sys->mutex);		\
+	mutex_enter(&lock_sys.mutex);		\
 } while (0)
 
-/** Release the lock_sys->mutex. */
+/** Release the lock_sys.mutex. */
 #define lock_mutex_exit() do {			\
-	lock_sys->mutex.exit();			\
+	lock_sys.mutex.exit();			\
 } while (0)
 
-/** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() (lock_sys->wait_mutex.is_owned())
+/** Test if lock_sys.wait_mutex is owned. */
+#define lock_wait_mutex_own() (lock_sys.wait_mutex.is_owned())
 
-/** Acquire the lock_sys->wait_mutex. */
+/** Acquire the lock_sys.wait_mutex. */
 #define lock_wait_mutex_enter() do {		\
-	mutex_enter(&lock_sys->wait_mutex);	\
+	mutex_enter(&lock_sys.wait_mutex);	\
 } while (0)
 
-/** Release the lock_sys->wait_mutex. */
+/** Release the lock_sys.wait_mutex. */
 #define lock_wait_mutex_exit() do {		\
-	lock_sys->wait_mutex.exit();		\
+	lock_sys.wait_mutex.exit();		\
 } while (0)
 
 #ifdef WITH_WSREP
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index 475f2ccedf1..c1c886f6832 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -54,7 +54,7 @@ lock_rec_hash(
 	ulint	page_no)/*!< in: page number */
 {
 	return(unsigned(hash_calc_hash(lock_rec_fold(space, page_no),
-				       lock_sys->rec_hash)));
+				       lock_sys.rec_hash)));
 }
 
 /*********************************************************************//**
@@ -90,11 +90,11 @@ lock_hash_get(
 	ulint	mode)	/*!< in: lock mode */
 {
 	if (mode & LOCK_PREDICATE) {
-		return(lock_sys->prdt_hash);
+		return(lock_sys.prdt_hash);
 	} else if (mode & LOCK_PRDT_PAGE) {
-		return(lock_sys->prdt_page_hash);
+		return(lock_sys.prdt_page_hash);
 	} else {
-		return(lock_sys->rec_hash);
+		return(lock_sys.rec_hash);
 	}
 }
 
diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h
index e4e37776d22..9eb38ff8975 100644
--- a/storage/innobase/include/lock0prdt.h
+++ b/storage/innobase/include/lock0prdt.h
@@ -51,9 +51,8 @@ lock_prdt_lock(
 				SELECT FOR UPDATE */
 	ulint		type_mode,
 				/*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
-	que_thr_t*	thr,	/*!< in: query thread
+	que_thr_t*	thr);	/*!< in: query thread
 				(can be NULL if BTR_NO_LOCKING_FLAG) */
-	mtr_t*		mtr);	/*!< in/out: mini-transaction */
 
 /*********************************************************************//**
 Acquire a "Page" lock on a block
@@ -107,7 +106,6 @@ Update predicate lock when page splits */
 void
 lock_prdt_update_split(
 /*===================*/
-	buf_block_t*	block,		/*!< in/out: page to be split */
 	buf_block_t*	new_block,	/*!< in/out: the new half page */
 	lock_prdt_t*	prdt,		/*!< in: MBR on the old page */
 	lock_prdt_t*	new_prdt,	/*!< in: MBR on the new page */
@@ -123,7 +121,6 @@ lock_prdt_update_parent(
 	buf_block_t*	right_block,	/*!< in/out: the new half page */
 	lock_prdt_t*	left_prdt,	/*!< in: MBR on the old page */
 	lock_prdt_t*	right_prdt,	/*!< in: MBR on the new page */
-	lock_prdt_t*	parent_prdt,	/*!< in: original parent MBR */
 	ulint		space,		/*!< in: space id */
 	ulint		page_no);	/*!< in: page number */
 
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 1a03d1d0297..1aac5d20a59 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -32,7 +32,6 @@ Created 5/7/1996 Heikki Tuuri
 #define lock_t ib_lock_t
 
 struct lock_t;
-struct lock_sys_t;
 struct lock_table_t;
 
 /* Basic lock modes */
@@ -175,7 +174,7 @@ operator<<(std::ostream& out, const lock_rec_t& lock)
 #endif
 /* @} */
 
-/** Lock struct; protected by lock_sys->mutex */
+/** Lock struct; protected by lock_sys.mutex */
 struct ib_lock_t
 {
 	trx_t*		trx;		/*!< transaction owning the
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 4759e5a85f4..b215ba34a77 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -41,8 +41,8 @@ Created 12/9/1995 Heikki Tuuri
 #include "os0event.h"
 #include "os0file.h"
 
-/** Redo log group */
-struct log_group_t;
+/** Maximum value of srv_n_log_files (innodb_log_files_in_group) */
+#define SRV_N_LOG_FILES_MAX 100
 
 /** Magic value to use instead of log checksums when they are disabled */
 #define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
@@ -50,13 +50,13 @@ struct log_group_t;
 /* Margin for the free space in the smallest log group, before a new query
 step which modifies the database, is started */
 
-#define LOG_CHECKPOINT_FREE_PER_THREAD	(4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE	(8 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_FREE_PER_THREAD	(4U << srv_page_size_shift)
+#define LOG_CHECKPOINT_EXTRA_FREE	(8U << srv_page_size_shift)
 
 typedef ulint (*log_checksum_func_t)(const byte* log_block);
 
 /** Pointer to the log checksum calculation function. Protected with
-log_sys->mutex. */
+log_sys.mutex. */
 extern log_checksum_func_t log_checksum_algorithm_ptr;
 
 /** Append a string to the log.
@@ -82,9 +82,7 @@ log_free_check(void);
 
 /** Extends the log buffer.
 @param[in]	len	requested minimum size in bytes */
-void
-log_buffer_extend(
-	ulint	len);
+void log_buffer_extend(ulong len);
 
 /** Check margin not to overwrite transaction log from the last checkpoint.
 If would estimate the log write to exceed the log_group_capacity,
@@ -138,7 +136,7 @@ log_get_flush_lsn(void);
 /*=============*/
 /****************************************************************
 Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
 @return log group capacity */
 UNIV_INLINE
 lsn_t
@@ -152,14 +150,7 @@ UNIV_INLINE
 lsn_t
 log_get_max_modified_age_async(void);
 /*================================*/
-/** Initializes the redo logging subsystem. */
-void
-log_sys_init();
 
-/** Initialize the redo log.
-@param[in]	n_files		number of files */
-void
-log_init(ulint n_files);
 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
 and lsn - buf_get_oldest_modification().
 @param[in]	file_size	requested innodb_log_file_size
@@ -171,12 +162,6 @@ log_set_capacity(ulonglong file_size)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
-Completes an i/o to a log file. */
-void
-log_io_complete(
-/*============*/
-	log_group_t*	group);	/*!< in: log group */
-/******************************************************//**
 This function is called, e.g., when a transaction wants to commit. It checks
 that the log has been written to the log file up to the last log entry written
 by the transaction. If there is a flush running, it waits and checks if the
@@ -235,13 +220,9 @@ shutdown. This function also writes all log in log files to the log archive. */
 void
 logs_empty_and_mark_files_at_shutdown(void);
 /*=======================================*/
-/** Read a log group header page to log_sys->checkpoint_buf.
-@param[in]	group	log group
-@param[in]	header	0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
-void
-log_group_header_read(
-	const log_group_t*	group,
-	ulint			header);
+/** Read a log group header page to log_sys.checkpoint_buf.
+@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+void log_header_read(ulint header);
 /** Write checkpoint info to the log header and invoke log_mutex_exit().
 @param[in]	sync	whether to wait for the write to complete
 @param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
@@ -262,16 +243,6 @@ objects! */
 void
 log_check_margins(void);
 
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-void
-log_group_set_fields(
-/*=================*/
-	log_group_t*	group,	/*!< in/out: group */
-	lsn_t		lsn);	/*!< in: lsn for which the values should be
-				set */
 /************************************************************//**
 Gets a log block flush bit.
 @return TRUE if this block was the first to be written in a log flush */
@@ -322,11 +293,10 @@ log_block_calc_checksum_crc32(
 	const byte*	block);
 
 /** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in]	block	the redo log block
 @return		the calculated checksum value */
 UNIV_INLINE
 ulint
-log_block_calc_checksum_none(const byte*	block);
+log_block_calc_checksum_none(const byte*);
 
 /************************************************************//**
 Gets a log block checksum field value.
@@ -403,14 +373,6 @@ Refreshes the statistics used to print per-second averages. */
 void
 log_refresh_stats(void);
 /*===================*/
-/********************************************************//**
-Closes all log groups. */
-void
-log_group_close_all(void);
-/*=====================*/
-/** Shut down the redo log subsystem. */
-void
-log_shutdown();
 
 /** Whether to generate and require checksums on the redo log pages */
 extern my_bool	innodb_log_checksums;
@@ -422,8 +384,6 @@ extern my_bool	innodb_log_checksums;
 /* The counting of lsn's starts from this value: this must be non-zero */
 #define LOG_START_LSN		((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
 
-#define LOG_BUFFER_SIZE		(srv_log_buffer_size * UNIV_PAGE_SIZE)
-
 /* Offsets of a log block header */
 #define	LOG_BLOCK_HDR_NO	0	/* block number which must be > 0 and
 					is allowed to wrap around at 2G; the
@@ -447,7 +407,7 @@ extern my_bool	innodb_log_checksums;
 					from this offset in this log block,
 					if value not 0 */
 #define LOG_BLOCK_CHECKPOINT_NO	8	/* 4 lower bytes of the value of
-					log_sys->next_checkpoint_no when the
+					log_sys.next_checkpoint_no when the
 					log block was last written to: if the
 					block has not yet been written full,
 					this value is only updated before a
@@ -470,7 +430,7 @@ extern my_bool	innodb_log_checksums;
 #define LOG_CHECKPOINT_LSN		8
 /** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
 #define LOG_CHECKPOINT_OFFSET		16
-/** log_sys_t::buf_size at the time of the checkpoint (not used) */
+/** srv_log_buffer_size at the time of the checkpoint (not used) */
 #define LOG_CHECKPOINT_LOG_BUF_SIZE	24
 /** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
 #define LOG_CHECKPOINT_CRYPT_KEY	32
@@ -512,16 +472,20 @@ or the MySQL version that created the redo log file. */
 	IB_TO_STR(MYSQL_VERSION_MINOR) "."	\
 	IB_TO_STR(MYSQL_VERSION_PATCH)
 
-/** The redo log format identifier corresponding to the current format version.
-Stored in LOG_HEADER_FORMAT.
+/** The original (not version-tagged) InnoDB redo log format */
+#define LOG_HEADER_FORMAT_3_23		0
+/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
+#define LOG_HEADER_FORMAT_10_2		1
+/** The MariaDB 10.3.2 log format.
 To prevent crash-downgrade to earlier 10.2 due to the inability to
 roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
 MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
 1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
 (MDEV-13564 backup-friendly TRUNCATE). */
-#define LOG_HEADER_FORMAT_CURRENT	103
-/** The old MariaDB 10.2.2..10.2.17 log format */
-#define LOG_HEADER_FORMAT_10_2		1
+#define LOG_HEADER_FORMAT_10_3		103
+/** The redo log format identifier corresponding to the current format version.
+Stored in LOG_HEADER_FORMAT. */
+#define LOG_HEADER_FORMAT_CURRENT	LOG_HEADER_FORMAT_10_3
 /** Future MariaDB 10.4 log format */
 #define LOG_HEADER_FORMAT_10_4		104
 /** Encrypted MariaDB redo log */
@@ -540,102 +504,43 @@ MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
 					header */
 #define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)
 
-/** The state of a log group */
-enum log_group_state_t {
-	/** No corruption detected */
-	LOG_GROUP_OK,
-	/** Corrupted */
-	LOG_GROUP_CORRUPTED
-};
-
 typedef ib_mutex_t	LogSysMutex;
 typedef ib_mutex_t	FlushOrderMutex;
 
-/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil.
-Currently, this is only protected by log_sys->mutex. However, in the case
-of log_write_up_to(), we will access some members only with the protection
-of log_sys->write_mutex, which should affect nothing for now. */
-struct log_group_t{
-	/** number of files in the group */
-	ulint				n_files;
-	/** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
-	uint32_t			format;
-	/** redo log subformat: 0 with separately logged TRUNCATE,
-	1 with fully redo-logged TRUNCATE */
-	uint32_t			subformat;
-	/** individual log file size in bytes, including the header */
-	lsn_t				file_size;
-	/** corruption status */
-	log_group_state_t		state;
-	/** lsn used to fix coordinates within the log group */
-	lsn_t				lsn;
-	/** the byte offset of the above lsn */
-	lsn_t				lsn_offset;
-	/** unaligned buffers */
-	byte**				file_header_bufs_ptr;
-	/** buffers for each file header in the group */
-	byte**				file_header_bufs;
-
-	/** used only in recovery: recovery scan succeeded up to this
-	lsn in this log group */
-	lsn_t				scanned_lsn;
-	/** unaligned checkpoint header */
-	byte*				checkpoint_buf_ptr;
-	/** buffer for writing a checkpoint header */
-	byte*				checkpoint_buf;
-
-	/** @return whether the redo log is encrypted */
-	bool is_encrypted() const
-	{
-		return((format & LOG_HEADER_FORMAT_ENCRYPTED) != 0);
-	}
-
-	/** @return capacity in bytes */
-	inline lsn_t capacity() const
-	{
-		return((file_size - LOG_FILE_HDR_SIZE) * n_files);
-	}
-};
-
 /** Redo log buffer */
 struct log_t{
-	char		pad1[CACHE_LINE_SIZE];
-					/*!< Padding to prevent other memory
-					update hotspots from residing on the
-					same memory cache line */
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	lsn_t		lsn;		/*!< log sequence number */
-	ulint		buf_free;	/*!< first free offset within the log
+	ulong		buf_free;	/*!< first free offset within the log
 					buffer in use */
 
-	char		pad2[CACHE_LINE_SIZE];/*!< Padding */
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	LogSysMutex	mutex;		/*!< mutex protecting the log */
-	char		pad3[CACHE_LINE_SIZE]; /*!< Padding */
-	LogSysMutex	write_mutex;	/*!< mutex protecting writing to log
-					file and accessing to log_group_t */
-	char		pad4[CACHE_LINE_SIZE];/*!< Padding */
+	MY_ALIGNED(CACHE_LINE_SIZE)
+	LogSysMutex	write_mutex;	/*!< mutex protecting writing to log */
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	FlushOrderMutex	log_flush_order_mutex;/*!< mutex to serialize access to
 					the flush list when we are putting
 					dirty blocks in the list. The idea
 					behind this mutex is to be able
-					to release log_sys->mutex during
+					to release log_sys.mutex during
 					mtr_commit and still ensure that
 					insertions in the flush_list happen
 					in the LSN order. */
-	byte*		buf_ptr;	/*!< unaligned log buffer, which should
-					be of double of buf_size */
-	byte*		buf;		/*!< log buffer currently in use;
-					this could point to either the first
-					half of the aligned(buf_ptr) or the
+	byte*		buf;		/*!< log buffer; memory of twice
+					the srv_log_buffer_size is
+					allocated here. This pointer is
+					switched between the first half and the
 					second half in turns, so that log
 					write/flush to disk don't block
 					concurrent mtrs which will write
-					log to this buffer */
+					log to this buffer. Care must be taken
+					to switch back to the first half before
+					freeing or resizing the buffer. */
 	bool		first_in_use;	/*!< true if buf points to the first
 					half of the aligned(buf_ptr), false
 					if the second half */
-	ulint		buf_size;	/*!< log buffer size of each in bytes */
-	ulint		max_buf_free;	/*!< recommended maximum value of
+	ulong		max_buf_free;	/*!< recommended maximum value of
 					buf_free for the buffer in use, after
 					which the buffer is flushed */
 	bool		check_flush_or_checkpoint;
@@ -647,12 +552,72 @@ struct log_t{
 					max_checkpoint_age; this flag is
 					peeked at by log_free_check(), which
 					does not reserve the log mutex */
-	/** the redo log */
-	log_group_t			log;
+
+  /** Log files. Protected by mutex or write_mutex. */
+  struct files {
+    /** number of files */
+    ulint				n_files;
+    /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
+    uint32_t				format;
+    /** redo log subformat: 0 with separately logged TRUNCATE,
+    2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
+    uint32_t				subformat;
+    /** individual log file size in bytes, including the header */
+    lsn_t				file_size;
+    /** lsn used to fix coordinates within the log group */
+    lsn_t				lsn;
+    /** the byte offset of the above lsn */
+    lsn_t				lsn_offset;
+
+    /** unaligned buffers */
+    byte*				file_header_bufs_ptr;
+    /** buffers for each file header in the group */
+    byte*				file_header_bufs[SRV_N_LOG_FILES_MAX];
+
+    /** used only in recovery: recovery scan succeeded up to this
+    lsn in this log group */
+    lsn_t				scanned_lsn;
+
+    /** @return whether the redo log is encrypted */
+    bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; }
+    /** @return capacity in bytes */
+    lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
+    /** Calculate the offset of a log sequence number.
+    @param[in]	lsn	log sequence number
+    @return offset within the log */
+    inline lsn_t calc_lsn_offset(lsn_t lsn) const;
+
+    /** Set the field values to correspond to a given lsn. */
+    void set_fields(lsn_t lsn)
+    {
+      lsn_offset = calc_lsn_offset(lsn);
+      this->lsn = lsn;
+    }
+
+    /** Read a log segment to log_sys.buf.
+    @param[in,out]	start_lsn	in: read area start,
+					out: the last read valid lsn
+    @param[in]		end_lsn		read area end
+    @return	whether no invalid blocks (e.g., checksum mismatch) were found */
+    bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
+
+    /** Initialize the redo log buffer.
+    @param[in]	n_files		number of files */
+    void create(ulint n_files);
+
+    /** Free the file header buffers and reset the number of files. */
+    void close()
+    {
+      ut_free(file_header_bufs_ptr);
+      n_files = 0;
+      file_header_bufs_ptr = NULL;
+      memset(file_header_bufs, 0, sizeof file_header_bufs);
+    }
+  } log;
 
 	/** The fields involved in the log buffer flush @{ */
 
-	ulint		buf_next_to_write;/*!< first offset in the log buffer
+	ulong		buf_next_to_write;/*!< first offset in the log buffer
 					where the byte content may not exist
 					written to file, e.g., the start
 					offset of a log record catenated
@@ -669,11 +634,11 @@ struct log_t{
 					AND flushed to disk */
 	ulint		n_pending_flushes;/*!< number of currently
 					pending flushes; protected by
-					log_sys_t::mutex */
+					log_sys.mutex */
 	os_event_t	flush_event;	/*!< this event is in the reset state
 					when a flush is running;
 					os_event_set() and os_event_reset()
-					are protected by log_sys_t::mutex */
+					are protected by log_sys.mutex */
 	ulint		n_log_ios;	/*!< number of log i/os initiated thus
 					far */
 	ulint		n_log_ios_old;	/*!< number of log i/o's at the
@@ -719,7 +684,7 @@ struct log_t{
 					/*!< extra redo log records to write
 					during a checkpoint, or NULL if none.
 					The pointer is protected by
-					log_sys->mutex, and the data must
+					log_sys.mutex, and the data must
 					remain constant as long as this
 					pointer is not NULL. */
 	ulint		n_pending_checkpoint_writes;
@@ -729,73 +694,105 @@ struct log_t{
 					checkpoint write is running; a thread
 					should wait for this without owning
 					the log mutex */
-	byte*		checkpoint_buf_ptr;/* unaligned checkpoint header */
-	byte*		checkpoint_buf;	/*!< checkpoint header is read to this
-					buffer */
+
+	/** buffer for checkpoint header */
+	MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
+	byte		checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
 	/* @} */
 
-	/** @return whether the redo log is encrypted */
-	bool is_encrypted() const
-	{
-		return(log.is_encrypted());
-	}
+private:
+  bool m_initialised;
+public:
+  /**
+    Constructor.
+
+    Some members may require late initialisation, thus we just mark object as
+    uninitialised. Real initialisation happens in create().
+  */
+  log_t(): m_initialised(false) {}
+
+  /** @return whether the redo log is encrypted */
+  bool is_encrypted() const { return(log.is_encrypted()); }
+  /** @return whether m_initialised is set (the constructor leaves it false) */
+  bool is_initialised() const { return m_initialised; }
+
+  /** Complete an asynchronous checkpoint write. */
+  void complete_checkpoint();
+
+  /** Initialise the redo log subsystem. */
+  void create();
+
+  /** Shut down the redo log subsystem. */
+  void close();
 };
 
 /** Redo log system */
-extern log_t*	log_sys;
+extern log_t	log_sys;
+
+/** Calculate the offset of an lsn, based on this->lsn and lsn_offset.
+@param[in]     lsn     log sequence number
+@return offset within the log, counting the header of each log file */
+inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
+{
+  ut_ad(this == &log_sys.log);
+  /* The lsn parameters are updated while holding both the mutexes
+  and it is ok to have either of them while reading */
+  ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
+  const lsn_t group_size= capacity(); /* excludes the file headers */
+  lsn_t l= lsn - this->lsn;
+  if (longlong(l) < 0) { /* lsn is before this->lsn */
+    l= lsn_t(-longlong(l)) % group_size;
+    l= group_size - l;
+  }
+  /* Add lsn_offset with its file headers removed; wrap at group_size. */
+  l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
+  l%= group_size;
+  return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
+}
 
 /** Test if flush order mutex is owned. */
 #define log_flush_order_mutex_own()			\
-	mutex_own(&log_sys->log_flush_order_mutex)
+	mutex_own(&log_sys.log_flush_order_mutex)
 
 /** Acquire the flush order mutex. */
 #define log_flush_order_mutex_enter() do {		\
-	mutex_enter(&log_sys->log_flush_order_mutex);	\
+	mutex_enter(&log_sys.log_flush_order_mutex);	\
 } while (0)
 /** Release the flush order mutex. */
 # define log_flush_order_mutex_exit() do {		\
-	mutex_exit(&log_sys->log_flush_order_mutex);	\
+	mutex_exit(&log_sys.log_flush_order_mutex);	\
 } while (0)
 
 /** Test if log sys mutex is owned. */
-#define log_mutex_own() mutex_own(&log_sys->mutex)
+#define log_mutex_own() mutex_own(&log_sys.mutex)
 
 /** Test if log sys write mutex is owned. */
-#define log_write_mutex_own() mutex_own(&log_sys->write_mutex)
+#define log_write_mutex_own() mutex_own(&log_sys.write_mutex)
 
 /** Acquire the log sys mutex. */
-#define log_mutex_enter() mutex_enter(&log_sys->mutex)
+#define log_mutex_enter() mutex_enter(&log_sys.mutex)
 
 /** Acquire the log sys write mutex. */
-#define log_write_mutex_enter() mutex_enter(&log_sys->write_mutex)
+#define log_write_mutex_enter() mutex_enter(&log_sys.write_mutex)
 
 /** Acquire all the log sys mutexes. */
 #define log_mutex_enter_all() do {		\
-	mutex_enter(&log_sys->write_mutex);	\
-	mutex_enter(&log_sys->mutex);		\
+	mutex_enter(&log_sys.write_mutex);	\
+	mutex_enter(&log_sys.mutex);		\
 } while (0)
 
 /** Release the log sys mutex. */
-#define log_mutex_exit() mutex_exit(&log_sys->mutex)
+#define log_mutex_exit() mutex_exit(&log_sys.mutex)
 
 /** Release the log sys write mutex.*/
-#define log_write_mutex_exit() mutex_exit(&log_sys->write_mutex)
+#define log_write_mutex_exit() mutex_exit(&log_sys.write_mutex)
 
 /** Release all the log sys mutexes. */
 #define log_mutex_exit_all() do {		\
-	mutex_exit(&log_sys->mutex);		\
-	mutex_exit(&log_sys->write_mutex);	\
+	mutex_exit(&log_sys.mutex);		\
+	mutex_exit(&log_sys.write_mutex);	\
 } while (0)
 
-/** Calculate the offset of an lsn within a log group.
-@param[in]	lsn	log sequence number
-@param[in]	group	log group
-@return offset within the log group */
-lsn_t
-log_group_calc_lsn_offset(
-	lsn_t			lsn,
-	const log_group_t*	group);
-
 /* log scrubbing speed, in bytes/sec */
 extern ulonglong innodb_scrub_log_speed;
 
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 58da7bacc6f..87d55f9e01d 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,12 @@ Created 12/9/1995 Heikki Tuuri
 
 #include "mach0data.h"
 #include "srv0mon.h"
-#include "srv0srv.h"
 #include "ut0crc32.h"
 
 #ifdef UNIV_LOG_LSN_DEBUG
 #include "mtr0types.h"
 #endif /* UNIV_LOG_LSN_DEBUG */
+extern ulong srv_log_buffer_size;
 
 /************************************************************//**
 Gets a log block flush bit.
@@ -241,12 +241,10 @@ log_block_calc_checksum_crc32(
 }
 
 /** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in]     block   log block
 @return        checksum */
 UNIV_INLINE
 ulint
-log_block_calc_checksum_none(
-	const byte*	block)
+log_block_calc_checksum_none(const byte*)
 {
 	return(LOG_NO_CHECKSUM_MAGIC);
 }
@@ -330,15 +328,15 @@ log_reserve_and_write_fast(
 			len - SIZE_OF_MLOG_CHECKPOINT]
 		? 0
 		: 1
-		+ mach_get_compressed_size(log_sys->lsn >> 32)
-		+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+		+ mach_get_compressed_size(log_sys.lsn >> 32)
+		+ mach_get_compressed_size(log_sys.lsn & 0xFFFFFFFFUL);
 #endif /* UNIV_LOG_LSN_DEBUG */
 
 	const ulint	data_len = len
 #ifdef UNIV_LOG_LSN_DEBUG
 		+ lsn_len
 #endif /* UNIV_LOG_LSN_DEBUG */
-		+ log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+		+ log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
 
 	if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
@@ -348,44 +346,44 @@ log_reserve_and_write_fast(
 		return(0);
 	}
 
-	*start_lsn = log_sys->lsn;
+	*start_lsn = log_sys.lsn;
 
 #ifdef UNIV_LOG_LSN_DEBUG
 	if (lsn_len) {
 		/* Write the LSN pseudo-record. */
-		byte* b = &log_sys->buf[log_sys->buf_free];
+		byte* b = &log_sys.buf[log_sys.buf_free];
 
 		*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
 
 		/* Write the LSN in two parts,
 		as a pseudo page number and space id. */
-		b += mach_write_compressed(b, log_sys->lsn >> 32);
-		b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
-		ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
+		b += mach_write_compressed(b, log_sys.lsn >> 32);
+		b += mach_write_compressed(b, log_sys.lsn & 0xFFFFFFFFUL);
+		ut_a(b - lsn_len == &log_sys.buf[log_sys.buf_free]);
 
 		::memcpy(b, str, len);
 
 		len += lsn_len;
 	} else
 #endif /* UNIV_LOG_LSN_DEBUG */
-	memcpy(log_sys->buf + log_sys->buf_free, str, len);
+	memcpy(log_sys.buf + log_sys.buf_free, str, len);
 
 	log_block_set_data_len(
                 reinterpret_cast<byte*>(ut_align_down(
-                        log_sys->buf + log_sys->buf_free,
+                        log_sys.buf + log_sys.buf_free,
                         OS_FILE_LOG_BLOCK_SIZE)),
                 data_len);
 
-	log_sys->buf_free += len;
+	log_sys.buf_free += ulong(len);
 
-	ut_ad(log_sys->buf_free <= log_sys->buf_size);
+	ut_ad(log_sys.buf_free <= srv_log_buffer_size);
 
-	log_sys->lsn += len;
+	log_sys.lsn += len;
 
 	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
-		    log_sys->lsn - log_sys->last_checkpoint_lsn);
+		    log_sys.lsn - log_sys.last_checkpoint_lsn);
 
-	return(log_sys->lsn);
+	return(log_sys.lsn);
 }
 
 /************************************************************//**
@@ -400,7 +398,7 @@ log_get_lsn(void)
 
 	log_mutex_enter();
 
-	lsn = log_sys->lsn;
+	lsn = log_sys.lsn;
 
 	log_mutex_exit();
 
@@ -418,7 +416,7 @@ log_get_flush_lsn(void)
 
 	log_mutex_enter();
 
-	lsn = log_sys->flushed_to_disk_lsn;
+	lsn = log_sys.flushed_to_disk_lsn;
 
 	log_mutex_exit();
 
@@ -435,11 +433,11 @@ log_get_lsn_nowait(void)
 {
 	lsn_t	lsn=0;
 
-	if (!mutex_enter_nowait(&(log_sys->mutex))) {
+	if (!mutex_enter_nowait(&(log_sys.mutex))) {
 
-		lsn = log_sys->lsn;
+		lsn = log_sys.lsn;
 
-		mutex_exit(&(log_sys->mutex));
+		mutex_exit(&(log_sys.mutex));
 	}
 
 	return(lsn);
@@ -447,14 +445,14 @@ log_get_lsn_nowait(void)
 
 /****************************************************************
 Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
 @return log group capacity */
 UNIV_INLINE
 lsn_t
 log_get_capacity(void)
 /*==================*/
 {
-	return(log_sys->log_group_capacity);
+	return(log_sys.log_group_capacity);
 }
 
 /****************************************************************
@@ -466,7 +464,7 @@ lsn_t
 log_get_max_modified_age_async(void)
 /*================================*/
 {
-	return(log_sys->max_modified_age_async);
+	return(log_sys.max_modified_age_async);
 }
 
 /***********************************************************************//**
@@ -498,7 +496,7 @@ log_free_check(void)
 		      sync_allowed_latches(latches,
 					   latches + UT_ARR_SIZE(latches))));
 
-	if (log_sys->check_flush_or_checkpoint) {
+	if (log_sys.check_flush_or_checkpoint) {
 
 		log_check_margins();
 	}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index d3c891c9cba..d15ec19d86b 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -96,20 +96,6 @@ void
 recv_sys_debug_free(void);
 /*=====================*/
 
-/** Read a log segment to a buffer.
-@param[out]	buf		buffer
-@param[in]	group		redo log files
-@param[in, out]	start_lsn	in : read area start, out: the last read valid lsn
-@param[in]	end_lsn		read area end
-@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
-@return	false, if invalid block encountered (e.g checksum mismatch), true otherwise */
-bool
-log_group_read_log_seg(
-	byte*			buf,
-	const log_group_t*	group,
-	lsn_t*			start_lsn,
-	lsn_t			end_lsn);
-
 /********************************************************//**
 Reset the state of the recovery system variables. */
 void
@@ -227,7 +213,7 @@ struct recv_sys_t{
 	ib_mutex_t		writer_mutex;/*!< mutex coordinating
 				flushing between recv_writer_thread and
 				the recovery thread. */
-	os_event_t		flush_start;/*!< event to acticate
+	os_event_t		flush_start;/*!< event to activate
 				page cleaner threads */
 	os_event_t		flush_end;/*!< event to signal that the page
 				cleaner has finished the request */
@@ -243,6 +229,7 @@ struct recv_sys_t{
 				/*!< this is TRUE when a log rec application
 				batch is running */
 	byte*		buf;	/*!< buffer for parsing log records */
+	size_t		buf_size;	/*!< size of buf */
 	ulint		len;	/*!< amount of data in buf */
 	lsn_t		parse_start_lsn;
 				/*!< this is the lsn from which we were able to
@@ -330,7 +317,7 @@ extern bool		recv_no_ibuf_operations;
 extern bool		recv_needed_recovery;
 #ifdef UNIV_DEBUG
 /** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
+Protected by log_sys.mutex. */
 extern bool		recv_no_log_write;
 #endif /* UNIV_DEBUG */
 
@@ -341,11 +328,11 @@ extern bool		recv_lsn_checks_on;
 
 /** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
 times! */
-#define RECV_PARSING_BUF_SIZE	(2 * 1024 * 1024)
+#define RECV_PARSING_BUF_SIZE	(2U << 20)
 
 /** Size of block reads when the log groups are scanned forward to do a
 roll-forward */
-#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
+#define RECV_SCAN_SIZE		(4U << srv_page_size_shift)
 
 /** This many frames must be left free in the buffer pool when we scan
 the log and store the scanned log records in the buffer pool: we will
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index e44f3f730af..2cdb307ea96 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -71,11 +71,11 @@ allocations of small buffers. */
 
 #define MEM_BLOCK_START_SIZE		64
 #define MEM_BLOCK_STANDARD_SIZE		\
-	(UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
+	(srv_page_size >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
 
 /** If a memory heap is allowed to grow into the buffer pool, the following
 is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF		(UNIV_PAGE_SIZE - 200)
+#define MEM_MAX_ALLOC_IN_BUF		(srv_page_size - 200)
 
 /** Space needed when allocating for a user a field of length N.
 The space is allocated only in multiples of UNIV_MEM_ALIGNMENT.  */
@@ -294,26 +294,42 @@ mem_strdupl(
 	const char*	str,	/*!< in: string to be copied */
 	ulint		len);	/*!< in: length of str, in bytes */
 
-/** Duplicates a NUL-terminated string, allocated from a memory heap.
+/** Duplicate a block of data, allocated from a memory heap.
+@param[in]	heap	memory heap where string is allocated
+@param[in]	data	block of data to be copied
+@param[in]	len	length of data, in bytes
+@return own: a copy of data */
+inline
+void*
+mem_heap_dup(mem_heap_t* heap, const void* data, size_t len)
+{
+	return(memcpy(mem_heap_alloc(heap, len), data, len));
+}
+
+/** Duplicate a NUL-terminated string, allocated from a memory heap.
 @param[in]	heap	memory heap where string is allocated
 @param[in]	str	string to be copied
 @return own: a copy of the string */
+inline
 char*
-mem_heap_strdup(
-	mem_heap_t*	heap,
-	const char*	str);
+mem_heap_strdup(mem_heap_t* heap, const char* str)
+{
+	return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
+}
 
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
+/** Duplicate a string, allocated from a memory heap.
+@param[in]	heap	memory heap where string is allocated
+@param[in]	str	string to be copied
+@param[in]	len	length of str, in bytes
+@return own: a NUL-terminated copy of str */
+inline
 char*
-mem_heap_strdupl(
-/*=============*/
-	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
-	const char*	str,	/*!< in: string to be copied */
-	ulint		len);	/*!< in: length of str, in bytes */
+mem_heap_strdupl(mem_heap_t* heap, const char* str, size_t len)
+{
+	char*	s = static_cast<char*>(mem_heap_alloc(heap, len + 1));
+	s[len] = 0;
+	return(static_cast<char*>(memcpy(s, str, len)));
+}
 
 /**********************************************************************//**
 Concatenate two strings and return the result, using a memory heap.
@@ -325,16 +341,6 @@ mem_heap_strcat(
 	const char*	s1,	/*!< in: string 1 */
 	const char*	s2);	/*!< in: string 2 */
 
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-void*
-mem_heap_dup(
-/*=========*/
-	mem_heap_t*	heap,	/*!< in: memory heap where copy is allocated */
-	const void*	data,	/*!< in: data to be copied */
-	ulint		len);	/*!< in: length of data, in bytes */
-
 /****************************************************************//**
 A simple sprintf replacement that dynamically allocates the space for the
 formatted string from the given heap. This supports a very limited set of
@@ -458,13 +464,14 @@ public:
 	allocated by mem_heap_allocator) can be used as a hint to the
 	implementation about where the new memory should be allocated in
 	order to improve locality. */
-	pointer	allocate(size_type n, const_pointer hint = 0)
+	pointer	allocate(size_type n)
 	{
 		return(reinterpret_cast<pointer>(
 			mem_heap_alloc(m_heap, n * sizeof(T))));
 	}
+	pointer	allocate(size_type n, const_pointer) { return allocate(n); }
 
-	void deallocate(pointer p, size_type n) { }
+	void deallocate(pointer, size_type) {}
 
 	pointer address (reference r) const { return(&r); }
 
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 4d76f07694d..405b7338b51 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -277,7 +277,8 @@ mem_heap_free_heap_top(
 	ut_ad(block);
 
 	/* Set the free field of block */
-	mem_block_set_free(block, old_top - (byte*) block);
+	mem_block_set_free(block,
+			   ulint(old_top - reinterpret_cast<byte*>(block)));
 
 	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
 	UNIV_MEM_FREE(old_top, (byte*) block + block->len - old_top);
@@ -547,7 +548,7 @@ mem_heap_get_size(
 	size = heap->total_size;
 
 	if (heap->free_block) {
-		size += UNIV_PAGE_SIZE;
+		size += srv_page_size;
 	}
 
 	return(size);
@@ -580,20 +581,3 @@ mem_strdupl(
 	s[len] = 0;
 	return(static_cast<char*>(memcpy(s, str, len)));
 }
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
-	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
-	const char*	str,	/*!< in: string to be copied */
-	ulint		len)	/*!< in: length of str, in bytes */
-{
-	char*	s = (char*) mem_heap_alloc(heap, len + 1);
-	s[len] = 0;
-	return((char*) memcpy(s, str, len));
-}
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index dd68ea25613..5c72c7cb5da 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -225,7 +225,7 @@ mlog_write_initial_log_record_fast(
 	ut_ad(log_ptr);
 	ut_d(mtr->memo_modify_page(ptr));
 
-	page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+	page = (const byte*) ut_align_down(ptr, srv_page_size);
 	space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 	offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
 
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index bdd3a6a67b9..0c157cb87cf 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -29,9 +29,7 @@ Created 11/26/1995 Heikki Tuuri
 #define mtr0mtr_h
 
 #include "univ.i"
-#include "log0types.h"
-#include "mtr0types.h"
-#include "buf0types.h"
+#include "fil0fil.h"
 #include "dyn0buf.h"
 
 /** Start a mini-transaction. */
@@ -73,13 +71,6 @@ savepoint. */
 				(m)->memo_release((o), (t))
 
 #ifdef UNIV_DEBUG
-
-/** Check if memo contains the given item. */
-#define mtr_is_block_fix(m, o, t, table) mtr_memo_contains(m, o, t)
-
-/** Check if memo contains the given page. */
-#define mtr_is_page_fix(m, p, t, table) mtr_memo_contains_page(m, p, t)
-
 /** Check if memo contains the given item.
 @return	TRUE if contains */
 #define mtr_memo_contains(m, o, t)					\
@@ -133,9 +124,6 @@ savepoint. */
 @return true if the mtr is dirtying a clean page. */
 #define mtr_block_dirtied(b)	mtr_t::is_block_dirtied((b))
 
-/** Forward declaration of a tablespace object */
-struct fil_space_t;
-
 /** Append records to the system-wide redo log buffer.
 @param[in]	log	redo log records */
 void
@@ -187,12 +175,6 @@ struct mtr_t {
 		/** User tablespace that is being modified by the
 		mini-transaction */
 		fil_space_t*	m_user_space;
-		/** Undo tablespace that is being modified by the
-		mini-transaction */
-		fil_space_t*	m_undo_space;
-		/** System tablespace if it is being modified by the
-		mini-transaction */
-		fil_space_t*	m_sys_space;
 
 		/** State of the transaction */
 		mtr_state_t	m_state;
@@ -216,17 +198,9 @@ struct mtr_t {
 
 	~mtr_t() { }
 
-	/** Release the free extents that was reserved using
-	fsp_reserve_free_extents().  This is equivalent to calling
-	fil_space_release_free_extents().  This is intended for use
-	with index pages.
-	@param[in]	n_reserved	number of reserved extents */
-	void release_free_extents(ulint n_reserved);
-
 	/** Start a mini-transaction.
-	@param sync		true if it is a synchronous mini-transaction
-	@param read_only	true if read only mini-transaction */
-	void start(bool sync = true, bool read_only = false);
+	@param sync		true if it is a synchronous mini-transaction */
+	void start(bool sync = true);
 
 	/** @return whether this is an asynchronous mini-transaction. */
 	bool is_async() const
@@ -295,17 +269,6 @@ struct mtr_t {
 	@return	old mode */
 	inline mtr_log_t set_log_mode(mtr_log_t mode);
 
-	/** Note that the mini-transaction is modifying the system tablespace
-	(for example, for the change buffer or for undo logs)
-	@return the system tablespace */
-	fil_space_t* set_sys_modified()
-	{
-		if (!m_impl.m_sys_space) {
-			lookup_sys_space();
-		}
-		return(m_impl.m_sys_space);
-	}
-
 	/** Copy the tablespaces associated with the mini-transaction
 	(needed for generating MLOG_FILE_NAME records)
 	@param[in]	mtr	mini-transaction that may modify
@@ -314,35 +277,41 @@ struct mtr_t {
 	{
 		ut_ad(!m_impl.m_user_space_id);
 		ut_ad(!m_impl.m_user_space);
-		ut_ad(!m_impl.m_undo_space);
-		ut_ad(!m_impl.m_sys_space);
 
 		ut_d(m_impl.m_user_space_id = mtr.m_impl.m_user_space_id);
 		m_impl.m_user_space = mtr.m_impl.m_user_space;
-		m_impl.m_undo_space = mtr.m_impl.m_undo_space;
-		m_impl.m_sys_space = mtr.m_impl.m_sys_space;
 	}
 
 	/** Set the tablespace associated with the mini-transaction
 	(needed for generating a MLOG_FILE_NAME record)
 	@param[in]	space_id	user or system tablespace ID
 	@return	the tablespace */
-	fil_space_t* set_named_space(ulint space_id)
+	fil_space_t* set_named_space_id(ulint space_id)
 	{
 		ut_ad(!m_impl.m_user_space_id);
 		ut_d(m_impl.m_user_space_id = space_id);
 		if (!space_id) {
-			return(set_sys_modified());
+			return fil_system.sys_space;
 		} else {
-			lookup_user_space(space_id);
-			return(m_impl.m_user_space);
+			ut_ad(m_impl.m_user_space_id == space_id);
+			ut_ad(!m_impl.m_user_space);
+			m_impl.m_user_space = fil_space_get(space_id);
+			ut_ad(m_impl.m_user_space);
+			return m_impl.m_user_space;
 		}
 	}
 
 	/** Set the tablespace associated with the mini-transaction
 	(needed for generating a MLOG_FILE_NAME record)
 	@param[in]	space	user or system tablespace */
-	void set_named_space(fil_space_t* space);
+	void set_named_space(fil_space_t* space)
+	{
+		ut_ad(!m_impl.m_user_space_id);
+		ut_d(m_impl.m_user_space_id = space->id);
+		if (space->id) {
+			m_impl.m_user_space = space;
+		}
+	}
 
 #ifdef UNIV_DEBUG
 	/** Check the tablespace associated with the mini-transaction
@@ -350,6 +319,11 @@ struct mtr_t {
 	@param[in]	space	tablespace
 	@return whether the mini-transaction is associated with the space */
 	bool is_named_space(ulint space) const;
+	/** Check the tablespace associated with the mini-transaction
+	(needed for generating a MLOG_FILE_NAME record)
+	@param[in]	space	tablespace
+	@return whether the mini-transaction is associated with the space */
+	bool is_named_space(const fil_space_t* space) const;
 #endif /* UNIV_DEBUG */
 
 	/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
@@ -575,12 +549,6 @@ struct mtr_t {
 		MY_ATTRIBUTE((warn_unused_result));
 
 private:
-	/** Look up the system tablespace. */
-	void lookup_sys_space();
-	/** Look up the user tablespace.
-	@param[in]	space_id	tablespace ID  */
-	void lookup_user_space(ulint space_id);
-
 	class Command;
 
 	friend class Command;
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index af8f1d2c7db..eaf838aaa76 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -100,16 +100,16 @@ enum mlog_id_t {
 	/** Create an index page */
 	MLOG_PAGE_CREATE = 19,
 
-	/** Insert entry in an undo log */
+	/** insert an undo log record */
 	MLOG_UNDO_INSERT = 20,
 
-	/** erase an undo log page end */
+	/** erase an undo log page end (used in MariaDB 10.2) */
 	MLOG_UNDO_ERASE_END = 21,
 
 	/** initialize a page in an undo log */
 	MLOG_UNDO_INIT = 22,
 
-	/** reuse an insert undo log header */
+	/** reuse an insert undo log header (used in MariaDB 10.2) */
 	MLOG_UNDO_HDR_REUSE = 24,
 
 	/** create an undo log header */
@@ -223,8 +223,12 @@ enum mlog_id_t {
 	redo log about individual pages */
 	MLOG_INDEX_LOAD = 61,
 
+	/** write DB_TRX_ID,DB_ROLL_PTR to a clustered index leaf page
+	of a ROW_FORMAT=COMPRESSED table */
+	MLOG_ZIP_WRITE_TRX_ID = 62,
+
 	/** biggest value (used in assertions) */
-	MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD,
+	MLOG_BIGGEST_TYPE = MLOG_ZIP_WRITE_TRX_ID,
 
 	/** log record for writing/updating crypt data of
 	a tablespace */
diff --git a/storage/innobase/include/os0event.h b/storage/innobase/include/os0event.h
index d5fdc6ba080..f8227235211 100644
--- a/storage/innobase/include/os0event.h
+++ b/storage/innobase/include/os0event.h
@@ -42,11 +42,7 @@ Creates an event semaphore, i.e., a semaphore which may just have two states:
 signaled and nonsignaled. The created event is manual reset: it must be reset
 explicitly by calling os_event_reset().
 @return	the event handle */
-os_event_t
-os_event_create(
-/*============*/
-	const char*	name);	/*!< in: the name of the event, if NULL
-				the event is created without a name */
+os_event_t os_event_create(const char*);
 
 /**
 Sets an event semaphore to the signaled state: lets waiting threads
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c19079e1f9e..71da751ad25 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -50,7 +50,6 @@ struct fil_node_t;
 struct fil_space_t;
 
 extern bool	os_has_said_disk_full;
-extern my_bool	srv_use_trim;
 
 /** File offset in bytes */
 typedef ib_uint64_t os_offset_t;
@@ -69,10 +68,6 @@ the OS actually supports it: Win 95 does not, NT does. */
 /** File handle */
 typedef HANDLE os_file_t;
 
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
 
 #else /* _WIN32 */
 
@@ -81,14 +76,9 @@ typedef DIR*	os_file_dir_t;	/*!< directory stream */
 /** File handle */
 typedef int	os_file_t;
 
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) fd
-
 #endif /* _WIN32 */
 
-static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
+static const os_file_t OS_FILE_CLOSED = IF_WIN(os_file_t(INVALID_HANDLE_VALUE),-1);
 
 /** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */
 struct pfs_os_file_t
@@ -251,7 +241,7 @@ public:
 		m_fil_node(NULL),
 		m_type(static_cast<uint16_t>(type))
 	{
-		if (!is_punch_hole_supported() || !srv_use_trim) {
+		if (!is_punch_hole_supported()) {
 			clear_punch_hole();
 		}
 	}
@@ -270,7 +260,7 @@ public:
 			set_punch_hole();
 		}
 
-		if (!is_punch_hole_supported() || !srv_use_trim) {
+		if (!is_punch_hole_supported()) {
 			clear_punch_hole();
 		}
 	}
@@ -357,7 +347,7 @@ public:
 	/** Set the punch hole flag */
 	void set_punch_hole()
 	{
-		if (is_punch_hole_supported() && srv_use_trim) {
+		if (is_punch_hole_supported()) {
 			m_type |= PUNCH_HOLE;
 		}
 	}
@@ -372,8 +362,7 @@ public:
 	@param[in] node			File node */
 	void set_fil_node(fil_node_t* node)
 	{
-		if (!srv_use_trim ||
-		   (node && !fil_node_should_punch_hole(node))) {
+		if (node && !fil_node_should_punch_hole(node)) {
 			clear_punch_hole();
 		}
 
@@ -537,14 +526,11 @@ struct os_file_stat_t {
 };
 
 /** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
+the temporary file is created in the mysql server configuration
 parameter (--tmpdir).
-@param[in]	path	location for creating temporary file
 @return temporary file handle, or NULL on error */
 FILE*
-os_file_create_tmpfile(
-	const char*	path);
+os_file_create_tmpfile();
 
 /** The os_file_opendir() function opens a directory stream corresponding to the
 directory named by the dirname argument. The directory stream is positioned
@@ -848,18 +834,10 @@ The wrapper functions have the prefix of "innodb_". */
 	pfs_os_file_read_no_error_handling_func(			\
 		type, file, buf, offset, n, o, __FILE__, __LINE__)
 
-# define os_file_read_no_error_handling_int_fd(type, file, buf, offset, n) \
-	pfs_os_file_read_no_error_handling_int_fd_func(			\
-		type, file, buf, offset, n, __FILE__, __LINE__)
-
 # define os_file_write(type, name, file, buf, offset, n)	\
 	pfs_os_file_write_func(type, name, file, buf, offset,	\
 			       n, __FILE__, __LINE__)
 
-# define os_file_write_int_fd(type, name, file, buf, offset, n)		\
-	pfs_os_file_write_int_fd_func(type, name, file, buf, offset,	\
-		n, __FILE__, __LINE__)
-
 # define os_file_flush(file)					\
 	pfs_os_file_flush_func(file, __FILE__, __LINE__)
 
@@ -1570,7 +1548,7 @@ path. If the path is NULL then it will be created on --tmpdir location.
 This function is defined in ha_innodb.cc.
 @param[in]	path	location for creating temporary file
 @return temporary file descriptor, or < 0 on error */
-int
+os_file_t
 innobase_mysql_tmpfile(
 	const char*	path);
 
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index a7e4f2695da..895f82cf2d8 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -340,49 +340,6 @@ pfs_os_file_read_no_error_handling_func(
 	return(result);
 }
 
-/** NOTE! Please use the corresponding macro
-os_file_read_no_error_handling_int_fd() to request
-a synchronous read operation.
-@param[in]	type		read request
-@param[in]      file            file handle
-@param[out]     buf             buffer where to read
-@param[in]      offset          file offset where to read
-@param[in]      n               number of bytes to read
-@param[in]      src_file        caller file name
-@param[in]      src_line        caller line number
-@return	whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_read_no_error_handling_int_fd_func(
-	const IORequest&	type,
-	int			file,
-	void*			buf,
-	os_offset_t		offset,
-	ulint			n,
-	const char*		src_file,
-	uint			src_line)
-{
-	PSI_file_locker_state	state;
-
-	PSI_file_locker* locker = PSI_FILE_CALL(
-		get_thread_file_descriptor_locker)(
-			&state, file, PSI_FILE_READ);
-	if (locker != NULL) {
-		PSI_FILE_CALL(start_file_wait)(
-			locker, n,
-			__FILE__, __LINE__);
-	}
-
-	bool success = DB_SUCCESS == os_file_read_no_error_handling_func(
-		type, OS_FILE_FROM_FD(file), buf, offset, n, NULL);
-
-	if (locker != NULL) {
-		PSI_FILE_CALL(end_file_wait)(locker, n);
-	}
-
-	return(success);
-}
-
 /** NOTE! Please use the corresponding macro os_file_write(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
@@ -425,51 +382,6 @@ pfs_os_file_write_func(
 	return(result);
 }
 
-/** NOTE! Please use the corresponding macro os_file_write_int_fd(),
-not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_write_int_fd() which requests a synchronous write operation.
-@param[in]	type		write request
-@param[in]	name		file name
-@param[in]	file		file handle
-@param[in]	buf		buffer to write
-@param[in]	offset		file offset
-@param[in]	n		number of bytes
-@param[in]	src_file	file name where func invoked
-@param[in]	src_line	line where the func invoked
-@return	whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_write_int_fd_func(
-	const IORequest&	type,
-	const char*		name,
-	int			file,
-	const void*		buf,
-	os_offset_t		offset,
-	ulint			n,
-	const char*		src_file,
-	uint			src_line)
-{
-	PSI_file_locker_state   state;
-	struct PSI_file_locker* locker;
-
-	locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
-		&state, file, PSI_FILE_WRITE);
-	if (locker != NULL) {
-                PSI_FILE_CALL(start_file_wait)(
-			locker, n,
-			__FILE__, __LINE__);
-	}
-
-        bool success = DB_SUCCESS == os_file_write_func(
-		type, name, OS_FILE_FROM_FD(file), buf, offset, n);
-
-        if (locker != NULL) {
-                PSI_FILE_CALL(end_file_wait)(locker, n);
-        }
-
-        return(success);
-}
 
 /** NOTE! Please use the corresponding macro os_file_flush(), not directly
 this function!
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index 05a45a69f33..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
 #include "univ.i"
 
 #include "ut0ut.h"
+#include "my_cpu.h"
 
 /** Execute a given function exactly once in a multi-threaded environment
 or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
 					ut_error;
 				}
 
-				UT_RELAX_CPU();
+				MY_RELAX_CPU();
 			}
 		}
 	}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index c240f5dacdd..b6838c919a0 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -30,12 +30,6 @@ Created 9/8/1995 Heikki Tuuri
 
 #include "univ.i"
 
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define	OS_THREAD_MAX_N		srv_max_n_threads
-
 /* Possible fixed priorities for threads */
 #define OS_THREAD_PRIORITY_NONE		100
 #define OS_THREAD_PRIORITY_BACKGROUND	1
@@ -53,12 +47,8 @@ typedef LPTHREAD_START_ROUTINE	os_thread_func_t;
 /** Macro for specifying a Windows thread start function. */
 #define DECLARE_THREAD(func)	WINAPI func
 
-/** Required to get around a build error on Windows. Even though our functions
-are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
-are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
-don't access the arguments and don't return any value, we should be safe. */
 #define os_thread_create(f,a,i)	\
-	os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
+	os_thread_create_func(f, a, i)
 
 #else
 
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index a038f68731c..d98dfa5ec07 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -157,10 +157,7 @@ page_cur_tuple_insert(
 	ulint**		offsets,/*!< out: offsets on *rec */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mtr_t*		mtr,	/*!< in: mini-transaction handle, or NULL */
-	bool		use_cache = false)
-				/*!< in: if true, then use record cache to
-				hold the tuple converted record. */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
 	MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
 /***********************************************************//**
 Inserts a record next to page cursor. Returns pointer to inserted record if
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 3e6d40cba4a..86e560395f3 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -262,10 +262,7 @@ page_cur_tuple_insert(
 	ulint**		offsets,/*!< out: offsets on *rec */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mtr_t*		mtr,	/*!< in: mini-transaction handle, or NULL */
-	bool		use_cache)
-				/*!< in: if true, then use record cache to
-				hold the tuple converted record. */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
 {
 	rec_t*		rec;
 	ulint		size = rec_get_converted_size(index, tuple, n_ext);
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 53a58de229d..d3f6bd304a6 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -63,9 +63,42 @@ typedef	byte		page_header_t;
 #define	PAGE_FREE	 6	/* pointer to start of page free record list */
 #define	PAGE_GARBAGE	 8	/* number of bytes in deleted records */
 #define	PAGE_LAST_INSERT 10	/* pointer to the last inserted record, or
-				NULL if this info has been reset by a delete,
+				0 if this info has been reset by a delete,
 				for example */
-#define	PAGE_DIRECTION	 12	/* last insert direction: PAGE_LEFT, ... */
+
+/** This 10-bit field is usually 0. In B-tree index pages of
+ROW_FORMAT=REDUNDANT tables, this byte can contain garbage if the .ibd
+file was created in MySQL 4.1.0 or if the table resides in the system
+tablespace and was created before MySQL 4.1.1 or MySQL 4.0.14.
+In this case, the FIL_PAGE_TYPE would be FIL_PAGE_INDEX.
+
+In ROW_FORMAT=COMPRESSED tables, this field is always 0, because
+instant ADD COLUMN is not supported.
+
+In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC tables, this field is
+always 0, except in the root page of the clustered index after instant
+ADD COLUMN.
+
+Instant ADD COLUMN will change FIL_PAGE_TYPE to FIL_PAGE_TYPE_INSTANT
+and initialize the PAGE_INSTANT field to the original number of
+fields in the clustered index (dict_index_t::n_core_fields).  The most
+significant bits are in the first byte, and the least significant 5
+bits are stored in the most significant 5 bits of PAGE_DIRECTION_B.
+
+Both FIL_PAGE_TYPE_INSTANT and PAGE_INSTANT may be assigned even if
+instant ADD COLUMN was not committed. Changes to these page header fields
+are not undo-logged, but changes to the 'default value record' are.
+If the server is killed and restarted, the page header fields could
+remain set even though no 'default value record' is present.
+
+When the table becomes empty, the PAGE_INSTANT field and the
+FIL_PAGE_TYPE can be reset and any 'default value record' be removed. */
+#define PAGE_INSTANT	12
+
+/** Last insert direction: PAGE_LEFT, ...
+In ROW_FORMAT=REDUNDANT tables created before MySQL 4.1.1 or MySQL 4.0.14,
+this byte can be garbage. */
+#define	PAGE_DIRECTION_B 13
 #define	PAGE_N_DIRECTION 14	/* number of consecutive inserts to the same
 				direction */
 #define	PAGE_N_RECS	 16	/* number of user records on the page */
@@ -125,9 +158,9 @@ Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
 /*-----------------------------*/
 
 /* Heap numbers */
-#define PAGE_HEAP_NO_INFIMUM	0	/* page infimum */
-#define PAGE_HEAP_NO_SUPREMUM	1	/* page supremum */
-#define PAGE_HEAP_NO_USER_LOW	2	/* first user record in
+#define PAGE_HEAP_NO_INFIMUM	0U	/* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM	1U	/* page supremum */
+#define PAGE_HEAP_NO_USER_LOW	2U	/* first user record in
 					creation (insertion) order,
 					not necessarily collation order;
 					this record may have been deleted */
@@ -177,7 +210,7 @@ inline
 page_t*
 page_align(const void* ptr)
 {
-	return(static_cast<page_t*>(ut_align_down(ptr, UNIV_PAGE_SIZE)));
+	return(static_cast<page_t*>(ut_align_down(ptr, srv_page_size)));
 }
 
 /** Gets the byte offset within a page frame.
@@ -188,7 +221,7 @@ inline
 ulint
 page_offset(const void*	ptr)
 {
-	return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
+	return(ut_align_offset(ptr, srv_page_size));
 }
 
 /** Determine whether an index page is not in ROW_FORMAT=REDUNDANT.
@@ -251,6 +284,20 @@ page_rec_is_comp(const byte* rec)
 	return(page_is_comp(page_align(rec)));
 }
 
+# ifdef UNIV_DEBUG
+/** Determine if the record is the 'default row' pseudo-record
+in the clustered index.
+@param[in]	rec	leaf page record on an index page
+@return	whether the record is the 'default row' pseudo-record */
+inline
+bool
+page_rec_is_default_row(const rec_t* rec)
+{
+	return rec_get_info_bits(rec, page_rec_is_comp(rec))
+		& REC_INFO_MIN_REC_FLAG;
+}
+# endif /* UNIV_DEBUG */
+
 /** Determine the offset of the infimum record on the page.
 @param[in]	page	index page
 @return offset of the infimum record in record list, relative from page */
@@ -288,7 +335,7 @@ page_rec_is_user_rec_low(ulint offset)
 	compile_time_assert(PAGE_NEW_SUPREMUM < PAGE_OLD_SUPREMUM_END);
 	compile_time_assert(PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM_END);
 	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+	ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
 
 	return(offset != PAGE_NEW_SUPREMUM
 	       && offset != PAGE_NEW_INFIMUM
@@ -304,7 +351,7 @@ bool
 page_rec_is_supremum_low(ulint offset)
 {
 	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+	ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
 	return(offset == PAGE_NEW_SUPREMUM || offset == PAGE_OLD_SUPREMUM);
 }
 
@@ -316,7 +363,7 @@ bool
 page_rec_is_infimum_low(ulint offset)
 {
 	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+	ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
 	return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
 }
 
@@ -457,7 +504,7 @@ page_header_set_field(
 Returns the offset stored in the given header field.
 @return offset from the start of the page, or 0 */
 UNIV_INLINE
-ulint
+uint16_t
 page_header_get_offs(
 /*=================*/
 	const page_t*	page,	/*!< in: page */
@@ -551,7 +598,7 @@ Gets the number of user records on page (the infimum and supremum records
 are not user records).
 @return number of user records */
 UNIV_INLINE
-ulint
+uint16_t
 page_get_n_recs(
 /*============*/
 	const page_t*	page);	/*!< in: index page */
@@ -569,7 +616,7 @@ page_rec_get_n_recs_before(
 Gets the number of records in the heap.
 @return number of user records */
 UNIV_INLINE
-ulint
+uint16_t
 page_dir_get_n_heap(
 /*================*/
 	const page_t*	page);	/*!< in: index page */
@@ -590,7 +637,7 @@ page_dir_set_n_heap(
 Gets the number of dir slots in directory.
 @return number of slots */
 UNIV_INLINE
-ulint
+uint16_t
 page_dir_get_n_slots(
 /*=================*/
 	const page_t*	page);	/*!< in: index page */
@@ -616,7 +663,7 @@ page_dir_get_nth_slot(
 	ulint		n);	/*!< in: position */
 #else /* UNIV_DEBUG */
 # define page_dir_get_nth_slot(page, n)			\
-	((page) + (UNIV_PAGE_SIZE - PAGE_DIR		\
+	((page) + (srv_page_size - PAGE_DIR		\
 		   - (n + 1) * PAGE_DIR_SLOT_SIZE))
 #endif /* UNIV_DEBUG */
 /**************************************************************//**
@@ -686,14 +733,52 @@ ulint
 page_rec_get_heap_no(
 /*=================*/
 	const rec_t*	rec);	/*!< in: the physical record */
+/** Determine whether a page has any siblings.
+@param[in]	page	page frame
+@return true if the page has any siblings */
+inline
+bool
+page_has_siblings(const page_t* page)
+{
+	compile_time_assert(!(FIL_PAGE_PREV % 8));
+	compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
+	compile_time_assert(FIL_NULL == 0xffffffff);
+	return *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV)
+		!= ~uint64_t(0);
+}
+
 /** Determine whether a page is an index root page.
 @param[in]	page	page frame
 @return true if the page is a root page of an index */
-UNIV_INLINE
+inline
 bool
-page_is_root(
-	const page_t*	page)
-	MY_ATTRIBUTE((warn_unused_result));
+page_is_root(const page_t* page)
+{
+	return fil_page_index_page_check(page) && !page_has_siblings(page);
+}
+
+/** Determine whether a page has a predecessor.
+@param[in]	page	page frame
+@return true if the page has a predecessor */
+inline
+bool
+page_has_prev(const page_t* page)
+{
+	return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_PREV)
+		!= FIL_NULL;
+}
+
+/** Determine whether a page has a successor.
+@param[in]	page	page frame
+@return true if the page has a successor */
+inline
+bool
+page_has_next(const page_t* page)
+{
+	return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_NEXT)
+		!= FIL_NULL;
+}
+
 /************************************************************//**
 Gets the pointer to the next record on the page.
 @return pointer to next record */
@@ -865,7 +950,7 @@ Returns the sum of the sizes of the records in the record list
 excluding the infimum and supremum records.
 @return data in bytes */
 UNIV_INLINE
-ulint
+uint16_t
 page_get_data_size(
 /*===============*/
 	const page_t*	page);	/*!< in: index page */
@@ -911,6 +996,45 @@ page_mem_free(
 	const dict_index_t*	index,	/*!< in: index of rec */
 	const ulint*		offsets);/*!< in: array returned by
 					 rec_get_offsets() */
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in]	ptr	pointer to PAGE_DIRECTION_B
+@return	the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr);
+
+/** Set the PAGE_DIRECTION field.
+@param[in,out]	ptr	pointer to PAGE_DIRECTION_B
+@param[in]	dir	the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir);
+
+/** Read the PAGE_DIRECTION field.
+@param[in]	page	index page
+@return	the value of the PAGE_DIRECTION field */
+inline
+byte
+page_get_direction(const page_t* page)
+{
+	return page_ptr_get_direction(PAGE_HEADER + PAGE_DIRECTION_B + page);
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in]	page	index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page);
+/** Assign the PAGE_INSTANT field.
+@param[in,out]	page	clustered index root page
+@param[in]	n	original number of clustered index fields
+@param[in,out]	mtr	mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr);
+
 /**********************************************************//**
 Create an uncompressed B-tree index page.
 @return pointer to the page */
@@ -1251,5 +1375,4 @@ page_warn_strict_checksum(
 
 #include "page0page.ic"
 
-
 #endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 0062db56bfa..307803367c0 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -172,8 +172,8 @@ page_header_set_field(
 {
 	ut_ad(page);
 	ut_ad(field <= PAGE_N_RECS);
-	ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
-	ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+	ut_ad(field == PAGE_N_HEAP || val < srv_page_size);
+	ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < srv_page_size);
 
 	mach_write_to_2(page + PAGE_HEADER + field, val);
 	if (page_zip) {
@@ -186,19 +186,17 @@ page_header_set_field(
 Returns the offset stored in the given header field.
 @return offset from the start of the page, or 0 */
 UNIV_INLINE
-ulint
+uint16_t
 page_header_get_offs(
 /*=================*/
 	const page_t*	page,	/*!< in: page */
 	ulint		field)	/*!< in: PAGE_FREE, ... */
 {
-	ulint	offs;
-
 	ut_ad((field == PAGE_FREE)
 	      || (field == PAGE_LAST_INSERT)
 	      || (field == PAGE_HEAP_TOP));
 
-	offs = page_header_get_field(page, field);
+	uint16_t offs = page_header_get_field(page, field);
 
 	ut_ad((field != PAGE_HEAP_TOP) || offs);
 
@@ -277,31 +275,6 @@ page_rec_get_heap_no(
 	}
 }
 
-/** Determine whether a page is an index root page.
-@param[in]	page	page frame
-@return true if the page is a root page of an index */
-UNIV_INLINE
-bool
-page_is_root(
-	const page_t*	page)
-{
-#if FIL_PAGE_PREV % 8
-# error FIL_PAGE_PREV must be 64-bit aligned
-#endif
-#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
-# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
-#endif
-#if FIL_NULL != 0xffffffff
-# error FIL_NULL != 0xffffffff
-#endif
-	/* Check that this is an index page and both the PREV and NEXT
-	pointers are FIL_NULL, because the root page does not have any
-	siblings. */
-	return(fil_page_index_page_check(page)
-	       && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV)
-	       == IB_UINT64_MAX);
-}
-
 /** Determine whether an index page record is a user record.
 @param[in]	rec	record in an index page
 @return true if a user record */
@@ -423,7 +396,8 @@ page_get_middle_rec(
 /*================*/
 	page_t*	page)	/*!< in: page */
 {
-	ulint	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+	ulint	middle = (ulint(page_get_n_recs(page))
+			  + PAGE_HEAP_NO_USER_LOW) / 2;
 
 	return(page_rec_get_nth(page, middle));
 }
@@ -464,7 +438,7 @@ Gets the number of user records on page (infimum and supremum records
 are not user records).
 @return number of user records */
 UNIV_INLINE
-ulint
+uint16_t
 page_get_n_recs(
 /*============*/
 	const page_t*	page)	/*!< in: index page */
@@ -477,7 +451,7 @@ page_get_n_recs(
 Gets the number of dir slots in directory.
 @return number of slots */
 UNIV_INLINE
-ulint
+uint16_t
 page_dir_get_n_slots(
 /*=================*/
 	const page_t*	page)	/*!< in: index page */
@@ -502,7 +476,7 @@ page_dir_set_n_slots(
 Gets the number of records in the heap.
 @return number of user records */
 UNIV_INLINE
-ulint
+uint16_t
 page_dir_get_n_heap(
 /*================*/
 	const page_t*	page)	/*!< in: index page */
@@ -547,7 +521,7 @@ page_dir_get_nth_slot(
 	ut_ad(page_dir_get_n_slots(page) > n);
 
 	return((page_dir_slot_t*)
-	       page + UNIV_PAGE_SIZE - PAGE_DIR
+	       page + srv_page_size - PAGE_DIR
 	       - (n + 1) * PAGE_DIR_SLOT_SIZE);
 }
 #endif /* UNIV_DEBUG */
@@ -666,7 +640,7 @@ page_rec_get_next_low(
 
 	offs = rec_get_next_offs(rec, comp);
 
-	if (offs >= UNIV_PAGE_SIZE) {
+	if (offs >= srv_page_size) {
 		fprintf(stderr,
 			"InnoDB: Next record offset is nonsensical %lu"
 			" in record at offset %lu\n"
@@ -855,9 +829,8 @@ page_rec_get_base_extra_size(
 /*=========================*/
 	const rec_t*	rec)	/*!< in: physical record */
 {
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
+	compile_time_assert(REC_N_NEW_EXTRA_BYTES + 1
+			    == REC_N_OLD_EXTRA_BYTES);
 	return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
 }
 
@@ -868,21 +841,17 @@ Returns the sum of the sizes of the records in the record list, excluding
 the infimum and supremum records.
 @return data in bytes */
 UNIV_INLINE
-ulint
+uint16_t
 page_get_data_size(
 /*===============*/
 	const page_t*	page)	/*!< in: index page */
 {
-	ulint	ret;
-
-	ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
-		      - (page_is_comp(page)
-			 ? PAGE_NEW_SUPREMUM_END
-			 : PAGE_OLD_SUPREMUM_END)
-		      - page_header_get_field(page, PAGE_GARBAGE));
-
-	ut_ad(ret < UNIV_PAGE_SIZE);
-
+	uint16_t	ret = page_header_get_field(page, PAGE_HEAP_TOP)
+		- (page_is_comp(page)
+		   ? PAGE_NEW_SUPREMUM_END
+		   : PAGE_OLD_SUPREMUM_END)
+		- page_header_get_field(page, PAGE_GARBAGE);
+	ut_ad(ret < srv_page_size);
 	return(ret);
 }
 
@@ -930,13 +899,13 @@ page_get_free_space_of_empty(
 	ulint	comp)		/*!< in: nonzero=compact page layout */
 {
 	if (comp) {
-		return((ulint)(UNIV_PAGE_SIZE
+		return((ulint)(srv_page_size
 			       - PAGE_NEW_SUPREMUM_END
 			       - PAGE_DIR
 			       - 2 * PAGE_DIR_SLOT_SIZE));
 	}
 
-	return((ulint)(UNIV_PAGE_SIZE
+	return((ulint)(srv_page_size
 		       - PAGE_OLD_SUPREMUM_END
 		       - PAGE_DIR
 		       - 2 * PAGE_DIR_SLOT_SIZE));
@@ -1074,10 +1043,79 @@ page_mem_free(
 		page_zip_dir_delete(page_zip, rec, index, offsets, free);
 	} else {
 		page_header_set_field(page, page_zip, PAGE_N_RECS,
-				      page_get_n_recs(page) - 1);
+				      ulint(page_get_n_recs(page)) - 1);
+	}
+}
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in]	ptr	pointer to PAGE_DIRECTION_B
+@return	the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr)
+{
+	ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+	return *ptr & ((1U << 3) - 1);
+}
+
+/** Set the PAGE_DIRECTION field.
+@param[in,out]	ptr	pointer to PAGE_DIRECTION_B
+@param[in]	dir	the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir)
+{
+	ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+	ut_ad(dir >= PAGE_LEFT);
+	ut_ad(dir <= PAGE_NO_DIRECTION);
+	*ptr = (*ptr & ~((1U << 3) - 1)) | dir;
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in]	page	index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page)
+{
+	uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+#ifdef UNIV_DEBUG
+	switch (fil_page_get_type(page)) {
+	case FIL_PAGE_TYPE_INSTANT:
+		ut_ad(page_get_direction(page) <= PAGE_NO_DIRECTION);
+		ut_ad(i >> 3);
+		break;
+	case FIL_PAGE_INDEX:
+		ut_ad(i <= PAGE_NO_DIRECTION || !page_is_comp(page));
+		break;
+	case FIL_PAGE_RTREE:
+		ut_ad(i <= PAGE_NO_DIRECTION);
+		break;
+	default:
+		ut_ad(!"invalid page type");
+		break;
 	}
+#endif /* UNIV_DEBUG */
+	return(i >> 3);
 }
 
+/** Assign the PAGE_INSTANT field.
+@param[in,out]	page	clustered index root page
+@param[in]	n	original number of clustered index fields
+@param[in,out]	mtr	mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr)
+{
+	ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_INSTANT);
+	ut_ad(n > 0);
+	ut_ad(n < REC_MAX_N_FIELDS);
+	uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+	ut_ad(i <= PAGE_NO_DIRECTION);
+	i |= n << 3;
+	mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + page, i,
+			 MLOG_2BYTES, mtr);
+}
 #endif /* !UNIV_INNOCHECKSUM */
 
 #ifdef UNIV_MATERIALIZE
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
index 30a996df0a6..7b8b7efe617 100644
--- a/storage/innobase/include/page0size.h
+++ b/storage/innobase/include/page0size.h
@@ -30,7 +30,7 @@ Created Nov 14, 2013 Vasil Dimov
 #include "univ.i"
 #include "fsp0types.h"
 
-#define FIELD_REF_SIZE 20
+#define FIELD_REF_SIZE 20U
 
 /** A BLOB field reference full of zero, for use in assertions and
 tests.Initially, BLOB field references are set to zero, in
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index c1d5443d9e5..6e0c097bbaf 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -340,18 +340,39 @@ page_zip_write_node_ptr(
 	ulint		ptr,	/*!< in: node pointer */
 	mtr_t*		mtr);	/*!< in: mini-transaction, or NULL */
 
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
+@param[in,out]	page_zip	compressed page
+@param[in,out]	rec		record
+@param[in]	offsets		rec_get_offsets(rec, index)
+@param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
+@param[in]	trx_id		DB_TRX_ID value (transaction identifier)
+@param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
+@param[in,out]	mtr		mini-transaction, or NULL to skip logging */
 void
 page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
-	byte*		rec,	/*!< in/out: record */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	ulint		trx_id_col,/*!< in: column number of TRX_ID in rec */
-	trx_id_t	trx_id,	/*!< in: transaction identifier */
-	roll_ptr_t	roll_ptr)/*!< in: roll_ptr */
-	MY_ATTRIBUTE((nonnull));
+	page_zip_des_t*	page_zip,
+	byte*		rec,
+	const ulint*	offsets,
+	ulint		trx_id_col,
+	trx_id_t	trx_id,
+	roll_ptr_t	roll_ptr,
+	mtr_t*		mtr = NULL)
+	MY_ATTRIBUTE((nonnull(1,2,3)));
+
+/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
+@param[in]	ptr		redo log buffer
+@param[in]	end_ptr		end of redo log buffer
+@param[in,out]	page		uncompressed page
+@param[in,out]	page_zip	compressed page
+@return end of log record
+@retval	NULL	if the log record is incomplete */
+byte*
+page_zip_parse_write_trx_id(
+	byte*		ptr,
+	byte*		end_ptr,
+	page_t*		page,
+	page_zip_des_t*	page_zip)
+	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
 
 /**********************************************************************//**
 Write the "deleted" flag of a record on a compressed page.  The flag must
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index b471e2cf64e..b3ebc5dcf51 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -120,7 +120,7 @@ page_zip_get_size(
 	size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
 
 	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
-	ut_ad(size <= UNIV_PAGE_SIZE);
+	ut_ad(size <= srv_page_size);
 
 	return(size);
 }
@@ -242,9 +242,9 @@ page_zip_get_trailer_len(
 		ut_ad(!page_zip->n_blobs);
 	}
 
-	return((page_dir_get_n_heap(page_zip->data) - 2)
-	       * uncompressed_size
-	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
+	return (ulint(page_dir_get_n_heap(page_zip->data)) - 2)
+		* uncompressed_size
+		+ ulint(page_zip->n_blobs) * BTR_EXTERN_FIELD_REF_SIZE;
 }
 
 /**********************************************************************//**
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 37498c1c638..487ba8c147f 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -539,7 +539,7 @@ pars_info_add_int4_literal(
 /*=======================*/
 	pars_info_t*	info,		/*!< in: info struct */
 	const char*	name,		/*!< in: name */
-	lint		val);		/*!< in: value */
+	ulint		val);		/*!< in: value */
 
 /****************************************************************//**
 Equivalent to:
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index f01b596a52e..ca06f5b09ba 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -335,13 +335,6 @@ enum que_thr_lock_t {
 	QUE_THR_LOCK_TABLE
 };
 
-/** From where the cursor position is counted */
-enum que_cur_t {
-	QUE_CUR_NOT_DEFINED,
-	QUE_CUR_START,
-	QUE_CUR_END
-};
-
 /* Query graph query thread node: the fields are protected by the
 trx_t::mutex with the exceptions named below */
 
@@ -415,18 +408,7 @@ struct que_fork_t{
 					generated by the parser, or NULL
 					if the graph was created 'by hand' */
 	pars_info_t*	info;		/*!< info struct, or NULL */
-	/* The following cur_... fields are relevant only in a select graph */
 
-	ulint		cur_end;	/*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
-					QUE_CUR_END */
-	ulint		cur_pos;	/*!< if there are n rows in the result
-					set, values 0 and n + 1 mean before
-					first row, or after last row, depending
-					on cur_end; values 1...n mean a row
-					index */
-	ibool		cur_on_row;	/*!< TRUE if cursor is on a row, i.e.,
-					it is not before the first row or
-					after the last row */
 	sel_node_t*	last_sel_node;	/*!< last executed select node, or NULL
 					if none */
 	UT_LIST_NODE_T(que_fork_t)
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
deleted file mode 100644
index 129341be77c..00000000000
--- a/storage/innobase/include/read0read.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-#include "read0types.h"
-
-#include <algorithm>
-
-/** The MVCC read view manager */
-class MVCC {
-public:
-	/** Constructor
-	@param size		Number of views to pre-allocate */
-	explicit MVCC(ulint size);
-
-	/** Destructor.
-	Free all the views in the m_free list */
-	~MVCC();
-
-	/**
-	Allocate and create a view.
-	@param view		view owned by this class created for the
-				caller. Must be freed by calling close()
-	@param trx		transaction creating the view */
-	void view_open(ReadView*& view, trx_t* trx);
-
-	/**
-	Close a view created by the above function.
-	@para view		view allocated by trx_open.
-	@param own_mutex	true if caller owns trx_sys_t::mutex */
-	void view_close(ReadView*& view, bool own_mutex);
-
-	/**
-	Release a view that is inactive but not closed. Caller must own
-	the trx_sys_t::mutex.
-	@param view		View to release */
-	void view_release(ReadView*& view);
-
-	/** Clones the oldest view and stores it in view. No need to
-	call view_close(). The caller owns the view that is passed in.
-	It will also move the closed views from the m_views list to the
-	m_free list. This function is called by Purge to create it view.
-	@param view		Preallocated view, owned by the caller */
-	void clone_oldest_view(ReadView* view);
-
-	/**
-	@return the number of active views */
-	ulint size() const;
-
-	/**
-	@return true if the view is active and valid */
-	static bool is_view_active(ReadView* view)
-	{
-		ut_a(view != reinterpret_cast<ReadView*>(0x1));
-
-		return(view != NULL && !(intptr_t(view) & 0x1));
-	}
-
-	/**
-	Set the view creator transaction id. Note: This shouldbe set only
-	for views created by RW transactions. */
-	static void set_view_creator_trx_id(ReadView* view, trx_id_t id);
-
-private:
-
-	/**
-	Validates a read view list. */
-	bool validate() const;
-
-	/**
-	Find a free view from the active list, if none found then allocate
-	a new view. This function will also attempt to move delete marked
-	views from the active list to the freed list.
-	@return a view to use */
-	inline ReadView* get_view();
-
-	/**
-	Get the oldest view in the system. It will also move the delete
-	marked read views from the views list to the freed list.
-	@return oldest view if found or NULL */
-	inline ReadView* get_oldest_view() const;
-
-private:
-	// Prevent copying
-	MVCC(const MVCC&);
-	MVCC& operator=(const MVCC&);
-
-private:
-	typedef UT_LIST_BASE_NODE_T(ReadView) view_list_t;
-
-	/** Free views ready for reuse. */
-	view_list_t		m_free;
-
-	/** Active and closed views, the closed views will have the
-	creator trx id set to TRX_ID_MAX */
-	view_list_t		m_views;
-};
-
-#endif /* read0read_h */
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index c83c7e04f11..eade82714c5 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,122 +32,163 @@ Created 2/16/1997 Heikki Tuuri
 
 #include "trx0types.h"
 
-// Friend declaration
-class MVCC;
 
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
+/** View is not visible to purge thread. */
+#define READ_VIEW_STATE_CLOSED 0
 
-class ReadView {
-	/** This is similar to a std::vector but it is not a drop
-	in replacement. It is specific to ReadView. */
-	class ids_t {
-		typedef trx_ids_t::value_type value_type;
+/** View is being opened, purge thread must wait for state change. */
+#define READ_VIEW_STATE_SNAPSHOT 1
 
-		/**
-		Constructor */
-		ids_t() : m_ptr(), m_size(), m_reserved() { }
+/** View is visible to purge thread. */
+#define READ_VIEW_STATE_OPEN 2
 
-		/**
-		Destructor */
-		~ids_t() { UT_DELETE_ARRAY(m_ptr); }
 
-		/**
-		Try and increase the size of the array. Old elements are
-		copied across. It is a no-op if n is < current size.
+/**
+  Read view lists the trx ids of those transactions for which a consistent read
+  should not see the modifications to the database.
+*/
+class ReadView
+{
+  /**
+    View state.
 
-		@param n 		Make space for n elements */
-		void reserve(ulint n);
+    It is not defined as enum as it has to be updated using atomic operations.
+    Possible values are READ_VIEW_STATE_CLOSED, READ_VIEW_STATE_SNAPSHOT and
+    READ_VIEW_STATE_OPEN.
 
-		/**
-		Resize the array, sets the current element count.
-		@param n		new size of the array, in elements */
-		void resize(ulint n)
-		{
-			ut_ad(n <= capacity());
+    Possible state transitions:
 
-			m_size = n;
-		}
-
-		/**
-		Reset the size to 0 */
-		void clear() { resize(0); }
-
-		/**
-		@return the capacity of the array in elements */
-		ulint capacity() const { return(m_reserved); }
-
-		/**
-		Copy and overwrite the current array contents
-
-		@param start		Source array
-		@param end		Pointer to end of array */
-		void assign(const value_type* start, const value_type* end);
-
-		/**
-		Insert the value in the correct slot, preserving the order.
-		Doesn't check for duplicates. */
-		void insert(value_type value);
-
-		/**
-		@return the value of the first element in the array */
-		value_type front() const
-		{
-			ut_ad(!empty());
-
-			return(m_ptr[0]);
-		}
-
-		/**
-		@return the value of the last element in the array */
-		value_type back() const
-		{
-			ut_ad(!empty());
-
-			return(m_ptr[m_size - 1]);
-		}
-
-		/**
-		Append a value to the array.
-		@param value		the value to append */
-		void push_back(value_type value);
-
-		/**
-		@return a pointer to the start of the array */
-		trx_id_t* data() { return(m_ptr); };
-
-		/**
-		@return a const pointer to the start of the array */
-		const trx_id_t* data() const { return(m_ptr); };
+    Start view open:
+    READ_VIEW_STATE_CLOSED -> READ_VIEW_STATE_SNAPSHOT
 
-		/**
-		@return the number of elements in the array */
-		ulint size() const { return(m_size); }
+    Complete view open:
+    READ_VIEW_STATE_SNAPSHOT -> READ_VIEW_STATE_OPEN
 
-		/**
-		@return true if size() == 0 */
-		bool empty() const { return(size() == 0); }
+    Close view:
+    READ_VIEW_STATE_OPEN -> READ_VIEW_STATE_CLOSED
+  */
+  int32_t m_state;
 
-	private:
-		// Prevent copying
-		ids_t(const ids_t&);
-		ids_t& operator=(const ids_t&);
 
-	private:
-		/** Memory for the array */
-		value_type*	m_ptr;
-
-		/** Number of active elements in the array */
-		ulint		m_size;
+public:
+  ReadView(): m_state(READ_VIEW_STATE_CLOSED), m_low_limit_id(0) {}
+
+
+  /**
+    Copy state from another view.
+
+    This method is used to find min(m_low_limit_no), min(m_low_limit_id) and
+    all transaction ids below min(m_low_limit_id). These values effectively
+    form the oldest view.
+
+    @param other    view to copy from
+  */
+  void copy(const ReadView &other)
+  {
+    ut_ad(&other != this);
+    if (m_low_limit_no > other.m_low_limit_no)
+      m_low_limit_no= other.m_low_limit_no;
+    if (m_low_limit_id > other.m_low_limit_id)
+      m_low_limit_id= other.m_low_limit_id;
+
+    trx_ids_t::iterator dst= m_ids.begin();
+    for (trx_ids_t::const_iterator src= other.m_ids.begin();
+         src != other.m_ids.end(); src++)
+    {
+      if (*src >= m_low_limit_id)
+        break;
+loop:
+      if (dst == m_ids.end())
+      {
+        m_ids.push_back(*src);
+        dst= m_ids.end();
+        continue;
+      }
+      if (*dst < *src)
+      {
+        dst++;
+        goto loop;
+      }
+      else if (*dst > *src)
+        dst= m_ids.insert(dst, *src) + 1;
+    }
+    m_ids.erase(std::lower_bound(dst, m_ids.end(), m_low_limit_id),
+                m_ids.end());
+
+    m_up_limit_id= m_ids.empty() ? m_low_limit_id : m_ids.front();
+    ut_ad(m_up_limit_id <= m_low_limit_id);
+  }
+
+
+  /**
+    Opens a read view where exactly the transactions serialized before this
+    point in time are seen in the view.
+
+    View becomes visible to purge thread.
+
+    @param[in,out] trx transaction
+  */
+  void open(trx_t *trx);
+
+
+  /**
+    Closes the view.
+
+    View becomes not visible to purge thread.
+  */
+  void close()
+  {
+    ut_ad(m_state == READ_VIEW_STATE_CLOSED ||
+          m_state == READ_VIEW_STATE_OPEN);
+    if (m_state == READ_VIEW_STATE_OPEN)
+      my_atomic_store32_explicit(&m_state, READ_VIEW_STATE_CLOSED,
+                                 MY_MEMORY_ORDER_RELAXED);
+  }
+
+
+  /** m_state getter for trx_sys::clone_oldest_view() and trx_sys::size(). */
+  int32_t get_state() const
+  {
+    return my_atomic_load32_explicit(const_cast<int32*>(&m_state),
+                                     MY_MEMORY_ORDER_ACQUIRE);
+  }
+
+
+  /**
+    Returns true if view is open.
+
+    Only used by view owner thread, thus we can omit atomic operations.
+  */
+  bool is_open() const
+  {
+    ut_ad(m_state == READ_VIEW_STATE_OPEN ||
+          m_state == READ_VIEW_STATE_CLOSED);
+    return m_state == READ_VIEW_STATE_OPEN;
+  }
+
+
+  /**
+    Creates a snapshot where exactly the transactions serialized before this
+    point in time are seen in the view.
+
+    @param[in,out] trx transaction
+  */
+  inline void snapshot(trx_t *trx);
+
+
+  /**
+    Sets the creator transaction id.
+
+    This should be set only for views created by RW transactions.
+  */
+  void set_creator_trx_id(trx_id_t id)
+  {
+    ut_ad(id > 0);
+    ut_ad(m_creator_trx_id == 0);
+    m_creator_trx_id= id;
+  }
 
-		/** Size of m_ptr in elements */
-		ulint		m_reserved;
 
-		friend class ReadView;
-	};
-public:
-	ReadView();
-	~ReadView();
 	/** Check whether transaction id is valid.
 	@param[in]	id		transaction id to check
 	@param[in]	name		table name */
@@ -163,8 +205,6 @@ public:
 		const table_name_t&	name) const
 		MY_ATTRIBUTE((warn_unused_result))
 	{
-		ut_ad(id > 0);
-
 		if (id < m_up_limit_id || id == m_creator_trx_id) {
 
 			return(true);
@@ -181,9 +221,7 @@ public:
 			return(true);
 		}
 
-		const ids_t::value_type*	p = m_ids.data();
-
-		return(!std::binary_search(p, p + m_ids.size(), id));
+		return(!std::binary_search(m_ids.begin(), m_ids.end(), id));
 	}
 
 	/**
@@ -195,21 +233,6 @@ public:
 	}
 
 	/**
-	Mark the view as closed */
-	void close()
-	{
-		ut_ad(m_creator_trx_id != TRX_ID_MAX);
-		m_creator_trx_id = TRX_ID_MAX;
-	}
-
-	/**
-	@return true if the view is closed */
-	bool is_closed() const
-	{
-		return(m_closed);
-	}
-
-	/**
 	Write the limits to the file.
 	@param file		file to write to */
 	void print_limits(FILE* file) const
@@ -234,66 +257,6 @@ public:
 		return(m_low_limit_id);
 	}
 
-	/**
-	@return true if there are no transaction ids in the snapshot */
-	bool empty() const
-	{
-		return(m_ids.empty());
-	}
-
-#ifdef UNIV_DEBUG
-	/**
-	@param rhs		view to compare with
-	@return truen if this view is less than or equal rhs */
-	bool le(const ReadView* rhs) const
-	{
-		return(m_low_limit_no <= rhs->m_low_limit_no);
-	}
-
-	trx_id_t up_limit_id() const
-	{
-		return(m_up_limit_id);
-	}
-#endif /* UNIV_DEBUG */
-private:
-	/**
-	Copy the transaction ids from the source vector */
-	inline void copy_trx_ids(const trx_ids_t& trx_ids);
-
-	/**
-	Opens a read view where exactly the transactions serialized before this
-	point in time are seen in the view.
-	@param id		Creator transaction id */
-	inline void prepare(trx_id_t id);
-
-	/**
-	Complete the read view creation */
-	inline void complete();
-
-	/**
-	Copy state from another view. Must call copy_complete() to finish.
-	@param other		view to copy from */
-	inline void copy_prepare(const ReadView& other);
-
-	/**
-	Complete the copy, insert the creator transaction id into the
-	m_trx_ids too and adjust the m_up_limit_id *, if required */
-	inline void copy_complete();
-
-	/**
-	Set the creator transaction id, existing id must be 0 */
-	void creator_trx_id(trx_id_t id)
-	{
-		ut_ad(m_creator_trx_id == 0);
-		m_creator_trx_id = id;
-	}
-
-	friend class MVCC;
-
-private:
-	// Disable copying
-	ReadView(const ReadView&);
-	ReadView& operator=(const ReadView&);
 
 private:
 	/** The read should not see any transaction with trx id >= this
@@ -311,21 +274,12 @@ private:
 
 	/** Set of RW transactions that was active when this snapshot
 	was taken */
-	ids_t		m_ids;
+	trx_ids_t	m_ids;
 
 	/** The view does not need to see the undo logs for transactions
 	whose transaction number is strictly smaller (<) than this value:
 	they can be removed in purge if not needed by other views */
 	trx_id_t	m_low_limit_no;
-
-	/** AC-NL-RO transaction view that has been "closed". */
-	bool		m_closed;
-
-	typedef UT_LIST_NODE_T(ReadView) node_t;
-
-	/** List of read views in trx_sys */
-	byte		pad1[64 - sizeof(node_t)];
-	node_t		m_view_list;
 };
 
 #endif
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 3b1f1c7f742..3ee993944e9 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,6 +33,7 @@ Created 5/30/1994 Heikki Tuuri
 #include "rem0types.h"
 #include "mtr0types.h"
 #include "page0types.h"
+#include "dict0dict.h"
 #include "trx0types.h"
 #endif /*! UNIV_INNOCHECKSUM */
 #include <ostream>
@@ -54,11 +55,29 @@ in addition to the data and the offsets */
 in addition to the data and the offsets */
 #define REC_N_NEW_EXTRA_BYTES	5
 
-/* Record status values */
-#define REC_STATUS_ORDINARY	0
-#define REC_STATUS_NODE_PTR	1
-#define REC_STATUS_INFIMUM	2
-#define REC_STATUS_SUPREMUM	3
+/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED */
+enum rec_comp_status_t {
+	/** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+	REC_STATUS_ORDINARY = 0,
+	/** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+	REC_STATUS_NODE_PTR = 1,
+	/** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */
+	REC_STATUS_INFIMUM = 2,
+	/** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */
+	REC_STATUS_SUPREMUM = 3,
+	/** Clustered index record that has been inserted or updated
+	after instant ADD COLUMN (more than dict_index_t::n_core_fields) */
+	REC_STATUS_COLUMNS_ADDED = 4
+};
+
+/** The dtuple_t::info_bits of the 'default row' record.
+@see rec_is_default_row() */
+static const byte REC_INFO_DEFAULT_ROW
+	= REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED;
+
+#define REC_NEW_STATUS		3	/* This is single byte bit-field */
+#define REC_NEW_STATUS_MASK	0x7UL
+#define REC_NEW_STATUS_SHIFT	0
 
 /* The following four constants are needed in page0zip.cc in order to
 efficiently compress and decompress pages. */
@@ -94,6 +113,22 @@ offsets[] array, first passed to rec_get_offsets() */
 #define REC_OFFS_NORMAL_SIZE	OFFS_IN_REC_NORMAL_SIZE
 #define REC_OFFS_SMALL_SIZE	10
 
+/** Get the base address of offsets.  The extra_size is stored at
+this position, and following positions hold the end offsets of
+the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/** Compact flag ORed to the extra size returned by rec_get_offsets() */
+const ulint REC_OFFS_COMPACT = ~(ulint(~0) >> 1);
+/** SQL NULL flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_SQL_NULL = REC_OFFS_COMPACT;
+/** External flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_EXTERNAL = REC_OFFS_COMPACT >> 1;
+/** Default value flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_DEFAULT = REC_OFFS_COMPACT >> 2;
+/** Mask for offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_MASK = REC_OFFS_DEFAULT - 1;
+
 #ifndef UNIV_INNOCHECKSUM
 /******************************************************//**
 The following function is used to get the pointer of the next chained record
@@ -252,25 +287,55 @@ rec_set_info_bits_new(
 	rec_t*	rec,	/*!< in/out: new-style physical record */
 	ulint	bits)	/*!< in: info bits */
 	MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
+
+/** Determine the status bits of a non-REDUNDANT record.
+@param[in]	rec	ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
 @return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
-	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((warn_unused_result));
+inline
+rec_comp_status_t
+rec_get_status(const rec_t* rec)
+{
+	byte bits = rec[-REC_NEW_STATUS] & REC_NEW_STATUS_MASK;
+	ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+	return static_cast<rec_comp_status_t>(bits);
+}
 
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
+/** Set the status bits of a non-REDUNDANT record.
+@param[in,out]	rec	ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
+@param[in]	bits	status bits */
+inline
 void
-rec_set_status(
-/*===========*/
-	rec_t*	rec,	/*!< in/out: physical record */
-	ulint	bits)	/*!< in: info bits */
-	MY_ATTRIBUTE((nonnull));
+rec_set_status(rec_t* rec, byte bits)
+{
+	ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+	rec[-REC_NEW_STATUS] = (rec[-REC_NEW_STATUS] & ~REC_NEW_STATUS_MASK)
+		| bits;
+}
+
+/** Get the length of added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in]	n_add_field	number of added fields, minus one
+@return	storage size of the field count, in bytes */
+inline unsigned rec_get_n_add_field_len(ulint n_add_field)
+{
+	ut_ad(n_add_field < REC_MAX_N_FIELDS);
+	return n_add_field < 0x80 ? 1 : 2;
+}
+
+/** Set the added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in,out]	header	variable header of a REC_STATUS_COLUMNS_ADDED record;
+			on return, points to the byte preceding the stored count
+@param[in]	n_add	number of added fields, minus 1 */
+inline void rec_set_n_add_field(byte*& header, ulint n_add)
+{
+	ut_ad(n_add < REC_MAX_N_FIELDS);
+
+	if (n_add < 0x80) {
+		*header-- = byte(n_add);
+	} else {
+		*header-- = byte(n_add) | 0x80;
+		*header-- = byte(n_add >> 7);
+	}
+}
 
 /******************************************************//**
 The following function is used to retrieve the info and status
@@ -327,7 +392,7 @@ rec_set_deleted_flag_new(
 The following function tells if a new-style record is a node pointer.
 @return TRUE if node pointer */
 UNIV_INLINE
-ibool
+bool
 rec_get_node_ptr_flag(
 /*==================*/
 	const rec_t*	rec)	/*!< in: physical record */
@@ -459,9 +524,7 @@ rec_get_offsets_func(
 	const rec_t*		rec,
 	const dict_index_t*	index,
 	ulint*			offsets,
-#ifdef UNIV_DEBUG
 	bool			leaf,
-#endif /* UNIV_DEBUG */
 	ulint			n_fields,
 #ifdef UNIV_DEBUG
 	const char*		file,	/*!< in: file name where called */
@@ -471,7 +534,7 @@ rec_get_offsets_func(
 #ifdef UNIV_DEBUG
 	MY_ATTRIBUTE((nonnull(1,2,6,8),warn_unused_result));
 #else /* UNIV_DEBUG */
-	MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result));
+	MY_ATTRIBUTE((nonnull(1,2,6),warn_unused_result));
 #endif /* UNIV_DEBUG */
 
 #ifdef UNIV_DEBUG
@@ -479,7 +542,7 @@ rec_get_offsets_func(
 	rec_get_offsets_func(rec,index,offsets,leaf,n,__FILE__,__LINE__,heap)
 #else /* UNIV_DEBUG */
 # define rec_get_offsets(rec, index, offsets, leaf, n, heap)		\
-	rec_get_offsets_func(rec, index, offsets, n, heap)
+	rec_get_offsets_func(rec, index, offsets, leaf, n, heap)
 #endif /* UNIV_DEBUG */
 
 /******************************************************//**
@@ -499,32 +562,31 @@ rec_get_offsets_reverse(
 					offsets[0] allocated elements */
 	MY_ATTRIBUTE((nonnull));
 #ifdef UNIV_DEBUG
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
+/** Validate offsets returned by rec_get_offsets().
+@param[in]	rec	record, or NULL
+@param[in]	index	the index that the record belongs in, or NULL
+@param[in]	offsets	the offsets of the record
+@return true */
+bool
 rec_offs_validate(
-/*==============*/
-	const rec_t*		rec,	/*!< in: record or NULL */
-	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
-	const ulint*		offsets)/*!< in: array returned by
-					rec_get_offsets() */
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	const ulint*		offsets)
 	MY_ATTRIBUTE((nonnull(3), warn_unused_result));
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
+/** Update debug data in offsets, in order to tame rec_offs_validate().
+@param[in]	rec	record
+@param[in]	index	the index that the record belongs in
+@param[in]	leaf	whether the record resides in a leaf page
+@param[in,out]	offsets	offsets from rec_get_offsets() to adjust */
 void
 rec_offs_make_valid(
-/*================*/
-	const rec_t*		rec,	/*!< in: record */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint*			offsets)/*!< in: array returned by
-					rec_get_offsets() */
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	bool			leaf,
+	ulint*			offsets)
 	MY_ATTRIBUTE((nonnull));
 #else
-# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+# define rec_offs_make_valid(rec, index, leaf, offsets)
 #endif /* UNIV_DEBUG */
 
 /************************************************************//**
@@ -568,26 +630,7 @@ rec_get_nth_field_offs(
 	MY_ATTRIBUTE((nonnull));
 #define rec_get_nth_field(rec, offsets, n, len) \
 ((rec) + rec_get_nth_field_offs(offsets, n, len))
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((warn_unused_result));
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
 @return first field containing a null BLOB pointer, or NULL if none found */
@@ -598,15 +641,16 @@ rec_offs_any_null_extern(
 	const rec_t*	rec,		/*!< in: record */
 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
 	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
 @return nonzero if externally stored */
 UNIV_INLINE
 ulint
-rec_offs_nth_extern(
+rec_offs_nth_extern_old(
 /*================*/
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n)	/*!< in: nth field */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		    n	/*!< in: index of the field */)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /** Mark the nth field as externally stored.
@@ -616,16 +660,177 @@ void
 rec_offs_make_nth_extern(
         ulint*		offsets,
         const ulint     n);
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
+
+/** Determine the number of allocated elements for an array of offsets.
+@param[in]	offsets		offsets after rec_offs_set_n_alloc()
+@return number of elements */
+inline
 ulint
-rec_offs_nth_sql_null(
-/*==================*/
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n)	/*!< in: nth field */
-	MY_ATTRIBUTE((warn_unused_result));
+rec_offs_get_n_alloc(const ulint* offsets)
+{
+	ulint	n_alloc;
+	ut_ad(offsets);
+	n_alloc = offsets[0];
+	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+	UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
+	return(n_alloc);
+}
+
+/** Determine the number of fields for which offsets have been initialized.
+@param[in]	offsets	rec_get_offsets()
+@return number of fields */
+inline
+ulint
+rec_offs_n_fields(const ulint* offsets)
+{
+	ulint	n_fields;
+	ut_ad(offsets);
+	n_fields = offsets[1];
+	ut_ad(n_fields > 0);
+	ut_ad(n_fields <= REC_MAX_N_FIELDS);
+	ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+	      <= rec_offs_get_n_alloc(offsets));
+	return(n_fields);
+}
+
+/** Get a flag of a record field.
+@param[in]	offsets	rec_get_offsets()
+@param[in]	n	nth field
+@param[in]	flag	flag to extract
+@return	the flag of the record field */
+inline
+ulint
+rec_offs_nth_flag(const ulint* offsets, ulint n, ulint flag)
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	ut_ad(n < rec_offs_n_fields(offsets));
+	/* The DEFAULT, NULL, EXTERNAL flags are mutually exclusive. */
+	ut_ad(ut_is_2pow(rec_offs_base(offsets)[1 + n]
+			 & (REC_OFFS_DEFAULT
+			    | REC_OFFS_SQL_NULL
+			    | REC_OFFS_EXTERNAL)));
+	return rec_offs_base(offsets)[1 + n] & flag;
+}
+
+/** Determine if a record field is missing
+(should be replaced by dict_index_t::instant_field_value()).
+@param[in]	offsets	rec_get_offsets()
+@param[in]	n	nth field
+@return	nonzero if default bit is set */
+inline
+ulint
+rec_offs_nth_default(const ulint* offsets, ulint n)
+{
+	return rec_offs_nth_flag(offsets, n, REC_OFFS_DEFAULT);
+}
+
+/** Determine if a record field is SQL NULL
+(the REC_OFFS_SQL_NULL flag is set for the field in offsets).
+@param[in]	offsets	rec_get_offsets()
+@param[in]	n	nth field
+@return	nonzero if SQL NULL set */
+inline
+ulint
+rec_offs_nth_sql_null(const ulint* offsets, ulint n)
+{
+	return rec_offs_nth_flag(offsets, n, REC_OFFS_SQL_NULL);
+}
+
+/** Determine if a record field is stored off-page.
+@param[in]	offsets	rec_get_offsets()
+@param[in]	n	nth field
+Returns nonzero if the extern bit is set in nth field of rec.
+@return nonzero if externally stored */
+inline
+ulint
+rec_offs_nth_extern(const ulint* offsets, ulint n)
+{
+	return rec_offs_nth_flag(offsets, n, REC_OFFS_EXTERNAL);
+}
+
+/** Get a global flag of a record.
+@param[in]	offsets	rec_get_offsets()
+@param[in]	flag	flag to extract
+@return	the flag of the record
+inline
+ulint
+rec_offs_any_flag(const ulint* offsets, ulint flag)
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	return *rec_offs_base(offsets) & flag;
+}
+
+/** Determine if the offsets are for a record containing off-page columns.
+@param[in]	offsets	rec_get_offsets()
+@return nonzero if any off-page columns exist */
+inline bool rec_offs_any_extern(const ulint* offsets)
+{
+	return rec_offs_any_flag(offsets, REC_OFFS_EXTERNAL);
+}
+
+/** Determine if the offsets are for a record that is missing fields.
+@param[in]	offsets	rec_get_offsets()
+@return nonzero if any fields need to be replaced with
+		dict_index_t::instant_field_value() */
+inline
+ulint
+rec_offs_any_default(const ulint* offsets)
+{
+	return rec_offs_any_flag(offsets, REC_OFFS_DEFAULT);
+}
+
+/** Determine if the offsets are for other than ROW_FORMAT=REDUNDANT.
+@param[in]	offsets	rec_get_offsets()
+@return	nonzero	if ROW_FORMAT is COMPACT,DYNAMIC or COMPRESSED
+@retval	0	if ROW_FORMAT=REDUNDANT */
+inline
+ulint
+rec_offs_comp(const ulint* offsets)
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
+}
+
+/** Determine if the record is the 'default row' pseudo-record
+in the clustered index.
+@param[in]	rec	leaf page record
+@param[in]	index	index of the record
+@return	whether the record is the 'default row' pseudo-record */
+inline
+bool
+rec_is_default_row(const rec_t* rec, const dict_index_t* index)
+{
+	bool is = rec_get_info_bits(rec, dict_table_is_comp(index->table))
+		& REC_INFO_MIN_REC_FLAG;
+	ut_ad(!is || index->is_instant());
+	ut_ad(!is || !dict_table_is_comp(index->table)
+	      || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
+	return is;
+}
+
+/** Get the nth field from an index.
+@param[in]	rec	index record
+@param[in]	index	index
+@param[in]	offsets	rec_get_offsets(rec, index)
+@param[in]	n	field number
+@param[out]	len	length of the field in bytes, or UNIV_SQL_NULL
+@return read-only pointer to the value of the index field */
+inline
+const byte*
+rec_get_nth_cfield(
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	const ulint*		offsets,
+	ulint			n,
+	ulint*			len)
+{
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	if (!rec_offs_nth_default(offsets, n)) {
+		return rec_get_nth_field(rec, offsets, n, len);
+	}
+	return index->instant_field_value(n, len);
+}
+
 /******************************************************//**
 Gets the physical size of a field.
 @return length of field */
@@ -679,16 +884,6 @@ rec_get_data_size_old(
 	const rec_t*	rec)	/*!< in: physical record */
 	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
-	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
-	MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
 The following function sets the number of allocated elements
 for an array of offsets. */
 UNIV_INLINE
@@ -702,15 +897,6 @@ rec_offs_set_n_alloc(
 #define rec_offs_init(offsets) \
 	rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
 /**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
 The following function returns the data size of a physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
@@ -785,37 +971,60 @@ rec_copy(
 @param[in]	fields		data fields
 @param[in]	n_fields	number of data fields
 @param[out]	extra		record header size
+@param[in]	status		REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
 @return	total size, in bytes */
 ulint
 rec_get_converted_size_temp(
 	const dict_index_t*	index,
 	const dfield_t*		fields,
 	ulint			n_fields,
-	ulint*			extra)
-	MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
+	ulint*			extra,
+	rec_comp_status_t	status = REC_STATUS_ORDINARY)
+	MY_ATTRIBUTE((warn_unused_result, nonnull));
 
-/******************************************************//**
-Determine the offset to each field in temporary file.
-@see rec_convert_dtuple_to_temp() */
+/** Determine the offset to each field in temporary file.
+@param[in]	rec	temporary file record
+@param[in]	index	index that the record belongs to
+@param[in,out]	offsets	offsets to the fields; in: rec_offs_n_fields(offsets)
+@param[in]	n_core	number of core fields (index->n_core_fields)
+@param[in]	def_val	default values for non-core fields
+@param[in]	status	REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */
 void
 rec_init_offsets_temp(
-/*==================*/
-	const rec_t*		rec,	/*!< in: temporary file record */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint*			offsets)/*!< in/out: array of offsets;
-					in: n=rec_offs_n_fields(offsets) */
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	ulint*			offsets,
+	ulint			n_core,
+	const dict_col_t::def_t*def_val,
+	rec_comp_status_t	status = REC_STATUS_ORDINARY)
+	MY_ATTRIBUTE((nonnull));
+/** Determine the offset to each field in temporary file.
+@param[in]	rec	temporary file record
+@param[in]	index	index that the record belongs to
+@param[in,out]	offsets	offsets to the fields; in: rec_offs_n_fields(offsets)
+*/
+void
+rec_init_offsets_temp(
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	ulint*			offsets)
 	MY_ATTRIBUTE((nonnull));
 
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-@see rec_init_offsets_temp() */
+/** Convert a data tuple prefix to the temporary file format.
+@param[out]	rec		record in temporary file format
+@param[in]	index		clustered or secondary index
+@param[in]	fields		data fields
+@param[in]	n_fields	number of data fields
+@param[in]	status		REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
+*/
 void
 rec_convert_dtuple_to_temp(
-/*=======================*/
-	rec_t*			rec,		/*!< out: record */
-	const dict_index_t*	index,		/*!< in: record descriptor */
-	const dfield_t*		fields,		/*!< in: array of data fields */
-	ulint			n_fields);	/*!< in: number of fields */
+	rec_t*			rec,
+	const dict_index_t*	index,
+	const dfield_t*		fields,
+	ulint			n_fields,
+	rec_comp_status_t	status = REC_STATUS_ORDINARY)
+	MY_ATTRIBUTE((nonnull));
 
 /**************************************************************//**
 Copies the first n fields of a physical record to a new physical record in
@@ -833,22 +1042,6 @@ rec_copy_prefix_to_buf(
 						or NULL */
 	ulint*			buf_size)	/*!< in/out: buffer size */
 	MY_ATTRIBUTE((nonnull));
-/** Fold a prefix of a physical record.
-@param[in]	rec		index record
-@param[in]	offsets		return value of rec_get_offsets()
-@param[in]	n_fields	number of complete fields to fold
-@param[in]	n_bytes		number of bytes to fold in the last field
-@param[in]	index_id	index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-	const rec_t*	rec,
-	const ulint*	offsets,
-	ulint		n_fields,
-	ulint		n_bytes,
-	index_id_t	tree_id)
-	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Builds a physical record out of a data tuple and
 stores it into the given buffer.
@@ -896,7 +1089,7 @@ rec_get_converted_size_comp(
 					dict_table_is_comp() is
 					assumed to hold, even if
 					it does not */
-	ulint			status,	/*!< in: status bits of the record */
+	rec_comp_status_t	status,	/*!< in: status bits of the record */
 	const dfield_t*		fields,	/*!< in: array of data fields */
 	ulint			n_fields,/*!< in: number of data fields */
 	ulint*			extra)	/*!< out: extra size */
@@ -921,23 +1114,14 @@ The fields are copied into the memory heap.
 @param[in]	n_fields	number of fields to copy
 @param[in,out]	heap		memory heap */
 void
-rec_copy_prefix_to_dtuple_func(
+rec_copy_prefix_to_dtuple(
 	dtuple_t*		tuple,
 	const rec_t*		rec,
 	const dict_index_t*	index,
-#ifdef UNIV_DEBUG
 	bool			is_leaf,
-#endif /* UNIV_DEBUG */
 	ulint			n_fields,
 	mem_heap_t*		heap)
 	MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap)	\
-	rec_copy_prefix_to_dtuple_func(tuple,rec,index,leaf,n_fields,heap)
-#else /* UNIV_DEBUG */
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap)	\
-	rec_copy_prefix_to_dtuple_func(tuple,rec,index,n_fields,heap)
-#endif /* UNIV_DEBUG */
 /***************************************************************//**
 Validates the consistency of a physical record.
 @return TRUE if ok */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index cb1f0d9836f..5e9dbcdcfb6 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,19 +26,9 @@ Created 5/30/1994 Heikki Tuuri
 
 #include "mach0data.h"
 #include "ut0byte.h"
-#include "dict0dict.h"
 #include "dict0boot.h"
 #include "btr0types.h"
 
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT	((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL	((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL	((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK		(REC_OFFS_EXTERNAL - 1)
-
 /* Offsets of the bit-fields in an old-style record. NOTE! In the table the
 most significant bytes and bits are written below less significant.
 
@@ -71,12 +61,13 @@ most significant bytes and bits are written below less significant.
 				  we can calculate the offset of the next
 				  record with the formula:
 				  relative_offset + offset_of_this_record
-				  mod UNIV_PAGE_SIZE
+				  mod srv_page_size
 			3	3 bits status:
-					000=conventional record
-					001=node pointer record (inside B-tree)
-					010=infimum record
-					011=supremum record
+					000=REC_STATUS_ORDINARY
+					001=REC_STATUS_NODE_PTR
+					010=REC_STATUS_INFIMUM
+					011=REC_STATUS_SUPREMUM
+					100=REC_STATUS_COLUMNS_ADDED
 					1xx=reserved
 				5 bits heap number
 			4	8 bits heap number
@@ -99,10 +90,6 @@ and the shift needed to obtain each bit-field of the record. */
 #define REC_OLD_N_FIELDS_MASK	0x7FEUL
 #define REC_OLD_N_FIELDS_SHIFT	1
 
-#define REC_NEW_STATUS		3	/* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK	0x7UL
-#define REC_NEW_STATUS_SHIFT	0
-
 #define REC_OLD_HEAP_NO		5
 #define REC_HEAP_NO_MASK	0xFFF8UL
 #if 0 /* defined in rem0rec.h for use of page0zip.cc */
@@ -248,8 +235,8 @@ rec_get_next_ptr_const(
 {
 	ulint	field_value;
 
-	ut_ad(REC_NEXT_MASK == 0xFFFFUL);
-	ut_ad(REC_NEXT_SHIFT == 0);
+	compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+	compile_time_assert(REC_NEXT_SHIFT == 0);
 
 	field_value = mach_read_from_2(rec - REC_NEXT);
 
@@ -267,13 +254,13 @@ rec_get_next_ptr_const(
 		as signed 16-bit integer in 2's complement arithmetics.
 		If all platforms defined int16_t in the standard headers,
 		the expression could be written simpler as
-		(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+		(int16_t) field_value + ut_align_offset(...) < srv_page_size
 		*/
 		ut_ad((field_value >= 32768
 		       ? field_value - 65536
 		       : field_value)
-		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
-		      < UNIV_PAGE_SIZE);
+		      + ut_align_offset(rec, srv_page_size)
+		      < srv_page_size);
 #endif
 		/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
 		between each record. */
@@ -281,12 +268,12 @@ rec_get_next_ptr_const(
 		       && field_value < 32768)
 		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
 
-		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
-		       + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+		return((byte*) ut_align_down(rec, srv_page_size)
+		       + ut_align_offset(rec + field_value, srv_page_size));
 	} else {
-		ut_ad(field_value < UNIV_PAGE_SIZE);
+		ut_ad(field_value < srv_page_size);
 
-		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+		return((byte*) ut_align_down(rec, srv_page_size)
 		       + field_value);
 	}
 }
@@ -317,12 +304,8 @@ rec_get_next_offs(
 	ulint		comp)	/*!< in: nonzero=compact page format */
 {
 	ulint	field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
+	compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+	compile_time_assert(REC_NEXT_SHIFT == 0);
 
 	field_value = mach_read_from_2(rec - REC_NEXT);
 
@@ -335,13 +318,13 @@ rec_get_next_offs(
 		as signed 16-bit integer in 2's complement arithmetics.
 		If all platforms defined int16_t in the standard headers,
 		the expression could be written simpler as
-		(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+		(int16_t) field_value + ut_align_offset(...) < srv_page_size
 		*/
 		ut_ad((field_value >= 32768
 		       ? field_value - 65536
 		       : field_value)
-		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
-		      < UNIV_PAGE_SIZE);
+		      + ut_align_offset(rec, srv_page_size)
+		      < srv_page_size);
 #endif
 		if (field_value == 0) {
 
@@ -354,9 +337,9 @@ rec_get_next_offs(
 		       && field_value < 32768)
 		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
 
-		return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+		return(ut_align_offset(rec + field_value, srv_page_size));
 	} else {
-		ut_ad(field_value < UNIV_PAGE_SIZE);
+		ut_ad(field_value < srv_page_size);
 
 		return(field_value);
 	}
@@ -373,14 +356,9 @@ rec_set_next_offs_old(
 	ulint	next)	/*!< in: offset of the next record */
 {
 	ut_ad(rec);
-	ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
+	ut_ad(srv_page_size > next);
+	compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+	compile_time_assert(REC_NEXT_SHIFT == 0);
 	mach_write_to_2(rec - REC_NEXT, next);
 }
 
@@ -397,7 +375,7 @@ rec_set_next_offs_new(
 	ulint	field_value;
 
 	ut_ad(rec);
-	ut_ad(UNIV_PAGE_SIZE > next);
+	ut_ad(srv_page_size > next);
 
 	if (!next) {
 		field_value = 0;
@@ -408,7 +386,7 @@ rec_set_next_offs_new(
 
 		field_value = (ulint)
 			((lint) next
-			 - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+			 - (lint) ut_align_offset(rec, srv_page_size));
 		field_value &= REC_NEXT_MASK;
 	}
 
@@ -457,26 +435,6 @@ rec_set_n_fields_old(
 }
 
 /******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
-	const rec_t*	rec)	/*!< in: physical record */
-{
-	ulint	ret;
-
-	ut_ad(rec);
-
-	ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
-				  REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-	ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
-	return(ret);
-}
-
-/******************************************************//**
 The following function is used to get the number of fields
 in a record.
 @return number of data fields */
@@ -495,6 +453,7 @@ rec_get_n_fields(
 	}
 
 	switch (rec_get_status(rec)) {
+	case REC_STATUS_COLUMNS_ADDED:
 	case REC_STATUS_ORDINARY:
 		return(dict_index_get_n_fields(index));
 	case REC_STATUS_NODE_PTR:
@@ -502,10 +461,10 @@ rec_get_n_fields(
 	case REC_STATUS_INFIMUM:
 	case REC_STATUS_SUPREMUM:
 		return(1);
-	default:
-		ut_error;
-		return(ULINT_UNDEFINED);
 	}
+
+	ut_error;
+	return(ULINT_UNDEFINED);
 }
 
 /** Confirms the n_fields of the entry is sane with comparing the other
@@ -521,13 +480,15 @@ rec_n_fields_is_sane(
 	const rec_t*	rec,
 	const dtuple_t*	entry)
 {
-	return(rec_get_n_fields(rec, index)
-	       == dtuple_get_n_fields(entry)
+	const ulint n_fields = rec_get_n_fields(rec, index);
+
+	return(n_fields == dtuple_get_n_fields(entry)
+	       || (index->is_instant()
+		   && n_fields >= index->n_core_fields)
 	       /* a record for older SYS_INDEXES table
 	       (missing merge_threshold column) is acceptable. */
 	       || (index->table->id == DICT_INDEXES_ID
-		   && rec_get_n_fields(rec, index)
-		      == dtuple_get_n_fields(entry) - 1));
+		   && n_fields == dtuple_get_n_fields(entry) - 1));
 }
 
 /******************************************************//**
@@ -646,19 +607,6 @@ rec_set_info_bits_new(
 }
 
 /******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
-	rec_t*	rec,	/*!< in/out: physical record */
-	ulint	bits)	/*!< in: info bits */
-{
-	rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
-			    REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/******************************************************//**
 The following function is used to retrieve the info and status
 bits of a record.  (Only compact records have status bits.)
 @return info bits */
@@ -670,12 +618,11 @@ rec_get_info_and_status_bits(
 	ulint		comp)	/*!< in: nonzero=compact page format */
 {
 	ulint	bits;
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+	compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+			      & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
 	if (comp) {
-		bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
+		bits = rec_get_info_bits(rec, TRUE)
+			| ulint(rec_get_status(rec));
 	} else {
 		bits = rec_get_info_bits(rec, FALSE);
 		ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
@@ -692,10 +639,8 @@ rec_set_info_and_status_bits(
 	rec_t*	rec,	/*!< in/out: physical record */
 	ulint	bits)	/*!< in: info bits */
 {
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+	compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+			      & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
 	rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
 	rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
 }
@@ -774,7 +719,7 @@ rec_set_deleted_flag_new(
 The following function tells if a new-style record is a node pointer.
 @return TRUE if node pointer */
 UNIV_INLINE
-ibool
+bool
 rec_get_node_ptr_flag(
 /*==================*/
 	const rec_t*	rec)	/*!< in: physical record */
@@ -848,10 +793,6 @@ rec_get_1byte_offs_flag(
 /*====================*/
 	const rec_t*	rec)	/*!< in: physical record */
 {
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
 	return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
 				   REC_OLD_SHORT_SHIFT));
 }
@@ -865,10 +806,7 @@ rec_set_1byte_offs_flag(
 	rec_t*	rec,	/*!< in: physical record */
 	ibool	flag)	/*!< in: TRUE if 1byte form */
 {
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-	ut_ad(flag <= TRUE);
+	ut_ad(flag <= 1);
 
 	rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
 			    REC_OLD_SHORT_SHIFT);
@@ -925,29 +863,6 @@ rec_2_is_field_extern(
 	return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
 }
 
-/* Get the base address of offsets.  The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
-	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
-{
-	ulint	n_alloc;
-	ut_ad(offsets);
-	n_alloc = offsets[0];
-	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
-	UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
-	return(n_alloc);
-}
-
 /**********************************************************//**
 The following function sets the number of allocated elements
 for an array of offsets. */
@@ -965,102 +880,6 @@ rec_offs_set_n_alloc(
 	offsets[0] = n_alloc;
 }
 
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	ulint	n_fields;
-	ut_ad(offsets);
-	n_fields = offsets[1];
-	ut_ad(n_fields > 0);
-	ut_ad(n_fields <= REC_MAX_N_FIELDS);
-	ut_ad(n_fields + REC_OFFS_HEADER_SIZE
-	      <= rec_offs_get_n_alloc(offsets));
-	return(n_fields);
-}
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
-	const rec_t*		rec,	/*!< in: record or NULL */
-	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
-	const ulint*		offsets)/*!< in: array returned by
-					rec_get_offsets() */
-{
-	ulint	i	= rec_offs_n_fields(offsets);
-	ulint	last	= ULINT_MAX;
-	ulint	comp	= *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
-	if (rec) {
-		ut_ad((ulint) rec == offsets[2]);
-		if (!comp) {
-			ut_a(rec_get_n_fields_old(rec) >= i);
-		}
-	}
-	if (index) {
-		ulint max_n_fields;
-		ut_ad((ulint) index == offsets[3]);
-		max_n_fields = ut_max(
-			dict_index_get_n_fields(index),
-			dict_index_get_n_unique_in_tree(index) + 1);
-		if (comp && rec) {
-			switch (rec_get_status(rec)) {
-			case REC_STATUS_ORDINARY:
-				break;
-			case REC_STATUS_NODE_PTR:
-				max_n_fields = dict_index_get_n_unique_in_tree(
-					index) + 1;
-				break;
-			case REC_STATUS_INFIMUM:
-			case REC_STATUS_SUPREMUM:
-				max_n_fields = 1;
-				break;
-			default:
-				ut_error;
-			}
-		}
-		/* index->n_def == 0 for dummy indexes if !comp */
-		ut_a(!comp || index->n_def);
-		ut_a(!index->n_def || i <= max_n_fields);
-	}
-	while (i--) {
-		ulint	curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
-		ut_a(curr <= last);
-		last = curr;
-	}
-	return(TRUE);
-}
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
-	const rec_t*		rec,	/*!< in: record */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint*			offsets)/*!< in: array returned by
-					rec_get_offsets() */
-{
-	ut_ad(rec);
-	ut_ad(index);
-	ut_ad(offsets);
-	ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
-	offsets[2] = (ulint) rec;
-	offsets[3] = (ulint) index;
-}
-#endif /* UNIV_DEBUG */
-
 /************************************************************//**
 The following function is used to get an offset to the nth
 data field in a record.
@@ -1072,7 +891,7 @@ rec_get_nth_field_offs(
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	ulint		n,	/*!< in: index of the field */
 	ulint*		len)	/*!< out: length of the field; UNIV_SQL_NULL
-				if SQL null */
+				if SQL null; UNIV_SQL_DEFAULT if default value */
 {
 	ulint	offs;
 	ulint	length;
@@ -1089,6 +908,8 @@ rec_get_nth_field_offs(
 
 	if (length & REC_OFFS_SQL_NULL) {
 		length = UNIV_SQL_NULL;
+	} else if (length & REC_OFFS_DEFAULT) {
+		length = UNIV_SQL_DEFAULT;
 	} else {
 		length &= REC_OFFS_MASK;
 		length -= offs;
@@ -1099,34 +920,6 @@ rec_get_nth_field_offs(
 }
 
 /******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
 @return first field containing a null BLOB pointer, or NULL if none found */
 UNIV_INLINE
@@ -1167,29 +960,14 @@ Returns nonzero if the extern bit is set in nth field of rec.
 @return nonzero if externally stored */
 UNIV_INLINE
 ulint
-rec_offs_nth_extern(
+rec_offs_nth_extern_old(
 /*================*/
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n)	/*!< in: nth field */
-{
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	ut_ad(n < rec_offs_n_fields(offsets));
-	return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n)	/*!< in: nth field */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		    n	/*!< in: index of the field */)
 {
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	ut_ad(n < rec_offs_n_fields(offsets));
-	return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
+	if(rec_get_1byte_offs_flag(rec))
+		return 0;
+	return (rec_2_get_field_end_info(rec,n) & REC_2BYTE_EXTERN_MASK);
 }
 
 /******************************************************//**
@@ -1400,7 +1178,7 @@ rec_get_nth_field_size(
 	os = rec_get_field_start_offs(rec, n);
 	next_os = rec_get_field_start_offs(rec, n + 1);
 
-	ut_ad(next_os - os < UNIV_PAGE_SIZE);
+	ut_ad(next_os - os < srv_page_size);
 
 	return(next_os - os);
 }
@@ -1427,6 +1205,7 @@ rec_set_nth_field(
 
 	ut_ad(rec);
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_nth_default(offsets, n));
 
 	if (len == UNIV_SQL_NULL) {
 		if (!rec_offs_nth_sql_null(offsets, n)) {
@@ -1437,7 +1216,7 @@ rec_set_nth_field(
 		return;
 	}
 
-	data2 = rec_get_nth_field(rec, offsets, n, &len2);
+	data2 = (byte*)rec_get_nth_field(rec, offsets, n, &len2);
 	if (len2 == UNIV_SQL_NULL) {
 		ut_ad(!rec_offs_comp(offsets));
 		rec_set_nth_field_null_bit(rec, n, FALSE);
@@ -1501,7 +1280,7 @@ rec_offs_data_size(
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
 	size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
 		& REC_OFFS_MASK;
-	ut_ad(size < UNIV_PAGE_SIZE);
+	ut_ad(size < srv_page_size);
 	return(size);
 }
 
@@ -1518,8 +1297,8 @@ rec_offs_extra_size(
 {
 	ulint	size;
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
-	ut_ad(size < UNIV_PAGE_SIZE);
+	size = *rec_offs_base(offsets) & REC_OFFS_MASK;
+	ut_ad(size < srv_page_size);
 	return(size);
 }
 
@@ -1631,27 +1410,34 @@ rec_get_converted_size(
 	ut_ad(index);
 	ut_ad(dtuple);
 	ut_ad(dtuple_check_typed(dtuple));
-
-	ut_ad(dict_index_is_ibuf(index)
-
-	      || dtuple_get_n_fields(dtuple)
-		 == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
-		      == REC_STATUS_NODE_PTR)
-		     ? dict_index_get_n_unique_in_tree_nonleaf(index) + 1
-		     : dict_index_get_n_fields(index))
-
-	      /* a record for older SYS_INDEXES table
-	      (missing merge_threshold column) is acceptable. */
-	      || (index->table->id == DICT_INDEXES_ID
-		  && dtuple_get_n_fields(dtuple)
-		     == dict_index_get_n_fields(index) - 1));
+#ifdef UNIV_DEBUG
+	if (dict_index_is_ibuf(index)) {
+		ut_ad(dtuple->n_fields > 1);
+	} else if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+		   == REC_STATUS_NODE_PTR) {
+		ut_ad(dtuple->n_fields
+		      == dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+	} else if (index->table->id == DICT_INDEXES_ID) {
+		/* The column SYS_INDEXES.MERGE_THRESHOLD was
+		instantly added in MariaDB 10.2.2 (MySQL 5.7). */
+		ut_ad(index->n_fields == DICT_NUM_FIELDS__SYS_INDEXES);
+		ut_ad(dtuple->n_fields == DICT_NUM_FIELDS__SYS_INDEXES
+		      || dtuple->n_fields
+		      == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD);
+	} else {
+		ut_ad(dtuple->n_fields >= index->n_core_fields);
+		ut_ad(dtuple->n_fields <= index->n_fields);
+	}
+#endif
 
 	if (dict_table_is_comp(index->table)) {
-		return(rec_get_converted_size_comp(index,
-						   dtuple_get_info_bits(dtuple)
-						   & REC_NEW_STATUS_MASK,
-						   dtuple->fields,
-						   dtuple->n_fields, NULL));
+		return(rec_get_converted_size_comp(
+			       index,
+			       static_cast<rec_comp_status_t>(
+				       dtuple->info_bits
+				       & REC_NEW_STATUS_MASK),
+			       dtuple->fields,
+			       dtuple->n_fields, NULL));
 	}
 
 	data_size = dtuple_get_data_size(dtuple, 0);
@@ -1659,105 +1445,5 @@ rec_get_converted_size(
 	extra_size = rec_get_converted_extra_size(
 		data_size, dtuple_get_n_fields(dtuple), n_ext);
 
-#if 0
-	/* This code is inactive since it may be the wrong place to add
-	in the size of node pointers used in parent pages AND it is not
-	currently needed since ha_innobase::max_supported_key_length()
-	ensures that the key size limit for each page size is well below
-	the actual limit ((free space on page / 4) - record overhead).
-	But those limits will need to be raised when InnoDB can
-	support multiple page sizes.  At that time, we will need
-	to consider the node pointer on these universal btrees. */
-
-	if (dict_index_is_ibuf(index)) {
-		/* This is for the insert buffer B-tree.
-		All fields in the leaf tuple ascend to the
-		parent node plus the child page pointer. */
-
-		/* ibuf cannot contain externally stored fields */
-		ut_ad(n_ext == 0);
-
-		/* Add the data pointer and recompute extra_size
-		based on one more field. */
-		data_size += REC_NODE_PTR_SIZE;
-		extra_size = rec_get_converted_extra_size(
-			data_size,
-			dtuple_get_n_fields(dtuple) + 1,
-			0);
-
-		/* Be sure dtuple->n_fields has this node ptr
-		accounted for.  This function should correspond to
-		what rec_convert_dtuple_to_rec() needs in storage.
-		In optimistic insert or update-not-in-place, we will
-		have to ensure that if the record is converted to a
-		node pointer, it will not become too large.*/
-	}
-#endif
-
 	return(data_size + extra_size);
 }
-
-/** Fold a prefix of a physical record.
-@param[in]	rec		index record
-@param[in]	offsets		return value of rec_get_offsets()
-@param[in]	n_fields	number of complete fields to fold
-@param[in]	n_bytes		number of bytes to fold in the last field
-@param[in]	index_id	index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-	const rec_t*	rec,
-	const ulint*	offsets,
-	ulint		n_fields,
-	ulint		n_bytes,
-	index_id_t	tree_id)
-{
-	ulint		i;
-	const byte*	data;
-	ulint		len;
-	ulint		fold;
-	ulint		n_fields_rec;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(rec_validate(rec, offsets));
-	ut_ad(n_fields > 0 || n_bytes > 0);
-
-	n_fields_rec = rec_offs_n_fields(offsets);
-	ut_ad(n_fields <= n_fields_rec);
-	ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
-	if (n_fields > n_fields_rec) {
-		n_fields = n_fields_rec;
-	}
-
-	if (n_fields == n_fields_rec) {
-		n_bytes = 0;
-	}
-
-	fold = ut_fold_ull(tree_id);
-
-	for (i = 0; i < n_fields; i++) {
-		data = rec_get_nth_field(rec, offsets, i, &len);
-
-		if (len != UNIV_SQL_NULL) {
-			fold = ut_fold_ulint_pair(fold,
-						  ut_fold_binary(data, len));
-		}
-	}
-
-	if (n_bytes > 0) {
-		data = rec_get_nth_field(rec, offsets, i, &len);
-
-		if (len != UNIV_SQL_NULL) {
-			if (len > n_bytes) {
-				len = n_bytes;
-			}
-
-			fold = ut_fold_ulint_pair(fold,
-						  ut_fold_binary(data, len));
-		}
-	}
-
-	return(fold);
-}
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index f8133f77466..ac78a3c6748 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -54,8 +54,7 @@ This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
 files would be at risk! */
 #define REC_ANTELOPE_MAX_INDEX_COL_LEN		768
 
-/** Maximum indexed field length for table format UNIV_FORMAT_B and
-beyond.
+/** Maximum indexed field length for tables that have atomic BLOBs.
 This (3072) is the maximum index row length allowed, so we cannot create index
 prefix column longer than that. */
 #define REC_VERSION_56_MAX_INDEX_COL_LEN	3072
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index 8f7632ed9ac..3ae5d5bc175 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -185,15 +185,15 @@ tokenized doc string. The index has three "fields":
 dict_index_t*
 row_merge_create_fts_sort_index(
 /*============================*/
-	dict_index_t*		index,	/*!< in: Original FTS index
-					based on which this sort index
-					is created */
-	const dict_table_t*	table,	/*!< in: table that FTS index
-					is being created on */
-	ibool*			opt_doc_id_size);
-					/*!< out: whether to use 4 bytes
-					instead of 8 bytes integer to
-					store Doc ID during sort */
+	dict_index_t*	index,	/*!< in: Original FTS index
+				based on which this sort index
+				is created */
+	dict_table_t*	table,	/*!< in,out: table that FTS index
+				is being created on */
+	ibool*		opt_doc_id_size);
+				/*!< out: whether to use 4 bytes
+				instead of 8 bytes integer to
+				store Doc ID during sort */
 
 /********************************************************************//**
 Initialize FTS parallel sort structures.
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
index c6dfca9d7e8..5eb5425b983 100644
--- a/storage/innobase/include/row0import.h
+++ b/storage/innobase/include/row0import.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -47,21 +47,13 @@ row_import_for_mysql(
 						in MySQL */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
-	trx_t*		trx,			/*!< in/out: transaction that
-						covers the update */
-	table_id_t	table_id,		/*!< in: Table for which we want
-						to set the root table->flags2 */
-	bool		discarded,		/*!< in: set MIX_LEN column bit
-						to discarded, if true */
-	bool		dict_locked)		/*!< in: Set to true if the
-						caller already owns the
-						dict_sys_t:: mutex. */
+/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
+@param[in,out]	trx		dictionary transaction
+@param[in]	table_id	table identifier
+@param[in]	discarded	whether to set or clear the flag
+@return DB_SUCCESS or error code */
+dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
+					 bool discarded)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /*****************************************************************//**
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index b0b9ccd271b..ed425390ed2 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -203,6 +203,8 @@ struct ins_node_t{
 				+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
 	trx_id_t	trx_id;	/*!< trx id or the last trx which executed the
 				node */
+	byte		vers_start_buf[8]; /* Buffers for System Versioning */
+	byte		vers_end_buf[8];   /* system fields. */
 	mem_heap_t*	entry_sys_heap;
 				/* memory heap used as auxiliary storage;
 				entry_list and sys fields are stored here;
@@ -228,5 +230,4 @@ struct ins_node_t{
 #define INS_NODE_ALLOC_ROW_ID	2	/* row id should be allocated */
 #define	INS_NODE_INSERT_ENTRIES 3	/* index entries should be built and
 					inserted */
-
 #endif
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index c52beb495da..723cf310f95 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -49,17 +49,21 @@ for online creation.
 bool
 row_log_allocate(
 /*=============*/
+	const trx_t*	trx,	/*!< in: the ALTER TABLE transaction */
 	dict_index_t*	index,	/*!< in/out: index */
 	dict_table_t*	table,	/*!< in/out: new table being rebuilt,
 				or NULL when creating a secondary index */
 	bool		same_pk,/*!< in: whether the definition of the
 				PRIMARY KEY has remained the same */
-	const dtuple_t*	add_cols,
+	const dtuple_t*	defaults,
 				/*!< in: default values of
-				added columns, or NULL */
+				added or changed columns, or NULL */
 	const ulint*	col_map,/*!< in: mapping of old column
 				numbers to new ones, or NULL if !table */
-	const char*	path)	/*!< in: where to create temporary file */
+	const char*	path,	/*!< in: where to create temporary file */
+	const TABLE*	old_table,	/*!< in: table definition before alter */
+	bool		allow_not_null) /*!< in: allow null to non-null
+					conversion */
 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 
 /******************************************************//**
@@ -207,13 +211,15 @@ row_log_table_blob_alloc(
 @param[in,out]	stage		performance schema accounting object, used by
 ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
 stage->inc() will be called for each block of log that is applied.
+@param[in]	new_table	Altered table
 @return DB_SUCCESS, or error code on failure */
 dberr_t
 row_log_table_apply(
 	que_thr_t*		thr,
 	dict_table_t*		old_table,
 	struct TABLE*		table,
-	ut_stage_alter_t*	stage)
+	ut_stage_alter_t*	stage,
+	dict_table_t*		new_table)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index 51ad5cc5cd7..ad4005239c3 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -61,11 +61,11 @@ struct ib_sequence_t;
 
 /** @brief Block size for I/O operations in merge sort.
 
-The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+The minimum is srv_page_size, or page_get_free_space_of_empty()
 rounded to a power of 2.
 
 When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. */
+can be set as small as srv_page_size / 2. */
 typedef byte	row_merge_block_t;
 
 /** @brief Secondary buffer for I/O operations of merge records.
@@ -101,7 +101,7 @@ struct row_merge_buf_t {
 
 /** Information about temporary files used in merge sort */
 struct merge_file_t {
-	int		fd;		/*!< file descriptor */
+	pfs_os_file_t	fd;		/*!< file descriptor */
 	ulint		offset;		/*!< file offset (end of file) */
 	ib_uint64_t	n_rec;		/*!< number of records in the file */
 };
@@ -193,7 +193,7 @@ row_merge_drop_temp_indexes(void);
 UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
 @param[in]	path	location for creating temporary merge files, or NULL
 @return File descriptor */
-int
+pfs_os_file_t
 row_merge_file_create_low(
 	const char*	path)
 	MY_ATTRIBUTE((warn_unused_result));
@@ -203,7 +203,7 @@ if UNIV_PFS_IO is defined. */
 void
 row_merge_file_destroy_low(
 /*=======================*/
-	int		fd);	/*!< in: merge file descriptor */
+	const pfs_os_file_t&	fd);	/*!< in: merge file descriptor */
 
 /*********************************************************************//**
 Provide a new pathname for a table that is being renamed if it belongs to
@@ -260,7 +260,6 @@ row_merge_rename_index_to_drop(
 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 
 /** Create the index and load in to the dictionary.
-@param[in,out]	trx		trx (sets error_state)
 @param[in,out]	table		the index is on this table
 @param[in]	index_def	the index definition
 @param[in]	add_v		new virtual columns added along with add
@@ -268,7 +267,6 @@ row_merge_rename_index_to_drop(
 @return index, or NULL on error */
 dict_index_t*
 row_merge_create_index(
-	trx_t*			trx,
 	dict_table_t*		table,
 	const index_def_t*	index_def,
 	const dict_add_v_col_t*	add_v)
@@ -310,7 +308,7 @@ old_table unless creating a PRIMARY KEY
 @param[in]	n_indexes	size of indexes[]
 @param[in,out]	table		MySQL table, for reporting erroneous key value
 if applicable
-@param[in]	add_cols	default values of added columns, or NULL
+@param[in]	defaults	default values of added or changed columns, or NULL
 @param[in]	col_map		mapping of old column numbers to new ones, or
 NULL if old_table == new_table
 @param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
@@ -324,6 +322,7 @@ this function and it will be passed to other functions for further accounting.
 @param[in]	add_v		new virtual columns added along with indexes
 @param[in]	eval_table	mysql table used to evaluate virtual column
 				value, see innobase_get_computed_value().
+@param[in]	allow_non_null	allow the conversion from null to not-null
 @return DB_SUCCESS or error code */
 dberr_t
 row_merge_build_indexes(
@@ -335,14 +334,15 @@ row_merge_build_indexes(
 	const ulint*		key_numbers,
 	ulint			n_indexes,
 	struct TABLE*		table,
-	const dtuple_t*		add_cols,
+	const dtuple_t*		defaults,
 	const ulint*		col_map,
 	ulint			add_autoinc,
 	ib_sequence_t&		sequence,
 	bool			skip_pk_sort,
 	ut_stage_alter_t*	stage,
 	const dict_add_v_col_t*	add_v,
-	struct TABLE*		eval_table)
+	struct TABLE*		eval_table,
+	bool			allow_non_null)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************************//**
@@ -372,7 +372,7 @@ UNIV_INTERN
 bool
 row_merge_write(
 /*============*/
-	int		fd,	/*!< in: file descriptor */
+	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 	ulint		offset,	/*!< in: offset where to write,
 				in number of row_merge_block_t elements */
 	const void*	buf,	/*!< in: data */
@@ -393,7 +393,7 @@ row_merge_buf_empty(
 @param[out]	merge_file	merge file structure
 @param[in]	path		location for creating temporary file, or NULL
 @return file descriptor, or -1 on failure */
-int
+pfs_os_file_t
 row_merge_file_create(
 	merge_file_t*	merge_file,
 	const char*	path)
@@ -421,7 +421,7 @@ row_merge_sort(
 	const row_merge_dup_t*	dup,
 	merge_file_t*		file,
 	row_merge_block_t*	block,
-	int*			tmpfd,
+	pfs_os_file_t*		tmpfd,
 	const bool		update_progress,
 	const double	pct_progress,
 	const double	pct_cost,
@@ -460,7 +460,7 @@ row_merge_file_destroy(
 bool
 row_merge_read(
 /*===========*/
-	int			fd,	/*!< in: file descriptor */
+	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 	ulint			offset,	/*!< in: offset where to read
 					in number of row_merge_block_t
 					elements */
@@ -479,7 +479,7 @@ row_merge_read_rec(
 	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
 	const byte*		b,	/*!< in: pointer to record */
 	const dict_index_t*	index,	/*!< in: index of the record */
-	int			fd,	/*!< in: file descriptor */
+	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 	ulint*			foffs,	/*!< in/out: file offset */
 	const mrec_t**		mrec,	/*!< out: pointer to merge record,
 					or NULL on end of list
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 3a53c35ba8b..c59248d88c4 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -227,14 +227,26 @@ row_lock_table_autoinc_for_mysql(
 dberr_t
 row_lock_table(row_prebuilt_t* prebuilt);
 
+/** System Versioning: row_insert_for_mysql() modes */
+enum ins_mode_t {
+	/* plain row (without versioning) */
+	ROW_INS_NORMAL = 0,
+	/* row_start = TRX_ID, row_end = MAX */
+	ROW_INS_VERSIONED,
+	/* row_end = TRX_ID */
+	ROW_INS_HISTORICAL
+};
+
 /** Does an insert for MySQL.
 @param[in]	mysql_rec	row in the MySQL format
 @param[in,out]	prebuilt	prebuilt struct in MySQL handle
+@param[in]	ins_mode	what row type we're inserting
 @return error code or DB_SUCCESS*/
 dberr_t
 row_insert_for_mysql(
 	const byte*		mysql_rec,
-	row_prebuilt_t*		prebuilt)
+	row_prebuilt_t*		prebuilt,
+	ins_mode_t		ins_mode)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /*********************************************************************//**
@@ -258,7 +270,8 @@ row_get_prebuilt_update_vector(
 @param[in,out]	prebuilt	prebuilt struct in MySQL handle
 @return error code or DB_SUCCESS */
 dberr_t
-row_update_for_mysql(row_prebuilt_t* prebuilt)
+row_update_for_mysql(
+	row_prebuilt_t*		prebuilt)
 	MY_ATTRIBUTE((warn_unused_result));
 
 /** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
@@ -660,6 +673,8 @@ struct row_prebuilt_t {
 					not to be confused with InnoDB
 					externally stored columns
 					(VARCHAR can be off-page too) */
+	unsigned	versioned_write:1;/*!< whether this is
+					a versioned write */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template
@@ -775,7 +790,7 @@ struct row_prebuilt_t {
 					allocated mem buf start, because
 					there is a 4 byte magic number at the
 					start and at the end */
-	ibool		keep_other_fields_on_keyread; /*!< when using fetch
+	bool		keep_other_fields_on_keyread; /*!< when using fetch
 					cache with HA_EXTRA_KEYREAD, don't
 					overwrite other fields in mysql row
 					row buffer.*/
@@ -836,6 +851,20 @@ struct row_prebuilt_t {
 
 	/** The MySQL table object */
 	TABLE*		m_mysql_table;
+
+	/** Get template by dict_table_t::cols[] number */
+	const mysql_row_templ_t* get_template_by_col(ulint col) const
+	{
+		ut_ad(col < n_template);
+		ut_ad(mysql_template);
+		for (ulint i = col; i < n_template; ++i) {
+			const mysql_row_templ_t* templ = &mysql_template[i];
+			if (!templ->is_virtual && templ->col_no == col) {
+				return templ;
+			}
+		}
+		return NULL;
+	}
 };
 
 /** Callback for row_mysql_sys_index_iterate() */
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 655685c02a8..a7ddef4fe8a 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -111,7 +111,7 @@ struct purge_node_t{
 
 	upd_t*		update;	/*!< update vector for a clustered index
 				record */
-	dtuple_t*	ref;	/*!< NULL, or row reference to the next row to
+	const dtuple_t*	ref;	/*!< NULL, or row reference to the next row to
 				handle */
 	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
 				heap) of the indexed fields of the row to
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index d24ae37b13d..1f37a6b02d7 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -153,9 +153,9 @@ row_build(
 					consulted instead; the user
 					columns in this table should be
 					the same columns as in index->table */
-	const dtuple_t*		add_cols,
+	const dtuple_t*		defaults,
 					/*!< in: default values of
-					added columns, or NULL */
+					added or changed columns, or NULL */
 	const ulint*		col_map,/*!< in: mapping of old column
 					numbers to new ones, or NULL */
 	row_ext_t**		ext,	/*!< out, own: cache of
@@ -177,7 +177,7 @@ addition of new virtual columns.
 				of an index, or NULL if
 				index->table should be
 				consulted instead
-@param[in]	add_cols	default values of added columns, or NULL
+@param[in]	defaults	default values of added or changed columns, or NULL
 @param[in]	add_v		new virtual columns added
 				along with new indexes
 @param[in]	col_map		mapping of old column
@@ -194,7 +194,7 @@ row_build_w_add_vcol(
 	const rec_t*		rec,
 	const ulint*		offsets,
 	const dict_table_t*	col_table,
-	const dtuple_t*		add_cols,
+	const dtuple_t*		defaults,
 	const dict_add_v_col_t*	add_v,
 	const ulint*		col_map,
 	row_ext_t**		ext,
@@ -269,9 +269,8 @@ row_build_row_ref_in_tuple(
 					held as long as the row
 					reference is used! */
 	const dict_index_t*	index,	/*!< in: secondary index */
-	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
+	ulint*			offsets)/*!< in: rec_get_offsets(rec, index)
 					or NULL */
-	trx_t*			trx)	/*!< in: transaction or NULL */
 	MY_ATTRIBUTE((nonnull(1,2,3)));
 /*******************************************************************//**
 Builds from a secondary index record a row reference with which we can
@@ -285,8 +284,8 @@ row_build_row_ref_fast(
 	const ulint*	map,	/*!< in: array of field numbers in rec
 				telling how ref should be built from
 				the fields of rec */
-	const rec_t*	rec,	/*!< in: record in the index; must be
-				preserved while ref is used, as we do
+	const rec_t*	rec,	/*!< in: secondary index record;
+				must be preserved while ref is used, as we do
 				not copy field values to heap */
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
 /***************************************************************//**
@@ -398,7 +397,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
 {
 	mtr->start();
 
-	switch (index->space) {
+	switch (index->table->space->id) {
 	case IBUF_SPACE_ID:
 		if (pessimistic
 		    && !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
@@ -409,7 +408,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
 		mtr->set_log_mode(MTR_LOG_NO_REDO);
 		break;
 	default:
-		mtr->set_named_space(index->space);
+		index->set_modified(*mtr);
 		break;
 	}
 
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 8a32bb3ffd2..a7c0f2551b5 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -144,8 +145,8 @@ row_build_row_ref_fast(
 	const ulint*	map,	/*!< in: array of field numbers in rec
 				telling how ref should be built from
 				the fields of rec */
-	const rec_t*	rec,	/*!< in: record in the index; must be
-				preserved while ref is used, as we do
+	const rec_t*	rec,	/*!< in: secondary index record;
+				must be preserved while ref is used, as we do
 				not copy field values to heap */
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index 552680b16d1..ef0ccbbda9f 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -135,8 +135,7 @@ row_sel_convert_mysql_key_to_innobase(
 	ulint		buf_len,	/*!< in: buffer length */
 	dict_index_t*	index,		/*!< in: index of the key value */
 	const byte*	key_ptr,	/*!< in: MySQL key value */
-	ulint		key_len,	/*!< in: MySQL key value length */
-	trx_t*		trx);		/*!< in: transaction */
+	ulint		key_len);	/*!< in: MySQL key value length */
 
 
 /** Searches for rows in the database. This is used in the interface to
diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h
index f9a20665a3b..993dac295da 100644
--- a/storage/innobase/include/row0trunc.h
+++ b/storage/innobase/include/row0trunc.h
@@ -182,19 +182,16 @@ public:
 	/** Create an index for a table.
 	@param[in]	table_name		table name, for which to create
 	the index
-	@param[in]	space_id		space id where we have to
-	create the index
-	@param[in]	page_size		page size of the .ibd file
+	@param[in,out]	space			tablespace
 	@param[in]	index_type		type of index to truncate
 	@param[in]	index_id		id of index to truncate
 	@param[in]	btr_redo_create_info	control info for ::btr_create()
 	@param[in,out]	mtr			mini-transaction covering the
 	create index
 	@return root page no or FIL_NULL on failure */
-	ulint create_index(
+	inline ulint create_index(
 		const char*		table_name,
-		ulint			space_id,
-		const page_size_t&	page_size,
+		fil_space_t*		space,
 		ulint			index_type,
 		index_id_t      	index_id,
 		const btr_create_t&	btr_redo_create_info,
@@ -203,31 +200,27 @@ public:
 	/** Create the indexes for a table
 	@param[in]	table_name	table name, for which to create the
 	indexes
-	@param[in]	space_id	space id where we have to create the
-	indexes
-	@param[in]	page_size	page size of the .ibd file
-	@param[in]	flags		tablespace flags
+	@param[in,out]	space		tablespace
 	@param[in]	format_flags	page format flags
 	@return DB_SUCCESS or error code. */
-	dberr_t create_indexes(
+	inline dberr_t create_indexes(
 		const char*		table_name,
-		ulint			space_id,
-		const page_size_t&	page_size,
-		ulint			flags,
+		fil_space_t*		space,
 		ulint			format_flags);
 
 	/** Check if index has been modified since TRUNCATE log snapshot
 	was recorded.
-	@param space_id	space_id where table/indexes resides.
+	@param[in]	space		tablespace
+	@param[in]	root_page_no	index root page number
 	@return true if modified else false */
-	bool is_index_modified_since_logged(
-		ulint		space_id,
-		ulint		root_page_no) const;
+	inline bool is_index_modified_since_logged(
+		const fil_space_t*	space,
+		ulint			root_page_no) const;
 
 	/** Drop indexes for a table.
-	@param space_id		space_id where table/indexes resides.
+	@param[in,out] space		tablespace
 	@return DB_SUCCESS or error code. */
-	void drop_indexes(ulint	space_id) const;
+	void drop_indexes(fil_space_t* space) const;
 
 	/**
 	Parses log record during recovery
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 730e7f559c6..f7cec643b33 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -111,7 +111,7 @@ struct undo_node_t{
 	ulint		cmpl_info;/*!< compiler analysis of an update */
 	upd_t*		update;	/*!< update vector for a clustered index
 				record */
-	dtuple_t*	ref;	/*!< row reference to the next row to handle */
+	const dtuple_t*	ref;	/*!< row reference to the next row to handle */
 	dtuple_t*	row;	/*!< a copy (also fields copied to heap) of the
 				row to handle */
 	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index 77708d7d568..5e01e513a50 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,6 @@ Created 12/27/1996 Heikki Tuuri
 #ifndef row0upd_h
 #define row0upd_h
 
-#include "univ.i"
 #include "data0data.h"
 #include "row0types.h"
 #include "btr0types.h"
@@ -234,27 +233,19 @@ row_upd_build_difference_binary(
 	mem_heap_t*	heap,
 	TABLE*		mysql_table)
 	MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
+/** Apply an update vector to an index entry.
+@param[in,out]	entry	index entry to be updated; the clustered index record
+			must be covered by a lock or a page latch to prevent
+			deletion (rollback or purge)
+@param[in]	index	index of the entry
+@param[in]	update	update vector built for the entry
+@param[in,out]	heap	memory heap for copying off-page columns */
 void
 row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
-	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
-				the clustered index record must be
-				covered by a lock or a page latch to
-				prevent deletion (rollback or purge) */
-	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
-				non-clustered index */
-	const upd_t*	update,	/*!< in: an update vector built for the index so
-				that the field number in an upd_field is the
-				index position */
-	ibool		order_only,
-				/*!< in: if TRUE, limit the replacement to
-				ordering fields of index; note that this
-				does not work for non-clustered indexes. */
-	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
-				copying the new values */
+	dtuple_t*		entry,
+	const dict_index_t*	index,
+	const upd_t*		update,
+	mem_heap_t*		heap)
 	MY_ATTRIBUTE((nonnull));
 /***********************************************************//**
 Replaces the new column values stored in the update vector to the index entry
@@ -462,6 +453,7 @@ struct upd_t{
 					virtual column update now */
 	ulint		n_fields;	/*!< number of update fields */
 	upd_field_t*	fields;		/*!< array of update fields */
+	byte		vers_sys_value[8]; /*!< buffer for updating system fields */
 
 	/** Append an update field to the end of array
 	@param[in]	field	an update field */
@@ -482,6 +474,22 @@ struct upd_t{
 		return(false);
 	}
 
+	/** Determine if the update affects a system versioned column or row_end. */
+	bool affects_versioned() const
+	{
+		for (ulint i = 0; i < n_fields; i++) {
+			dtype_t type = fields[i].new_val.type;
+			if (type.is_versioned()) {
+				return true;
+			}
+			// versioned DELETE is UPDATE SET row_end=NOW
+			if (type.vers_sys_end()) {
+				return true;
+			}
+		}
+		return false;
+	}
+
 #ifdef UNIV_DEBUG
         bool validate() const
         {
@@ -498,17 +506,24 @@ struct upd_t{
 
 };
 
+/** Kinds of update operation */
+enum delete_mode_t {
+	NO_DELETE = 0,		/*!< this operation does not delete */
+	PLAIN_DELETE,		/*!< ordinary delete */
+	VERSIONED_DELETE	/*!< update old and insert a new row */
+};
+
 /* Update node structure which also implements the delete operation
 of a row */
 
 struct upd_node_t{
 	que_common_t	common;	/*!< node type: QUE_NODE_UPDATE */
-	ibool		is_delete;/* TRUE if delete, FALSE if update */
+	delete_mode_t	is_delete;	/*!< kind of DELETE */
 	ibool		searched_update;
 				/* TRUE if searched update, FALSE if
 				positioned */
-	ibool		in_mysql_interface;
-				/* TRUE if the update node was created
+	bool		in_mysql_interface;
+				/* whether the update node was created
 				for the MySQL interface */
 	dict_foreign_t*	foreign;/* NULL or pointer to a foreign key
 				constraint if this update node is used in
@@ -553,6 +568,12 @@ struct upd_node_t{
 	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
 				heap) of the row to update; this must be reset
 				to NULL after a successful update */
+	dtuple_t*	historical_row;	/*!< historical row used in
+				CASCADE UPDATE/SET NULL;
+				allocated from historical_heap  */
+	mem_heap_t*	historical_heap; /*!< heap for historical row insertion;
+				created when row to update is located;
+				freed right before row update */
 	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
 				stored columns in the old row */
 	dtuple_t*	upd_row;/* NULL, or a copy of the updated row */
@@ -567,6 +588,22 @@ struct upd_node_t{
 				/* column assignment list */
 	ulint		magic_n;
 
+	/** Also set row_start = CURRENT_TIMESTAMP/trx->id
+	@param[in]	trx	transaction */
+	void make_versioned_update(const trx_t* trx);
+	/** Only set row_end = CURRENT_TIMESTAMP/trx->id.
+	Do not touch other fields at all.
+	@param[in]	trx	transaction */
+	void make_versioned_delete(const trx_t* trx);
+
+private:
+	/** Appends row_start or row_end field to update vector and sets a
+	CURRENT_TIMESTAMP/trx->id value to it.
+	Supposed to be called only by make_versioned_update() and
+	make_versioned_delete().
+	@param[in]	trx	transaction
+	@param[in]	idx	table->row_start or table->row_end */
+	void make_versioned_helper(const trx_t* trx, ulint idx);
 };
 
 #define	UPD_NODE_MAGIC_N	1579975
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 364c876ecc7..5e43a272388 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -181,9 +181,8 @@ row_upd_rec_sys_fields(
 			offset = row_get_trx_id_offset(index, offsets);
 		}
 
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
+		compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+
 		/* During IMPORT the trx id in the record can be in the
 		future, if the .ibd file is being imported from another
 		instance. During IMPORT roll_ptr will be 0. */
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index 23c2e8546bc..9869a3acf95 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -42,33 +42,19 @@ class ReadView;
 
 /** Determine if an active transaction has inserted or modified a secondary
 index record.
+@param[in,out]	caller_trx	trx of current thread
 @param[in]	rec	secondary index record
 @param[in]	index	secondary index
 @param[in]	offsets	rec_get_offsets(rec, index)
-@return	the active transaction; trx_release_reference() must be invoked
+@return	the active transaction; trx->release_reference() must be invoked
 @retval	NULL if the record was committed */
 trx_t*
 row_vers_impl_x_locked(
+	trx_t*		caller_trx,
 	const rec_t*	rec,
 	dict_index_t*	index,
 	const ulint*	offsets);
 
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@param[in]	trx_id		transaction id in the version
-@param[in]	name		table name
-@param[in,out]	mtr		mini transaction  holding the latch on the
-				clustered index record; it will also hold
-				 the latch on purge_view
-@return TRUE if earlier version should be preserved */
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
-	trx_id_t		trx_id,
-	const table_name_t&	name,
-	mtr_t*			mtr);
-
 /** Finds out if a version of the record, where the version >= the current
 purge view, should have ientry as its secondary index entry. We check
 if there is any not delete marked version of the record where the trx
@@ -134,6 +120,7 @@ which should be seen by a semi-consistent read. */
 void
 row_vers_build_for_semi_consistent_read(
 /*====================================*/
+	trx_t*		caller_trx,/*!< in/out: trx of current thread */
 	const rec_t*	rec,	/*!< in: record in a clustered index; the
 				caller must have a latch on the page; this
 				latch locks the top of the stack of versions
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
index 9573c5add84..35937fe1204 100644
--- a/storage/innobase/include/srv0conc.h
+++ b/storage/innobase/include/srv0conc.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -41,9 +42,7 @@ Created 2011/04/18 Sunny Bains
 #define srv_conc_h
 
 /** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
+a semaphore inside InnoDB. srv_start() sets the value. */
 extern	ulint	srv_max_n_threads;
 
 /** The following controls how many threads we let inside InnoDB concurrently:
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index e4034f3a6ff..069ab5cf93a 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -2,7 +2,7 @@
 
 Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
@@ -503,18 +503,18 @@ extern ulint		monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
 
 /** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
 counter option. */
-#define MONITOR_ON(monitor)				\
-	(monitor_set_tbl[monitor / NUM_BITS_ULINT] |=	\
-			((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_ON(monitor)					\
+	(monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] |=	\
+	 (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
 
-#define MONITOR_OFF(monitor)				\
-	(monitor_set_tbl[monitor / NUM_BITS_ULINT] &=	\
-			~((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_OFF(monitor)					\
+	(monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] &=	\
+	 ~(ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
 
 /** Check whether the requested monitor is turned on/off */
-#define MONITOR_IS_ON(monitor)				\
-	(monitor_set_tbl[monitor / NUM_BITS_ULINT] &	\
-			((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_IS_ON(monitor)					\
+	(monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] &	\
+	 (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
 
 /** The actual monitor counter array that records each monintor counter
 value */
@@ -608,8 +608,9 @@ Use MONITOR_INC if appropriate mutex protection exists.
 #define MONITOR_ATOMIC_INC_LOW(monitor, enabled)			\
 	if (enabled) {							\
 		ib_uint64_t	value;					\
-		value  = my_atomic_add64(				\
-			(int64*) &MONITOR_VALUE(monitor), 1) + 1;	\
+		value  = my_atomic_add64_explicit(			\
+			(int64*) &MONITOR_VALUE(monitor), 1,		\
+			MY_MEMORY_ORDER_RELAXED) + 1;			\
 		/* Note: This is not 100% accurate because of the	\
 		inherent race, we ignore it due to performance. */	\
 		if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) {	\
@@ -624,8 +625,9 @@ Use MONITOR_DEC if appropriate mutex protection exists.
 #define MONITOR_ATOMIC_DEC_LOW(monitor, enabled)			\
 	if (enabled) {							\
 		ib_uint64_t	value;					\
-		value = my_atomic_add64(				\
-			(int64*) &MONITOR_VALUE(monitor), -1) - 1;	\
+		value = my_atomic_add64_explicit(			\
+			(int64*) &MONITOR_VALUE(monitor), -1,		\
+			MY_MEMORY_ORDER_RELAXED) - 1;			\
 		/* Note: This is not 100% accurate because of the	\
 		inherent race, we ignore it due to performance. */	\
 		if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) {	\
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index ecd2914515d..422b8ef39e4 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -48,7 +48,6 @@ Created 10/10/1995 Heikki Tuuri
 #include "mysql/psi/psi.h"
 
 #include "univ.i"
-#include "log0log.h"
 #include "os0event.h"
 #include "que0types.h"
 #include "trx0types.h"
@@ -81,7 +80,7 @@ struct srv_stats_t
 	lsn_ctr_1_t		os_log_written;
 
 	/** Number of writes being done to the log files.
-	Protected by log_sys->write_mutex. */
+	Protected by log_sys.write_mutex. */
 	ulint_ctr_1_t		os_log_pending_writes;
 
 	/** We increase this counter, when we don't have enough
@@ -148,7 +147,7 @@ struct srv_stats_t
 	ulint_ctr_1_t		n_lock_wait_count;
 
 	/** Number of threads currently waiting on database locks */
-	simple_counter<ulint, true> n_lock_wait_current_count;
+	simple_atomic_counter<>	n_lock_wait_current_count;
 
 	/** Number of rows read. */
 	ulint_ctr_64_t		n_rows_read;
@@ -261,12 +260,6 @@ extern	ulong	srv_thread_sleep_delay;
 /** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
 extern	ulong	srv_adaptive_max_sleep_delay;
 
-/** The file format to use on new *.ibd files. */
-extern ulint	srv_file_format;
-/** Whether to check file format during startup.  A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
-set it to the highest format we support. */
-extern ulint	srv_max_file_format_at_startup;
 /** Place locks to records only i.e. do not use next-key locking except
 on duplicate key checking and foreign key checking */
 extern ibool	srv_locks_unsafe_for_binlog;
@@ -283,25 +276,12 @@ Currently we support native aio on windows and linux */
 extern my_bool	srv_use_native_aio;
 extern my_bool	srv_numa_interleave;
 
-/* Use trim operation */
-extern my_bool srv_use_trim;
-
 /* Use atomic writes i.e disable doublewrite buffer */
 extern my_bool srv_use_atomic_writes;
 
 /* Compression algorithm*/
 extern ulong innodb_compression_algorithm;
 
-/* Number of flush threads */
-#define MTFLUSH_MAX_WORKER		64
-#define MTFLUSH_DEFAULT_WORKER		8
-
-/* Number of threads used for multi-threaded flush */
-extern long    srv_mtflush_threads;
-
-/* If this flag is TRUE, then we will use multi threaded flush. */
-extern my_bool	srv_use_mtflush;
-
 /** TRUE if the server was successfully started */
 extern bool	srv_was_started;
 
@@ -359,17 +339,15 @@ extern const ulint	SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
 
 extern char*	srv_log_group_home_dir;
 
-/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
-#define SRV_N_LOG_FILES_MAX 100
 extern ulong	srv_n_log_files;
 /** The InnoDB redo log file size, or 0 when changing the redo log format
 at startup (while disallowing writes to the redo log). */
 extern ulonglong	srv_log_file_size;
-extern ulint	srv_log_buffer_size;
+extern ulong	srv_log_buffer_size;
 extern ulong	srv_flush_log_at_trx_commit;
 extern uint	srv_flush_log_at_timeout;
 extern ulong	srv_log_write_ahead_size;
-extern char	srv_adaptive_flushing;
+extern my_bool	srv_adaptive_flushing;
 extern my_bool	srv_flush_sync;
 
 #ifdef WITH_INNODB_DISALLOW_WRITES
@@ -400,8 +378,6 @@ extern ulong	srv_n_page_hash_locks;
 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
 extern ulong	srv_LRU_scan_depth;
 /** Whether or not to flush neighbors of a block */
-extern ulong	srv_buf_pool_dump_pct;	/*!< dump that may % of each buffer
-					pool during BP dump */
 extern ulong	srv_flush_neighbors;
 /** Previously requested size */
 extern ulint	srv_buf_pool_old_size;
@@ -411,14 +387,18 @@ extern ulint	srv_buf_pool_base_size;
 extern ulint	srv_buf_pool_curr_size;
 /** Dump this % of each buffer pool during BP dump */
 extern ulong	srv_buf_pool_dump_pct;
+#ifdef UNIV_DEBUG
+/** Abort load after this number of pages */
+extern ulong srv_buf_pool_load_pages_abort;
+#endif
 /** Lock table size in bytes */
 extern ulint	srv_lock_table_size;
 
 extern ulint	srv_n_file_io_threads;
 extern my_bool	srv_random_read_ahead;
 extern ulong	srv_read_ahead_threshold;
-extern ulint	srv_n_read_io_threads;
-extern ulint	srv_n_write_io_threads;
+extern ulong	srv_n_read_io_threads;
+extern ulong	srv_n_write_io_threads;
 
 /* Defragmentation, Origianlly facebook default value is 100, but it's too high */
 #define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
@@ -452,8 +432,6 @@ to treat NULL value when collecting statistics. It is not defined
 as enum type because the configure option takes unsigned integer type. */
 extern ulong	srv_innodb_stats_method;
 
-extern char*	srv_file_flush_method_str;
-
 extern ulint	srv_max_n_open_files;
 
 extern ulong	srv_n_page_cleaners;
@@ -488,7 +466,7 @@ extern my_bool			srv_stats_include_delete_marked;
 extern unsigned long long	srv_stats_modified_counter;
 extern my_bool			srv_stats_sample_traditional;
 
-extern ibool	srv_use_doublewrite_buf;
+extern my_bool	srv_use_doublewrite_buf;
 extern ulong	srv_doublewrite_batch_size;
 extern ulong	srv_checksum_algorithm;
 
@@ -630,16 +608,16 @@ extern mysql_pfs_key_t	trx_rollback_clean_thread_key;
 schema */
 #  define pfs_register_thread(key)			\
 do {								\
-	struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+	struct PSI_thread* psi = PSI_CALL_new_thread(key, NULL, 0);\
 	/* JAN: TODO: MYSQL 5.7 PSI                             \
-	PSI_THREAD_CALL(set_thread_os_id)(psi);	*/		\
-	PSI_THREAD_CALL(set_thread)(psi);			\
+	PSI_CALL_set_thread_os_id(psi);	*/		\
+	PSI_CALL_set_thread(psi);			\
 } while (0)
 
 /* This macro delist the current thread from performance schema */
 #  define pfs_delete_thread()				\
 do {								\
-	PSI_THREAD_CALL(delete_current_thread)();		\
+	PSI_CALL_delete_current_thread();		\
 } while (0)
 # else
 #  define pfs_register_thread(key)
@@ -680,10 +658,9 @@ extern PSI_stage_info	srv_stage_buffer_pool_load;
 #endif /* HAVE_PSI_STAGE_INTERFACE */
 
 
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
+/** Alternatives for innodb_flush_method */
 enum srv_flush_t {
-	SRV_FSYNC = 1,	/*!< fsync, the default */
+	SRV_FSYNC = 0,	/*!< fsync, the default */
 	SRV_O_DSYNC,	/*!< open log files in O_SYNC mode */
 	SRV_LITTLESYNC,	/*!< do not call os_file_flush()
 				when writing data files, but do flush
@@ -695,18 +672,21 @@ enum srv_flush_t {
 				the reason for which is that some FS
 				do not flush meta-data when
 				unbuffered IO happens */
-	SRV_O_DIRECT_NO_FSYNC,
+	SRV_O_DIRECT_NO_FSYNC
 				/*!< do not use fsync() when using
 				direct IO i.e.: it can be set to avoid
 				the fsync() call that we make when
 				using SRV_UNIX_O_DIRECT. However, in
 				this case user/DBA should be sure about
 				the integrity of the meta-data */
-	SRV_ALL_O_DIRECT_FSYNC
+#ifdef _WIN32
+	,SRV_ALL_O_DIRECT_FSYNC
 				/*!< Traditional Windows appoach to open 
 				all files without caching, and do FileFlushBuffers()*/
+#endif
 };
-extern enum srv_flush_t	srv_file_flush_method;
+/** innodb_flush_method */
+extern ulong srv_file_flush_method;
 
 /** Alternatives for srv_force_recovery. Non-zero values are intended
 to help the user get a damaged database up so that he can dump intact
@@ -943,16 +923,10 @@ srv_was_tablespace_truncated(const fil_space_t* space);
 #ifdef UNIV_DEBUG
 /** Disables master thread. It's used by:
 	SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
-@param[in]	thd		thread handle
-@param[in]	var		pointer to system variable
-@param[out]	var_ptr		where the formal string goes
 @param[in]	save		immediate result from check function */
 void
-srv_master_thread_disabled_debug_update(
-	THD*				thd,
-	struct st_mysql_sys_var*	var,
-	void*				var_ptr,
-	const void*			save);
+srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+					const void* save);
 #endif /* UNIV_DEBUG */
 
 /** Status variables to be passed to MySQL */
@@ -968,6 +942,7 @@ struct export_var_t{
 	char  innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */
 	char  innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */
 	char  innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
+	my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */
 	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
 	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
 	ulint innodb_buffer_pool_bytes_data;	/*!< File bytes used */
@@ -996,7 +971,7 @@ struct export_var_t{
 	ulint innodb_os_log_fsyncs;		/*!< fil_n_log_flushes */
 	ulint innodb_os_log_pending_writes;	/*!< srv_os_log_pending_writes */
 	ulint innodb_os_log_pending_fsyncs;	/*!< fil_n_pending_log_flushes */
-	ulint innodb_page_size;			/*!< UNIV_PAGE_SIZE */
+	ulint innodb_page_size;			/*!< srv_page_size */
 	ulint innodb_pages_created;		/*!< buf_pool->stat.n_pages_created */
 	ulint innodb_pages_read;		/*!< buf_pool->stat.n_pages_read*/
 	ulint innodb_page0_read;		/*!< srv_stats.page0_read */
@@ -1033,6 +1008,9 @@ struct export_var_t{
 	ulint innodb_defragment_count;		/*!< Number of defragment
 						operations*/
 
+	/** Number of instant ALTER TABLE operations that affect columns */
+	ulong innodb_instant_alter_column;
+
 	ulint innodb_onlineddl_rowlog_rows;	/*!< Online alter rows */
 	ulint innodb_onlineddl_rowlog_pct_used; /*!< Online alter percentage
 						of used row log buffer */
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 3575f2e40b9..ee263f6c1f6 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -44,20 +44,16 @@ only one buffer pool instance is used. */
 dberr_t
 srv_undo_tablespaces_init(bool create_new_db);
 
-/****************************************************************//**
-Starts Innobase and creates a new database if database files
-are not found and the user wants.
+/** Start InnoDB.
+@param[in]	create_new_db	whether to create a new database
 @return DB_SUCCESS or error code */
-dberr_t
-innobase_start_or_create_for_mysql();
+dberr_t srv_start(bool create_new_db);
 
 /** Shut down InnoDB. */
-void
-innodb_shutdown();
+void innodb_shutdown();
 
 /** Shut down background threads that can generate undo log. */
-void
-srv_shutdown_bg_undo_sources();
+void srv_shutdown_bg_undo_sources();
 
 /*************************************************************//**
 Copy the file path component of the physical file to parameter. It will
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 4104e594cf9..b3180c1779d 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -99,16 +99,11 @@ void
 sync_array_print(
 	FILE*		file);	/*!< in: file where to print */
 
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-void
-sync_array_init(
-	ulint		n_threads);	/*!< in: Number of slots to create */
+/** Create the primary system wait arrays */
+void sync_array_init();
 
-/**********************************************************************//**
-Close sync array wait sub-system. */
-void
-sync_array_close();
+/** Destroy the sync array wait sub-system. */
+void sync_array_close();
 
 /**********************************************************************//**
 Get an instance of the sync wait array. */
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
index de27c87816c..daa08a91e1e 100644
--- a/storage/innobase/include/sync0policy.h
+++ b/storage/innobase/include/sync0policy.h
@@ -50,7 +50,7 @@ public:
 			m_mutex(),
 			m_filename(),
 			m_line(),
-			m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+			m_thread_id(ULINT_UNDEFINED)
 		{
 			/* No op */
 		}
@@ -76,7 +76,8 @@ public:
 		{
 			m_mutex = mutex;
 
-			m_thread_id = os_thread_get_curr_id();
+			my_atomic_storelint(&m_thread_id,
+					    ulint(os_thread_get_curr_id()));
 
 			m_filename = filename;
 
@@ -89,7 +90,7 @@ public:
 		{
 			m_mutex = NULL;
 
-			m_thread_id = os_thread_id_t(ULINT_UNDEFINED);
+			my_atomic_storelint(&m_thread_id, ULINT_UNDEFINED);
 
 			m_filename = NULL;
 
@@ -105,7 +106,7 @@ public:
 
 			msg << m_mutex->policy().to_string();
 
-			if (os_thread_pf(m_thread_id) != ULINT_UNDEFINED) {
+			if (m_thread_id != ULINT_UNDEFINED) {
 
 				msg << " addr: " << m_mutex
 				    << " acquired: " << locked_from().c_str();
@@ -138,7 +139,7 @@ public:
 		unsigned	m_line;
 
 		/** Thread ID of the thread that own(ed) the mutex */
-		os_thread_id_t	m_thread_id;
+		ulint		m_thread_id;
 	};
 
 	/** Constructor. */
@@ -157,7 +158,7 @@ public:
 	/** Mutex is being destroyed. */
 	void destroy() UNIV_NOTHROW
 	{
-		ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+		ut_ad((ulint)my_atomic_loadlint(&m_context.m_thread_id) == ULINT_UNDEFINED);
 
 		m_magic_n = 0;
 
@@ -167,8 +168,7 @@ public:
 	/** Called when the mutex is "created". Note: Not from the constructor
 	but when the mutex is initialised.
 	@param[in]	id              Mutex ID */
-	void init(latch_id_t id)
-		UNIV_NOTHROW;
+	void init(latch_id_t id) UNIV_NOTHROW;
 
 	/** Called when an attempt is made to lock the mutex
 	@param[in]	mutex		Mutex instance to be locked
@@ -199,7 +199,7 @@ public:
 	bool is_owned() const UNIV_NOTHROW
 	{
 		return(os_thread_eq(
-				m_context.m_thread_id,
+				(os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id),
 				os_thread_get_curr_id()));
 	}
 
@@ -221,7 +221,7 @@ public:
 	os_thread_id_t get_thread_id() const
 		UNIV_NOTHROW
 	{
-		return(m_context.m_thread_id);
+		return((os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id));
 	}
 
 	/** Magic number to check for memory corruption. */
@@ -241,7 +241,7 @@ struct NoPolicy {
 	void init(const Mutex&, latch_id_t, const char*, uint32_t)
 		UNIV_NOTHROW { }
 	void destroy() UNIV_NOTHROW { }
-	void enter(const Mutex&, const char*, unsigned line) UNIV_NOTHROW { }
+	void enter(const Mutex&, const char*, unsigned) UNIV_NOTHROW { }
 	void add(uint32_t, uint32_t) UNIV_NOTHROW { }
 	void locked(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
 	void release(const Mutex&) UNIV_NOTHROW { }
@@ -275,12 +275,11 @@ public:
 
 	/** Called when the mutex is "created". Note: Not from the constructor
 	but when the mutex is initialised.
-	@param[in]	mutex		Mutex instance to track
 	@param[in]	id              Mutex ID
 	@param[in]	filename	File where mutex was created
 	@param[in]	line		Line in filename */
 	void init(
-		const MutexType&	mutex,
+		const Mutex&,
 		latch_id_t		id,
 		const char*		filename,
 		uint32_t		line)
@@ -423,15 +422,8 @@ public:
 
 	/** Called when the mutex is "created". Note: Not from the constructor
 	but when the mutex is initialised.
-	@param[in]	mutex		Mutex instance to track
-	@param[in]	id              Mutex ID
-	@param[in]	filename	File where mutex was created
-	@param[in]	line		Line in filename */
-	void init(
-		const MutexType&	mutex,
-		latch_id_t		id,
-		const char*		filename,
-		uint32_t		line)
+	@param[in]	id              Mutex ID */
+	void init(const Mutex&, latch_id_t id, const char*, uint32)
 		UNIV_NOTHROW
 	{
 		/* It can be LATCH_ID_BUF_BLOCK_MUTEX or
diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic
index f3526bbfef5..a28e3c382b4 100644
--- a/storage/innobase/include/sync0policy.ic
+++ b/storage/innobase/include/sync0policy.ic
@@ -80,7 +80,7 @@ void MutexDebug<Mutex>::locked(
 	UNIV_NOTHROW
 {
 	ut_ad(!is_owned());
-	ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+	ut_ad(m_context.m_thread_id == ULINT_UNDEFINED);
 
 	m_context.locked(mutex, name, line);
 
@@ -88,7 +88,7 @@ void MutexDebug<Mutex>::locked(
 }
 
 template <typename Mutex>
-void MutexDebug<Mutex>::release(const Mutex* mutex)
+void MutexDebug<Mutex>::release(const Mutex*)
 	UNIV_NOTHROW
 {
 	ut_ad(is_owned());
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 888a32007ce..b61553fc380 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -501,13 +501,13 @@ bool
 rw_lock_lock_word_decr(
 /*===================*/
 	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount,		/*!< in: amount to decrement */
-	lint		threshold);	/*!< in: threshold of judgement */
+	int32_t		amount,		/*!< in: amount to decrement */
+	int32_t		threshold);	/*!< in: threshold of judgement */
 #ifdef UNIV_DEBUG
 /******************************************************************//**
 Checks if the thread has locked the rw-lock in the specified mode, with
 the pass value == 0. */
-ibool
+bool
 rw_lock_own(
 /*========*/
 	rw_lock_t*	lock,		/*!< in: rw-lock */
@@ -571,10 +571,10 @@ struct rw_lock_t
 #endif /* UNIV_DEBUG */
 {
 	/** Holds the state of the lock. */
-	volatile lint	lock_word;
+	int32_t	lock_word;
 
 	/** 1: there are waiters */
-	volatile uint32_t	waiters;
+	int32_t	waiters;
 
 	/** number of granted SX locks. */
 	volatile ulint	sx_recursive;
@@ -603,9 +603,6 @@ struct rw_lock_t
 	/** File name where lock created */
 	const char*	cfile_name;
 
-	/** last s-lock file/line is not guaranteed to be correct */
-	const char*	last_s_file_name;
-
 	/** File name where last x-locked */
 	const char*	last_x_file_name;
 
@@ -615,9 +612,6 @@ struct rw_lock_t
 	/** If 1 then the rw-lock is a block lock */
 	unsigned	is_block_lock:1;
 
-	/** Line number where last time s-locked */
-	unsigned	last_s_line:14;
-
 	/** Line number where last time x-locked */
 	unsigned	last_x_line:14;
 
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 404c7cb9b86..f0c33ecbeda 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -2,7 +2,7 @@
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -77,7 +77,8 @@ rw_lock_get_writer(
 /*===============*/
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
-	lint lock_word = lock->lock_word;
+	int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+						      MY_MEMORY_ORDER_RELAXED);
 
 	ut_ad(lock_word <= X_LOCK_DECR);
 	if (lock_word > X_LOCK_HALF_DECR) {
@@ -109,15 +110,16 @@ rw_lock_get_reader_count(
 /*=====================*/
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
-	lint lock_word = lock->lock_word;
+	int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+						      MY_MEMORY_ORDER_RELAXED);
 	ut_ad(lock_word <= X_LOCK_DECR);
 
 	if (lock_word > X_LOCK_HALF_DECR) {
 		/* s-locked, no x-waiter */
-		return(X_LOCK_DECR - lock_word);
+		return ulint(X_LOCK_DECR - lock_word);
 	} else if (lock_word > 0) {
 		/* s-locked, with sx-locks only */
-		return(X_LOCK_HALF_DECR - lock_word);
+		return ulint(X_LOCK_HALF_DECR - lock_word);
 	} else if (lock_word == 0) {
 		/* x-locked */
 		return(0);
@@ -145,7 +147,8 @@ rw_lock_get_x_lock_count(
 /*=====================*/
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
-	lint lock_copy = lock->lock_word;
+	int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+						      MY_MEMORY_ORDER_RELAXED);
 	ut_ad(lock_copy <= X_LOCK_DECR);
 
 	if (lock_copy == 0 || lock_copy == -X_LOCK_HALF_DECR) {
@@ -158,12 +161,12 @@ rw_lock_get_x_lock_count(
 		/* no s-lock, no sx-lock, 2 or more x-locks.
 		First 2 x-locks are set with -X_LOCK_DECR,
 		all other recursive x-locks are set with -1 */
-		return(2 - (lock_copy + X_LOCK_DECR));
+		return ulint(2 - X_LOCK_DECR - lock_copy);
 	} else {
 		/* no s-lock, 1 or more sx-lock, 2 or more x-locks.
 		First 2 x-locks are set with -(X_LOCK_DECR + X_LOCK_HALF_DECR),
 		all other recursive x-locks are set with -1 */
-		return(2 - (lock_copy + X_LOCK_DECR + X_LOCK_HALF_DECR));
+		return ulint(2 - X_LOCK_DECR - X_LOCK_HALF_DECR - lock_copy);
 	}
 }
 
@@ -178,7 +181,8 @@ rw_lock_get_sx_lock_count(
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 #ifdef UNIV_DEBUG
-	lint lock_copy = lock->lock_word;
+	int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+						      MY_MEMORY_ORDER_RELAXED);
 
 	ut_ad(lock_copy <= X_LOCK_DECR);
 
@@ -197,9 +201,7 @@ rw_lock_get_sx_lock_count(
 }
 
 /******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
+Recursive x-locks are not supported: they should be handled by the caller and
 need not be atomic since they are performed by the current lock holder.
 Returns true if the decrement was made, false if not.
 @return true if decr occurs */
@@ -208,16 +210,17 @@ bool
 rw_lock_lock_word_decr(
 /*===================*/
 	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount,		/*!< in: amount to decrement */
-	lint		threshold)	/*!< in: threshold of judgement */
+	int32_t		amount,		/*!< in: amount to decrement */
+	int32_t		threshold)	/*!< in: threshold of judgement */
 {
-	lint local_lock_word;
-
-	local_lock_word = lock->lock_word;
-	while (local_lock_word > threshold) {
-		if (my_atomic_caslint(&lock->lock_word,
-				      &local_lock_word,
-				      local_lock_word - amount)) {
+	int32_t lock_copy = my_atomic_load32_explicit(&lock->lock_word,
+						      MY_MEMORY_ORDER_RELAXED);
+	while (lock_copy > threshold) {
+		if (my_atomic_cas32_strong_explicit(&lock->lock_word,
+						    &lock_copy,
+						    lock_copy - amount,
+						    MY_MEMORY_ORDER_ACQUIRE,
+						    MY_MEMORY_ORDER_RELAXED)) {
 			return(true);
 		}
 	}
@@ -246,11 +249,6 @@ rw_lock_s_lock_low(
 
 	ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_S, file_name, line));
 
-	/* These debugging values are not set safely: they may be incorrect
-	or even refer to a line that is invalid for the file name. */
-	lock->last_s_file_name = file_name;
-	lock->last_s_line = line;
-
 	return(TRUE);	/* locking succeeded */
 }
 
@@ -304,29 +302,32 @@ rw_lock_x_lock_func_nowait(
 	const char*	file_name,/*!< in: file name where lock requested */
 	unsigned	line)	/*!< in: line where requested */
 {
-	lint oldval = X_LOCK_DECR;
+	int32_t oldval = X_LOCK_DECR;
 
-	if (my_atomic_caslint(&lock->lock_word, &oldval, 0)) {
+	if (my_atomic_cas32_strong_explicit(&lock->lock_word, &oldval, 0,
+					    MY_MEMORY_ORDER_ACQUIRE,
+					    MY_MEMORY_ORDER_RELAXED)) {
 		lock->writer_thread = os_thread_get_curr_id();
 
 	} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
-		/* Relock: this lock_word modification is safe since no other
-		threads can modify (lock, unlock, or reserve) lock_word while
-		there is an exclusive writer and this is the writer thread. */
-		if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+		/* Relock: even though no other thread can modify (lock, unlock
+		or reserve) lock_word while there is an exclusive writer and
+		this is the writer thread, we still want concurrent threads to
+		observe consistent values. */
+		if (oldval == 0 || oldval == -X_LOCK_HALF_DECR) {
 			/* There are 1 x-locks */
-			lock->lock_word -= X_LOCK_DECR;
-		} else if (lock->lock_word <= -X_LOCK_DECR) {
+			my_atomic_add32_explicit(&lock->lock_word, -X_LOCK_DECR,
+						 MY_MEMORY_ORDER_RELAXED);
+		} else if (oldval <= -X_LOCK_DECR) {
 			/* There are 2 or more x-locks */
-			lock->lock_word--;
+			my_atomic_add32_explicit(&lock->lock_word, -1,
+						 MY_MEMORY_ORDER_RELAXED);
+			/* Watch for too many recursive locks */
+			ut_ad(oldval < 1);
 		} else {
 			/* Failure */
 			return(FALSE);
 		}
-
-		/* Watch for too many recursive locks */
-		ut_ad(lock->lock_word < 0);
-
 	} else {
 		/* Failure */
 		return(FALSE);
@@ -354,14 +355,19 @@ rw_lock_s_unlock_func(
 #endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
-	ut_ad(lock->lock_word > -X_LOCK_DECR);
-	ut_ad(lock->lock_word != 0);
-	ut_ad(lock->lock_word < X_LOCK_DECR);
+#ifdef UNIV_DEBUG
+	int32_t	dbg_lock_word = my_atomic_load32_explicit(&lock->lock_word,
+							  MY_MEMORY_ORDER_RELAXED);
+	ut_ad(dbg_lock_word > -X_LOCK_DECR);
+	ut_ad(dbg_lock_word != 0);
+	ut_ad(dbg_lock_word < X_LOCK_DECR);
+#endif
 
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
 
 	/* Increment lock_word to indicate 1 less reader */
-	lint	lock_word = my_atomic_addlint(&lock->lock_word, 1) + 1;
+	int32_t	lock_word = my_atomic_add32_explicit(&lock->lock_word, 1,
+						     MY_MEMORY_ORDER_RELEASE) + 1;
 	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
 
 		/* wait_ex waiter exists. It may not be asleep, but we signal
@@ -387,41 +393,49 @@ rw_lock_x_unlock_func(
 #endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
-	ut_ad(lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR
-	      || lock->lock_word <= -X_LOCK_DECR);
+	int32_t	lock_word = my_atomic_load32_explicit(&lock->lock_word,
+						      MY_MEMORY_ORDER_RELAXED);
 
-	if (lock->lock_word == 0) {
+	ut_ad(lock_word == 0 || lock_word == -X_LOCK_HALF_DECR
+	      || lock_word <= -X_LOCK_DECR);
+
+	if (lock_word == 0) {
 		/* Last caller in a possible recursive chain. */
 		lock->writer_thread = 0;
 	}
 
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
 
-	if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
-		/* There is 1 x-lock */
-		/* atomic increment is needed, because it is last */
-		if (my_atomic_addlint(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
-			ut_error;
-		}
+	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
+		/* Last X-lock owned by this thread; it may still hold SX-locks.
+		ACQ_REL is needed for two reasons:
+		RELEASE: we are releasing the rw-lock
+		ACQUIRE: waiters must be loaded after lock_word is stored */
+		my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+					 MY_MEMORY_ORDER_ACQ_REL);
 
 		/* This no longer has an X-lock but it may still have
 		an SX-lock. So it is now free for S-locks by other threads.
 		We need to signal read/write waiters.
 		We do not need to signal wait_ex waiters, since they cannot
 		exist when there is a writer. */
-		if (lock->waiters) {
-			my_atomic_store32((int32*) &lock->waiters, 0);
+		if (my_atomic_load32_explicit(&lock->waiters,
+					      MY_MEMORY_ORDER_RELAXED)) {
+			my_atomic_store32_explicit(&lock->waiters, 0,
+						   MY_MEMORY_ORDER_RELAXED);
 			os_event_set(lock->event);
 			sync_array_object_signalled();
 		}
-	} else if (lock->lock_word == -X_LOCK_DECR
-		   || lock->lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+	} else if (lock_word == -X_LOCK_DECR
+		   || lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
 		/* There are 2 x-locks */
-		lock->lock_word += X_LOCK_DECR;
+		my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+					 MY_MEMORY_ORDER_RELAXED);
 	} else {
 		/* There are more than 2 x-locks. */
-		ut_ad(lock->lock_word < -X_LOCK_DECR);
-		lock->lock_word += 1;
+		ut_ad(lock_word < -X_LOCK_DECR);
+		my_atomic_add32_explicit(&lock->lock_word, 1,
+					 MY_MEMORY_ORDER_RELAXED);
 	}
 
 	ut_ad(rw_lock_validate(lock));
@@ -447,28 +461,37 @@ rw_lock_sx_unlock_func(
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
 
 	if (lock->sx_recursive == 0) {
+		int32_t	lock_word = my_atomic_load32_explicit(&lock->lock_word,
+							      MY_MEMORY_ORDER_RELAXED);
 		/* Last caller in a possible recursive chain. */
-		if (lock->lock_word > 0) {
+		if (lock_word > 0) {
 			lock->writer_thread = 0;
+			ut_ad(lock_word <= INT_MAX32 - X_LOCK_HALF_DECR);
+
+			/* Last SX-lock owned by this thread, which holds no X-lock.
+			ACQ_REL is needed for two reasons:
+			RELEASE: we are releasing the rw-lock
+			ACQUIRE: waiters must be loaded after lock_word is stored */
+			my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+						 MY_MEMORY_ORDER_ACQ_REL);
 
-			if (my_atomic_addlint(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
-				ut_error;
-			}
 			/* Lock is now free. May have to signal read/write
 			waiters. We do not need to signal wait_ex waiters,
 			since they cannot exist when there is an sx-lock
 			holder. */
-			if (lock->waiters) {
-				my_atomic_store32((int32*) &lock->waiters, 0);
+			if (my_atomic_load32_explicit(&lock->waiters,
+						      MY_MEMORY_ORDER_RELAXED)) {
+				my_atomic_store32_explicit(&lock->waiters, 0,
+							   MY_MEMORY_ORDER_RELAXED);
 				os_event_set(lock->event);
 				sync_array_object_signalled();
 			}
 		} else {
 			/* still has x-lock */
-			ut_ad(lock->lock_word == -X_LOCK_HALF_DECR
-			      || lock->lock_word <= -(X_LOCK_DECR
-						      + X_LOCK_HALF_DECR));
-			lock->lock_word += X_LOCK_HALF_DECR;
+			ut_ad(lock_word == -X_LOCK_HALF_DECR ||
+			      lock_word <= -(X_LOCK_DECR + X_LOCK_HALF_DECR));
+			my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+						 MY_MEMORY_ORDER_RELAXED);
 		}
 	}
 
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 3c40591e873..ffa682b46db 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -57,7 +57,6 @@ extern mysql_pfs_key_t	buf_pool_zip_mutex_key;
 extern mysql_pfs_key_t	cache_last_read_mutex_key;
 extern mysql_pfs_key_t	dict_foreign_err_mutex_key;
 extern mysql_pfs_key_t	dict_sys_mutex_key;
-extern mysql_pfs_key_t	file_format_max_mutex_key;
 extern mysql_pfs_key_t	fil_system_mutex_key;
 extern mysql_pfs_key_t	flush_list_mutex_key;
 extern mysql_pfs_key_t	fts_bg_threads_mutex_key;
@@ -95,7 +94,6 @@ extern mysql_pfs_key_t	srv_innodb_monitor_mutex_key;
 extern mysql_pfs_key_t	srv_misc_tmpfile_mutex_key;
 extern mysql_pfs_key_t	srv_monitor_file_mutex_key;
 extern mysql_pfs_key_t	buf_dblwr_mutex_key;
-extern mysql_pfs_key_t	trx_undo_mutex_key;
 extern mysql_pfs_key_t	trx_mutex_key;
 extern mysql_pfs_key_t	trx_pool_mutex_key;
 extern mysql_pfs_key_t	trx_pool_manager_mutex_key;
@@ -110,6 +108,7 @@ extern mysql_pfs_key_t	sync_array_mutex_key;
 extern mysql_pfs_key_t	thread_mutex_key;
 extern mysql_pfs_key_t  zip_pad_mutex_key;
 extern mysql_pfs_key_t  row_drop_list_mutex_key;
+extern mysql_pfs_key_t	rw_trx_hash_element_mutex_key;
 #endif /* UNIV_PFS_MUTEX */
 
 #ifdef UNIV_PFS_RWLOCK
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 19f992f5f50..773c610d30e 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -108,16 +108,6 @@ V
 Transaction system header
 |
 V
-Transaction undo mutex			The undo log entry must be written
-|					before any index page is modified.
-|					Transaction undo mutex is for the undo
-|					logs the analogue of the tree latch
-|					for a B-tree. If a thread has the
-|					trx undo mutex reserved, it is allowed
-|					to latch the undo log pages in any
-|					order, and also after it has acquired
-|					the fsp latch.
-V
 Rollback segment mutex			The rollback segment mutex must be
 |					reserved, if, e.g., a new page must
 |					be added to an undo log. The rollback
@@ -160,7 +150,7 @@ V
 lock_sys_mutex				Mutex protecting lock_sys_t
 |
 V
-trx_sys->mutex				Mutex protecting trx_sys_t
+trx_sys.mutex				Mutex protecting trx_sys_t
 |
 V
 Threads mutex				Background thread scheduling mutex
@@ -233,6 +223,7 @@ enum latch_level_t {
 	SYNC_REC_LOCK,
 	SYNC_THREADS,
 	SYNC_TRX,
+	SYNC_RW_TRX_HASH_ELEMENT,
 	SYNC_TRX_SYS,
 	SYNC_LOCK_SYS,
 	SYNC_LOCK_WAIT_SYS,
@@ -255,7 +246,6 @@ enum latch_level_t {
 	SYNC_RSEG_HEADER_NEW,
 	SYNC_NOREDO_RSEG,
 	SYNC_REDO_RSEG,
-	SYNC_TRX_UNDO,
 	SYNC_PURGE_LATCH,
 	SYNC_TREE_NODE,
 	SYNC_TREE_NODE_FROM_HASH,
@@ -270,8 +260,6 @@ enum latch_level_t {
 	SYNC_DICT,
 	SYNC_FTS_CACHE,
 
-	SYNC_FILE_FORMAT_TAG,
-
 	SYNC_DICT_OPERATION,
 
 	SYNC_TRX_I_S_LAST_READ,
@@ -339,7 +327,6 @@ enum latch_id_t {
 	LATCH_ID_SRV_MISC_TMPFILE,
 	LATCH_ID_SRV_MONITOR_FILE,
 	LATCH_ID_BUF_DBLWR,
-	LATCH_ID_TRX_UNDO,
 	LATCH_ID_TRX_POOL,
 	LATCH_ID_TRX_POOL_MANAGER,
 	LATCH_ID_TRX,
@@ -380,12 +367,11 @@ enum latch_id_t {
 	LATCH_ID_SCRUB_STAT_MUTEX,
 	LATCH_ID_DEFRAGMENT_MUTEX,
 	LATCH_ID_BTR_DEFRAGMENT_MUTEX,
-	LATCH_ID_MTFLUSH_THREAD_MUTEX,
-	LATCH_ID_MTFLUSH_MUTEX,
 	LATCH_ID_FIL_CRYPT_MUTEX,
 	LATCH_ID_FIL_CRYPT_STAT_MUTEX,
 	LATCH_ID_FIL_CRYPT_DATA_MUTEX,
 	LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+	LATCH_ID_RW_TRX_HASH_ELEMENT,
 	LATCH_ID_TEST_MUTEX,
 	LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
 };
@@ -493,10 +479,10 @@ struct OSMutex {
 	}
 
 private:
-#ifdef UNIV_DEBUG
+#ifdef DBUG_ASSERT_EXISTS
 	/** true if the mutex has been freed/destroyed. */
 	bool			m_freed;
-#endif /* UNIV_DEBUG */
+#endif /* DBUG_ASSERT_EXISTS */
 
 	sys_mutex_t		m_mutex;
 };
@@ -993,8 +979,7 @@ struct latch_t {
 		UNIV_NOTHROW
 		:
 		m_id(id),
-		m_rw_lock(),
-		m_temp_fsp() { }
+		m_rw_lock() {}
 
 	/** Destructor */
 	virtual ~latch_t() UNIV_NOTHROW { }
@@ -1028,24 +1013,6 @@ struct latch_t {
 		return(sync_latch_get_level(m_id));
 	}
 
-	/** @return true if the latch is for a temporary file space*/
-	bool is_temp_fsp() const
-		UNIV_NOTHROW
-	{
-		return(m_temp_fsp);
-	}
-
-	/** Set the temporary tablespace flag. (For internal temporary
-	tables, MySQL 5.7 does not always acquire the index->lock. We
-	need to figure out the context and add some special rules
-	during the checks.) */
-	void set_temp_fsp()
-		UNIV_NOTHROW
-	{
-		ut_ad(get_id() == LATCH_ID_FIL_SPACE);
-		m_temp_fsp = true;
-	}
-
 	/** @return the latch name, m_id must be set  */
 	const char* get_name() const
 		UNIV_NOTHROW
@@ -1061,9 +1028,6 @@ struct latch_t {
 	/** true if it is a rw-lock. In debug mode, rw_lock_t derives from
 	this class and sets this variable. */
 	bool		m_rw_lock;
-
-	/** true if it is an temporary space latch */
-	bool		m_temp_fsp;
 };
 
 /** Subclass this to iterate over a thread's acquired latch levels. */
@@ -1155,92 +1119,88 @@ enum rw_lock_flag_t {
 
 #endif /* UNIV_INNOCHECKSUM */
 
-#ifdef _WIN64
 static inline ulint my_atomic_addlint(ulint *A, ulint B)
 {
+#ifdef _WIN64
   return ulint(my_atomic_add64((volatile int64*)A, B));
+#else
+  return ulint(my_atomic_addlong(A, B));
+#endif
 }
 
 static inline ulint my_atomic_loadlint(const ulint *A)
 {
+#ifdef _WIN64
   return ulint(my_atomic_load64((volatile int64*)A));
+#else
+  return ulint(my_atomic_loadlong(A));
+#endif
 }
 
 static inline lint my_atomic_addlint(volatile lint *A, lint B)
 {
+#ifdef _WIN64
   return my_atomic_add64((volatile int64*)A, B);
+#else
+  return my_atomic_addlong(A, B);
+#endif
 }
 
 static inline lint my_atomic_loadlint(const lint *A)
 {
+#ifdef _WIN64
   return lint(my_atomic_load64((volatile int64*)A));
+#else
+  return my_atomic_loadlong(A);
+#endif
 }
 
 static inline void my_atomic_storelint(ulint *A, ulint B)
 {
+#ifdef _WIN64
   my_atomic_store64((volatile int64*)A, B);
+#else
+  my_atomic_storelong(A, B);
+#endif
 }
 
-static inline lint my_atomic_caslint(volatile lint *A, lint *B, lint C)
+/** Simple non-atomic counter aligned to CACHE_LINE_SIZE
+@tparam	Type	the integer type of the counter */
+template <typename Type>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
 {
-  return my_atomic_cas64((volatile int64*)A, (int64 *)B, C);
-}
+	/** Increment the counter */
+	Type inc() { return add(1); }
+	/** Decrement the counter */
+	Type dec() { return add(Type(~0)); }
 
-static inline ulint my_atomic_caslint(ulint *A, ulint *B, ulint C)
-{
-  return my_atomic_cas64((volatile int64*)A, (int64 *)B, (int64)C);
-}
+	/** Add to the counter
+	@param[in]	i	amount to be added
+	@return	the value of the counter after adding */
+	Type add(Type i) { return m_counter += i; }
 
-#else
-#define my_atomic_addlint my_atomic_addlong
-#define my_atomic_loadlint my_atomic_loadlong
-#define my_atomic_caslint my_atomic_caslong
-#endif
+	/** @return the value of the counter */
+	operator Type() const { return m_counter; }
 
-/** Simple counter aligned to CACHE_LINE_SIZE
-@tparam	Type	the integer type of the counter
-@tparam	atomic	whether to use atomic memory access */
-template <typename Type = ulint, bool atomic = false>
-struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
+private:
+	/** The counter */
+	Type	m_counter;
+};
+
+/** Simple atomic counter aligned to CACHE_LINE_SIZE
+@tparam	Type	lint or ulint */
+template <typename Type = ulint>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_atomic_counter
 {
 	/** Increment the counter */
 	Type inc() { return add(1); }
 	/** Decrement the counter */
-	Type dec() { return sub(1); }
+	Type dec() { return add(Type(~0)); }
 
 	/** Add to the counter
 	@param[in]	i	amount to be added
-	@return	the value of the counter after adding */
-	Type add(Type i)
-	{
-		compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
-		if (atomic) {
-#ifdef _MSC_VER
-// Suppress type conversion/ possible loss of data warning
-#pragma warning (push)
-#pragma warning (disable : 4244)
-#endif
-			return Type(my_atomic_addlint(reinterpret_cast<ulint*>
-						      (&m_counter), i));
-#ifdef _MSC_VER
-#pragma warning (pop)
-#endif
-		} else {
-			return m_counter += i;
-		}
-	}
-	/** Subtract from the counter
-	@param[in]	i	amount to be subtracted
-	@return	the value of the counter after adding */
-	Type sub(Type i)
-	{
-		compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
-		if (atomic) {
-			return Type(my_atomic_addlint(&m_counter, -lint(i)));
-		} else {
-			return m_counter -= i;
-		}
-	}
+	@return	the value of the counter before adding */
+	Type add(Type i) { return my_atomic_addlint(&m_counter, i); }
 
 	/** @return the value of the counter (non-atomic access)! */
 	operator Type() const { return m_counter; }
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index e02c5d88a29..ee7da7b74dc 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -264,10 +264,10 @@ trx_i_s_possibly_fetch_data_into_cache(
 	trx_i_s_cache_t*	cache);	/*!< in/out: cache */
 
 /*******************************************************************//**
-Returns TRUE if the data in the cache is truncated due to the memory
+Returns true if the data in the cache is truncated due to the memory
 limit posed by TRX_I_S_MEM_LIMIT.
 @return TRUE if truncated */
-ibool
+bool
 trx_i_s_cache_is_truncated(
 /*=======================*/
 	trx_i_s_cache_t*	cache);	/*!< in: cache */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 8d31a50f028..27807321212 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -27,14 +27,8 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef trx0purge_h
 #define trx0purge_h
 
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
+#include "trx0rseg.h"
 #include "que0types.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "read0types.h"
 
 /** A dummy undo record used as a return value when we have a whole undo log
 which needs no purge */
@@ -50,16 +44,13 @@ trx_purge_get_log_from_hist(
 /*========================*/
 	fil_addr_t	node_addr);	/*!< in: file address of the history
 					list node of the log */
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
+/** Prepend the history list with an undo log.
+Remove the undo log segment from the rseg slot if it is too big for reuse.
+@param[in]	trx		transaction
+@param[in,out]	undo		undo log
+@param[in,out]	mtr		mini-transaction */
 void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
-	trx_t*		trx,		/*!< in: transaction */
-	page_t*		undo_page,	/*!< in: update undo log header page,
-					x-latched */
-	mtr_t*		mtr);		/*!< in: mtr */
+trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr);
 /*******************************************************************//**
 This function runs a purge batch.
 @return number of undo log pages handled in the batch */
@@ -68,35 +59,7 @@ trx_purge(
 /*======*/
 	ulint	n_purge_threads,	/*!< in: number of purge tasks to
 					submit to task queue. */
-	ulint	limit,			/*!< in: the maximum number of
-					records to purge in one batch */
 	bool	truncate);		/*!< in: truncate history if true */
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-void
-trx_purge_stop(void);
-/*================*/
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-void
-trx_purge_run(void);
-/*================*/
-
-/** Purge states */
-enum purge_state_t {
-	PURGE_STATE_INIT,		/*!< Purge instance created */
-	PURGE_STATE_RUN,		/*!< Purge should be running */
-	PURGE_STATE_STOP,		/*!< Purge should be stopped */
-	PURGE_STATE_EXIT,		/*!< Purge has been shutdown */
-	PURGE_STATE_DISABLED		/*!< Purge was never started */
-};
-
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-purge_state_t
-trx_purge_state(void);
-/*=================*/
 
 /** Rollback segements from a given transaction with trx-no
 scheduled for purge. */
@@ -106,69 +69,28 @@ private:
 		trx_rsegs_t;
 public:
 	typedef trx_rsegs_t::iterator iterator;
+	typedef trx_rsegs_t::const_iterator const_iterator;
 
 	/** Default constructor */
-	TrxUndoRsegs() : m_trx_no() { }
-
-	explicit TrxUndoRsegs(trx_id_t trx_no)
-		:
-		m_trx_no(trx_no)
-	{
-		// Do nothing
-	}
-
-	/** Get transaction number
-	@return trx_id_t - get transaction number. */
-	trx_id_t get_trx_no() const
-	{
-		return(m_trx_no);
-	}
-
-	/** Add rollback segment.
-	@param rseg rollback segment to add. */
-	void push_back(trx_rseg_t* rseg)
-	{
-		m_rsegs.push_back(rseg);
-	}
-
-	/** Erase the element pointed by given iterator.
-	@param[in]	iterator	iterator */
-	void erase(iterator& it)
-	{
-		m_rsegs.erase(it);
-	}
-
-	/** Number of registered rsegs.
-	@return size of rseg list. */
-	ulint size() const
-	{
-		return(m_rsegs.size());
-	}
-
-	/**
-	@return an iterator to the first element */
-	iterator begin()
-	{
-		return(m_rsegs.begin());
-	}
-
-	/**
-	@return an iterator to the end */
-	iterator end()
-	{
-		return(m_rsegs.end());
-	}
+	TrxUndoRsegs() {}
+	/** Constructor */
+	TrxUndoRsegs(trx_rseg_t& rseg)
+		: m_commit(rseg.last_commit), m_rsegs(1, &rseg) {}
+	/** Constructor */
+	TrxUndoRsegs(trx_id_t trx_no, trx_rseg_t& rseg)
+		: m_commit(trx_no << 1), m_rsegs(1, &rseg) {}
 
-	/** Append rollback segments from referred instance to current
-	instance. */
-	void append(const TrxUndoRsegs& append_from)
-	{
-		ut_ad(get_trx_no() == append_from.get_trx_no());
+	/** @return the transaction commit identifier */
+	trx_id_t trx_no() const { return m_commit >> 1; }
 
-		m_rsegs.insert(m_rsegs.end(),
-			       append_from.m_rsegs.begin(),
-			       append_from.m_rsegs.end());
-	}
+	bool operator!=(const TrxUndoRsegs& other) const
+	{ return m_commit != other.m_commit; }
+	bool empty() const { return m_rsegs.empty(); }
+	void erase(iterator& it) { m_rsegs.erase(it); }
+	iterator begin() { return(m_rsegs.begin()); }
+	iterator end() { return(m_rsegs.end()); }
+	const_iterator begin() const { return m_rsegs.begin(); }
+	const_iterator end() const { return m_rsegs.end(); }
 
 	/** Compare two TrxUndoRsegs based on trx_no.
 	@param elem1 first element to compare
@@ -176,17 +98,12 @@ public:
 	@return true if elem1 > elem2 else false.*/
 	bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
 	{
-		return(lhs.m_trx_no > rhs.m_trx_no);
+		return(lhs.m_commit > rhs.m_commit);
 	}
 
-	/** Compiler defined copy-constructor/assignment operator
-	should be fine given that there is no reference to a memory
-	object outside scope of class object.*/
-
 private:
-	/** The rollback segments transaction number. */
-	trx_id_t		m_trx_no;
-
+	/** Copy trx_rseg_t::last_commit */
+	trx_id_t		m_commit;
 	/** Rollback segments of a transaction, scheduled for purge. */
 	trx_rsegs_t		m_rsegs;
 };
@@ -196,16 +113,14 @@ typedef std::priority_queue<
 	std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
 	TrxUndoRsegs>	purge_pq_t;
 
-/**
-Chooses the rollback segment with the smallest trx_no. */
+/** Chooses the rollback segment with the oldest committed transaction */
 struct TrxUndoRsegsIterator {
-
 	/** Constructor */
 	TrxUndoRsegsIterator();
-
 	/** Sets the next rseg to purge in purge_sys.
+	Executed in the purge coordinator thread.
 	@return whether anything is to be purged */
-	bool set_next();
+	inline bool set_next();
 
 private:
 	// Disable copying
@@ -213,38 +128,11 @@ private:
 	TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
 
 	/** The current element to process */
-	TrxUndoRsegs			m_trx_undo_rsegs;
-
-	/** Track the current element in m_trx_undo_rseg */
-	TrxUndoRsegs::iterator		m_iter;
-
-	/** Sentinel value */
-	static const TrxUndoRsegs	NullElement;
+	TrxUndoRsegs			m_rsegs;
+	/** Track the current element in m_rsegs */
+	TrxUndoRsegs::const_iterator	m_iter;
 };
 
-/** This is the purge pointer/iterator. We need both the undo no and the
-transaction no up to which purge has parsed and applied the records. */
-struct purge_iter_t {
-	purge_iter_t()
-		:
-		trx_no(),
-		undo_no(),
-		undo_rseg_space(ULINT_UNDEFINED)
-	{
-		// Do nothing
-	}
-
-	trx_id_t	trx_no;		/*!< Purge has advanced past all
-					transactions whose number is less
-					than this */
-	undo_no_t	undo_no;	/*!< Purge has advanced past all records
-					whose undo number is less than this */
-	ulint		undo_rseg_space;
-					/*!< Last undo record resided in this
-					space id. */
-};
-
-
 /* Namespace to hold all the related functions and variables need for truncate
 of undo tablespace. */
 namespace undo {
@@ -269,17 +157,12 @@ namespace undo {
 	/** Track UNDO tablespace mark for truncate. */
 	class Truncate {
 	public:
-
-		Truncate()
-			:
-			m_undo_for_trunc(ULINT_UNDEFINED),
-			m_rseg_for_trunc(),
-			m_scan_start(1),
-			m_purge_rseg_truncate_frequency(
-				static_cast<ulint>(
-				srv_purge_rseg_truncate_frequency))
+		void create()
 		{
-			/* Do Nothing. */
+			m_undo_for_trunc = ULINT_UNDEFINED;
+			m_scan_start = 1;
+			m_purge_rseg_truncate_frequency =
+				ulint(srv_purge_rseg_truncate_frequency);
 		}
 
 		/** Clear the cached rollback segment. Normally done
@@ -450,53 +333,58 @@ namespace undo {
 class purge_sys_t
 {
 public:
-	/** Construct the purge system. */
-	purge_sys_t();
-	/** Destruct the purge system. */
-	~purge_sys_t();
-
-	rw_lock_t	latch;		/*!< The latch protecting the purge
-					view. A purge operation must acquire an
-					x-latch here for the instant at which
-					it changes the purge view: an undo
-					log operation can prevent this by
-					obtaining an s-latch here. It also
-					protects state and running */
-	os_event_t	event;		/*!< State signal event;
-					os_event_set() and os_event_reset()
-					are protected by purge_sys_t::latch
-					X-lock */
-	ulint		n_stop;		/*!< Counter to track number stops */
-	volatile bool	running;	/*!< true, if purge is active,
-					we check this without the latch too */
-	volatile purge_state_t	state;	/*!< Purge coordinator thread states,
-					we check this in several places
-					without holding the latch. */
+	/** signal state changes; os_event_reset() and os_event_set()
+	are protected by rw_lock_x_lock(latch) */
+	MY_ALIGNED(CACHE_LINE_SIZE)
+	os_event_t	event;
+	/** latch protecting view, m_enabled */
+	MY_ALIGNED(CACHE_LINE_SIZE)
+	rw_lock_t	latch;
+private:
+	/** whether purge is enabled; protected by latch and my_atomic */
+	int32_t		m_enabled;
+	/** number of pending stop() calls without resume() */
+	int32_t		m_paused;
+public:
 	que_t*		query;		/*!< The query graph which will do the
 					parallelized purge operation */
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	ReadView	view;		/*!< The purge will not remove undo logs
 					which are >= this view (purge view) */
-	ulint	n_submitted;	/*!< Count of total tasks submitted
-					to the task queue */
-	ulint	n_completed;	/*!< Count of total tasks completed */
-
-	/*------------------------------*/
-	/* The following two fields form the 'purge pointer' which advances
-	during a purge, and which is used in history list truncation */
-
-	purge_iter_t	iter;		/* Limit up to which we have read and
-					parsed the UNDO log records.  Not
-					necessarily purged from the indexes.
-					Note that this can never be less than
-					the limit below, we check for this
-					invariant in trx0purge.cc */
-	purge_iter_t	limit;		/* The 'purge pointer' which advances
-					during a purge, and which is used in
-					history list truncation */
-#ifdef UNIV_DEBUG
-	purge_iter_t	done;		/* Indicate 'purge pointer' which have
-					purged already accurately. */
-#endif /* UNIV_DEBUG */
+	/** Total number of tasks submitted by srv_purge_coordinator_thread.
+	Not accessed by other threads. */
+	ulint	n_submitted;
+	/** Number of completed tasks. Accessed by srv_purge_coordinator
+	and srv_worker_thread by my_atomic. */
+	ulint	n_completed;
+
+	/** Iterator to the undo log records of committed transactions */
+	struct iterator
+	{
+		bool operator<=(const iterator& other) const
+		{
+			if (commit < other.commit) return true;
+			if (commit > other.commit) return false;
+			return undo_no <= other.undo_no;
+		}
+
+		/** @return the commit number of the transaction */
+		trx_id_t trx_no() const { return commit >> 1; }
+		void reset_trx_no(trx_id_t trx_no) { commit = trx_no << 1; }
+
+		/** 2 * trx_t::no + old_insert of the committed transaction */
+		trx_id_t	commit;
+		/** The record number within the committed transaction's undo
+		log, increasing, purged from 0 onwards */
+		undo_no_t	undo_no;
+	};
+
+	/** The tail of the purge queue; the last parsed undo log of a
+	committed transaction. */
+	iterator	tail;
+	/** The head of the purge queue; any older undo logs of committed
+	transactions may be discarded (history list truncation). */
+	iterator	head;
 	/*-----------------------------*/
 	bool		next_stored;	/*!< whether rseg holds the next record
 					to purge */
@@ -524,10 +412,70 @@ public:
 
 	undo::Truncate	undo_trunc;	/*!< Track UNDO tablespace marked
 					for truncate. */
+
+
+  /**
+    Constructor.
+
+    Some members may require late initialisation, thus we just mark object as
+    uninitialised. Real initialisation happens in create().
+  */
+
+  purge_sys_t() : event(NULL), m_enabled(false) {}
+
+
+  /** Create the instance */
+  void create();
+
+  /** Close the purge system on shutdown */
+  void close();
+
+  /** @return whether purge is enabled */
+  bool enabled()
+  {
+    return my_atomic_load32_explicit(&m_enabled, MY_MEMORY_ORDER_RELAXED);
+  }
+  /** @return whether purge is enabled; the caller must hold latch */
+  bool enabled_latched()
+  {
+    ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+    return bool(m_enabled);
+  }
+  /** @return whether the purge coordinator is paused */
+  bool paused()
+  { return my_atomic_load32_explicit(&m_paused, MY_MEMORY_ORDER_RELAXED); }
+  /** @return whether the purge coordinator is paused; caller holds latch */
+  bool paused_latched()
+  {
+    ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+    return m_paused != 0;
+  }
+
+  /** Enable purge at startup. Not protected by latch; the main thread
+  will wait for purge_sys.enabled() in srv_start() */
+  void coordinator_startup()
+  {
+    ut_ad(!enabled());
+    my_atomic_store32_explicit(&m_enabled, true, MY_MEMORY_ORDER_RELAXED);
+  }
+
+  /** Disable purge at shutdown */
+  void coordinator_shutdown()
+  {
+    ut_ad(enabled());
+    my_atomic_store32_explicit(&m_enabled, false, MY_MEMORY_ORDER_RELAXED);
+  }
+
+  /** @return whether the purge coordinator thread is active */
+  bool running();
+  /** Stop purge during FLUSH TABLES FOR EXPORT */
+  void stop();
+  /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
+  void resume();
 };
 
 /** The global data structure coordinating a purge */
-extern purge_sys_t*	purge_sys;
+extern purge_sys_t	purge_sys;
 
 /** Info required to purge a record */
 struct trx_purge_rec_t {
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index c32651b7a00..cd519a8e64d 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -40,24 +40,3 @@ trx_purge_get_log_from_hist(
 
 	return(node_addr);
 }
-
-/********************************************************************//**
-address of its history list node.
-@return true if purge_sys_t::limit <= purge_sys_t::iter */
-UNIV_INLINE
-bool
-trx_purge_check_limit(void)
-/*=======================*/
-{
-	/* limit is used to track till what point purge element has been
-	processed and so limit <= iter.
-	undo_no ordering is enforced only within the same rollback segment.
-	If a transaction uses multiple rollback segments then we need to
-	consider the rollback segment space id too. */
-	return(purge_sys->iter.trx_no > purge_sys->limit.trx_no
-	       || (purge_sys->iter.trx_no == purge_sys->limit.trx_no
-		   && ((purge_sys->iter.undo_no >= purge_sys->limit.undo_no)
-		       || (purge_sys->iter.undo_rseg_space
-			   != purge_sys->limit.undo_rseg_space))));
-}
-
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index 2551d5759ae..88c98625462 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -56,22 +56,6 @@ trx_undo_rec_get_type(
 /*==================*/
 	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
 /**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
-	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
-	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
-/**********************************************************************//**
 Reads the undo log record number.
 @return undo no */
 UNIV_INLINE
@@ -114,7 +98,7 @@ trx_undo_rec_get_row_ref(
 				used, as we do NOT copy the data in the
 				record! */
 	dict_index_t*	index,	/*!< in: clustered index */
-	dtuple_t**	ref,	/*!< out, own: row reference */
+	const dtuple_t**ref,	/*!< out, own: row reference */
 	mem_heap_t*	heap);	/*!< in: memory heap from which the memory
 				needed is allocated */
 /**********************************************************************//**
@@ -260,25 +244,22 @@ trx_undo_prev_version_build(
 				into this function by purge thread or not.
 				And if we read "after image" of undo log */
 
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
+/** Parse MLOG_UNDO_INSERT.
+@param[in]	ptr	log record
+@param[in]	end_ptr	end of log record buffer
+@param[in,out]	page	page or NULL
+@return	end of log record
+@retval	NULL	if the log record is incomplete */
 byte*
 trx_undo_parse_add_undo_rec(
-/*========================*/
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	page_t*	page);	/*!< in: page or NULL */
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	page_t*	page,	/*!< in: page or NULL */
-	mtr_t*	mtr);	/*!< in: mtr or NULL */
+	const byte*	ptr,
+	const byte*	end_ptr,
+	page_t*		page);
+/** Erase the unused undo log page end.
+@param[in,out]	undo_page	undo log page
+@return whether the page contained something */
+bool
+trx_undo_erase_page_end(page_t* undo_page);
 
 /** Read from an undo log record a non-virtual column value.
 @param[in,out]	ptr		pointer to remaining part of the undo record
@@ -330,6 +311,8 @@ compilation info multiplied by 16 is ORed to this value in an undo log
 record */
 
 #define	TRX_UNDO_RENAME_TABLE	9	/*!< RENAME TABLE */
+#define	TRX_UNDO_INSERT_DEFAULT	10	/*!< insert a "default value"
+					pseudo-record for instant ALTER */
 #define	TRX_UNDO_INSERT_REC	11	/* fresh insert into clustered index */
 #define	TRX_UNDO_UPD_EXIST_REC	12	/* update of a non-delete-marked
 					record */
@@ -345,6 +328,9 @@ record */
 					storage fields: used by purge to
 					free the external storage */
 
+/** The search tuple corresponding to TRX_UNDO_INSERT_DEFAULT */
+extern const dtuple_t trx_undo_default_rec;
+
 #include "trx0rec.ic"
 
 #endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index d0771a94b05..a9794eb213d 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -36,35 +36,6 @@ trx_undo_rec_get_type(
 }
 
 /**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
-	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
-{
-	return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
-	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
-{
-	if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
 Reads the undo log record number.
 @return undo no */
 UNIV_INLINE
@@ -93,8 +64,8 @@ trx_undo_rec_copy(
 	ulint		len;
 
 	len = mach_read_from_2(undo_rec)
-		- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
-	ut_ad(len < UNIV_PAGE_SIZE);
+		- ut_align_offset(undo_rec, srv_page_size);
+	ut_ad(len < srv_page_size);
 	trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
 		mem_heap_dup(heap, undo_rec, len));
 	mach_write_to_2(rec, len);
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 66e6f137b5a..af5ed73f04b 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -33,7 +33,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "mtr0mtr.h"
 #include "trx0sys.h"
 
-extern bool		trx_rollback_or_clean_is_active;
+extern bool		trx_rollback_is_active;
 extern const trx_t*	trx_roll_crash_recv_trx;
 
 /*******************************************************************//**
@@ -63,20 +63,17 @@ trx_undo_rec_t*
 trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/** Report progress when rolling back a row of a recovered transaction.
-@return	whether the rollback should be aborted due to pending shutdown */
-bool
-trx_roll_must_shutdown();
+/** Report progress when rolling back a row of a recovered transaction. */
+void trx_roll_report_progress();
 /*******************************************************************//**
 Rollback or clean up any incomplete transactions which were
 encountered in crash recovery.  If the transaction already was
 committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
+transaction was not yet committed, then we roll it back.
+@param all true=roll back all recovered active transactions;
+false=roll back any incomplete dictionary transaction */
 void
-trx_rollback_or_clean_recovered(
-/*============================*/
-	ibool	all);	/*!< in: FALSE=roll back dictionary transactions;
-			TRUE=roll back all non-PREPARED transactions */
+trx_rollback_recovered(bool all);
 /*******************************************************************//**
 Rollback or clean up any incomplete transactions which were
 encountered in crash recovery.  If the transaction already was
@@ -86,11 +83,7 @@ Note: this is done in a background thread.
 @return a dummy parameter */
 extern "C"
 os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
-	void*	arg MY_ATTRIBUTE((unused)));
-			/*!< in: a dummy parameter required by
-			os_thread_create */
+DECLARE_THREAD(trx_rollback_all_recovered)(void*);
 /*********************************************************************//**
 Creates a rollback command node struct.
 @return own: rollback node struct */
@@ -225,6 +218,4 @@ struct trx_named_savept_t{
 					transaction */
 };
 
-#include "trx0roll.ic"
-
 #endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
deleted file mode 100644
index b09a1471150..00000000000
--- a/storage/innobase/include/trx0roll.ic
+++ /dev/null
@@ -1,62 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.ic
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Check if undo numbering is maintained while processing undo records
-for rollback.
-@return true if undo numbering is maintained. */
-UNIV_INLINE
-bool
-trx_roll_check_undo_rec_ordering(
-/*=============================*/
-	undo_no_t	curr_undo_rec_no,	/*!< in: record number of
-						undo record to process. */
-	ulint		curr_undo_space_id,	/*!< in: space-id of rollback
-						segment that contains the
-						undo record to process. */
-	const trx_t*	trx)			/*!< in: transaction */
-{
-	/* Each transaction now can have multiple rollback segments.
-	If a transaction involves temp and non-temp tables, both the rollback
-	segments will be active. In this case undo records will be distrubuted
-	across the two rollback segments.
-	CASE-1: UNDO action will apply all undo records from one rollback
-	segment before moving to next. This means undo record numbers can't be
-	sequential but ordering is still enforced as next undo record number
-	should be < processed undo record number.
-	CASE-2: For normal rollback (not initiated by crash) all rollback
-	segments will be active (including non-redo).
-	Based on transaction operation pattern undo record number of first
-	undo record from this new rollback segment can be > last undo number
-	from previous rollback segment and so we ignore this check if
-	rollback segments are switching. Once switched new rollback segment
-	should re-follow undo record number pattern (as mentioned in CASE-1). */
-
-	return(curr_undo_space_id != trx->undo_rseg_space
-	       || curr_undo_rec_no + 1 <= trx->undo_no);
-}
-#endif /* UNIV_DEBUG */
-
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 48c5133644c..dbd80486b71 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef trx0rseg_h
 #define trx0rseg_h
 
-#include "trx0types.h"
 #include "trx0sys.h"
 #include "fut0lst.h"
-#include <vector>
 
 /** Gets a rollback segment header.
 @param[in]	space		space where placed
@@ -39,10 +37,7 @@ Created 3/26/1996 Heikki Tuuri
 @return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
-trx_rsegf_get(
-	ulint			space,
-	ulint			page_no,
-	mtr_t*			mtr);
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr);
 
 /** Gets a newly created rollback segment header.
 @param[in]	space		space where placed
@@ -57,16 +52,6 @@ trx_rsegf_get_new(
 	mtr_t*			mtr);
 
 /***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
-	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
-	ulint		n,	/*!< in: index of slot */
-	mtr_t*		mtr);	/*!< in: mtr */
-/***************************************************************//**
 Sets the file page number of the nth undo log slot. */
 UNIV_INLINE
 void
@@ -81,25 +66,20 @@ Looks for a free slot for an undo log segment.
 @return slot index or ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
-	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
-	mtr_t*		mtr);	/*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf);
 
-/** Creates a rollback segment header.
-This function is called only when a new rollback segment is created in
-the database.
-@param[in]	space		space id
-@param[in]	max_size	max size in pages
-@param[in]	rseg_slot_no	rseg id == slot number in trx sys
+/** Create a rollback segment header.
+@param[in,out]	space		system, undo, or temporary tablespace
+@param[in]	rseg_id		rollback segment identifier
+@param[in,out]	sys_header	the TRX_SYS page (NULL for temporary rseg)
 @param[in,out]	mtr		mini-transaction
 @return page number of the created segment, FIL_NULL if fail */
 ulint
 trx_rseg_header_create(
-	ulint			space,
-	ulint			max_size,
-	ulint			rseg_slot_no,
-	mtr_t*			mtr);
+	fil_space_t*	space,
+	ulint		rseg_id,
+	buf_block_t*	sys_header,
+	mtr_t*		mtr);
 
 /** Initialize the rollback segments in memory at database startup. */
 void
@@ -133,7 +113,7 @@ trx_rseg_get_n_undo_tablespaces(
 	ulint*		space_ids);	/*!< out: array of space ids of
 					UNDO tablespaces */
 /* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS	(UNIV_PAGE_SIZE / 16)
+#define TRX_RSEG_N_SLOTS	(srv_page_size / 16)
 
 /* Maximum number of transactions supported by a single rollback segment */
 #define TRX_RSEG_MAX_N_TRXS	(TRX_RSEG_N_SLOTS / 2)
@@ -150,32 +130,25 @@ struct trx_rseg_t {
 	RsegMutex			mutex;
 
 	/** space where the rollback segment header is placed */
-	ulint				space;
+	fil_space_t*			space;
 
 	/** page number of the rollback segment header */
 	ulint				page_no;
 
-	/** maximum allowed size in pages */
-	ulint				max_size;
-
 	/** current size in pages */
 	ulint				curr_size;
 
 	/*--------------------------------------------------------*/
-	/* Fields for update undo logs */
-	/** List of update undo logs */
-	UT_LIST_BASE_NODE_T(trx_undo_t)	update_undo_list;
-
-	/** List of update undo log segments cached for fast reuse */
-	UT_LIST_BASE_NODE_T(trx_undo_t)	update_undo_cached;
+	/* Fields for undo logs */
+	/** List of undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t)	undo_list;
 
-	/*--------------------------------------------------------*/
-	/* Fields for insert undo logs */
-	/** List of insert undo logs */
-	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+	/** List of undo log segments cached for fast reuse */
+	UT_LIST_BASE_NODE_T(trx_undo_t)	undo_cached;
 
-	/** List of insert undo log segments cached for fast reuse */
-	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+	/** List of recovered old insert_undo logs of incomplete
+	transactions (to roll back or XA COMMIT & purge) */
+	UT_LIST_BASE_NODE_T(trx_undo_t) old_insert_list;
 
 	/*--------------------------------------------------------*/
 
@@ -186,11 +159,11 @@ struct trx_rseg_t {
 	/** Byte offset of the last not yet purged log header */
 	ulint				last_offset;
 
-	/** Transaction number of the last not yet purged log */
-	trx_id_t			last_trx_no;
+	/** (trx_t::no << 1 | is_update) of the last not yet purged log */
+	trx_id_t			last_commit;
 
-	/** TRUE if the last not yet purged log needs purging */
-	ibool				last_del_marks;
+	/** Whether the log segment needs purge */
+	bool				needs_purge;
 
 	/** Reference counter to track rseg allocated transactions. */
 	ulint				trx_ref_count;
@@ -199,23 +172,31 @@ struct trx_rseg_t {
 	UNDO-tablespace marked for truncate. */
 	bool				skip_allocation;
 
+	/** @return the commit ID of the last committed transaction */
+	trx_id_t last_trx_no() const { return last_commit >> 1; }
+
+	void set_last_trx_no(trx_id_t trx_no, bool is_update)
+	{
+		last_commit = trx_no << 1 | trx_id_t(is_update);
+	}
+
 	/** @return whether the rollback segment is persistent */
 	bool is_persistent() const
 	{
-		ut_ad(space == SRV_TMP_SPACE_ID
-		      || space == TRX_SYS_SPACE
+		ut_ad(space == fil_system.temp_space
+		      || space == fil_system.sys_space
 		      || (srv_undo_space_id_start > 0
-			  && space >= srv_undo_space_id_start
-			  && space <= srv_undo_space_id_start
+			  && space->id >= srv_undo_space_id_start
+			  && space->id <= srv_undo_space_id_start
 			  + TRX_SYS_MAX_UNDO_SPACES));
-		ut_ad(space == SRV_TMP_SPACE_ID
-		      || space == TRX_SYS_SPACE
+		ut_ad(space == fil_system.temp_space
+		      || space == fil_system.sys_space
 		      || (srv_undo_space_id_start > 0
-			  && space >= srv_undo_space_id_start
-			  && space <= srv_undo_space_id_start
+			  && space->id >= srv_undo_space_id_start
+			  && space->id <= srv_undo_space_id_start
 			  + srv_undo_tablespaces_active)
 		      || !srv_was_started);
-		return(space != SRV_TMP_SPACE_ID);
+		return(space->id != SRV_TMP_SPACE_ID);
 	}
 };
 
@@ -232,19 +213,99 @@ struct trx_rseg_t {
 
 /* Transaction rollback segment header */
 /*-------------------------------------------------------------*/
-#define	TRX_RSEG_MAX_SIZE	0	/* Maximum allowed size for rollback
-					segment in pages */
-#define	TRX_RSEG_HISTORY_SIZE	4	/* Number of file pages occupied
-					by the logs in the history list */
-#define	TRX_RSEG_HISTORY	8	/* The update undo logs for committed
-					transactions */
+/** 0xfffffffe = pre-MariaDB 10.3.5 format; 0=MariaDB 10.3.5 or later */
+#define	TRX_RSEG_FORMAT		0
+/** Number of pages in the TRX_RSEG_HISTORY list */
+#define	TRX_RSEG_HISTORY_SIZE	4
+/** Committed transaction logs that have not been purged yet */
+#define	TRX_RSEG_HISTORY	8
 #define	TRX_RSEG_FSEG_HEADER	(8 + FLST_BASE_NODE_SIZE)
 					/* Header for the file segment where
 					this page is placed */
 #define TRX_RSEG_UNDO_SLOTS	(8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
 					/* Undo log segment slots */
+/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
+#define TRX_RSEG_MAX_TRX_ID	(TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS	\
+				 * TRX_RSEG_SLOT_SIZE)
+
+/** 8 bytes offset within the binlog file */
+#define TRX_RSEG_BINLOG_OFFSET		(TRX_RSEG_MAX_TRX_ID + 8)
+/** MySQL log file name, 512 bytes, including terminating NUL
+(valid only if TRX_RSEG_FORMAT is 0).
+If no binlog information is present, the first byte is NUL. */
+#define TRX_RSEG_BINLOG_NAME		(TRX_RSEG_MAX_TRX_ID + 16)
+/** Maximum length of binlog file name, including terminating NUL, in bytes */
+#define TRX_RSEG_BINLOG_NAME_LEN	512
+
+#ifdef WITH_WSREP
+/** The offset to WSREP XID headers (right after TRX_RSEG_BINLOG_NAME) */
+#define	TRX_RSEG_WSREP_XID_INFO		(TRX_RSEG_MAX_TRX_ID + 16 + 512)
+
+/** WSREP XID format (1 if present and valid, 0 if not present) */
+#define TRX_RSEG_WSREP_XID_FORMAT	TRX_RSEG_WSREP_XID_INFO
+/** WSREP XID GTRID length */
+#define TRX_RSEG_WSREP_XID_GTRID_LEN	(TRX_RSEG_WSREP_XID_INFO + 4)
+/** WSREP XID bqual length */
+#define TRX_RSEG_WSREP_XID_BQUAL_LEN	(TRX_RSEG_WSREP_XID_INFO + 8)
+/** WSREP XID data (XIDDATASIZE bytes) */
+#define TRX_RSEG_WSREP_XID_DATA		(TRX_RSEG_WSREP_XID_INFO + 12)
+#endif /* WITH_WSREP */
+
 /*-------------------------------------------------------------*/
 
+/** Read the page number of an undo log slot.
+@param[in]	rsegf	rollback segment header
+@param[in]	n	slot number */
+inline
+uint32_t
+trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n)
+{
+	ut_ad(n < TRX_RSEG_N_SLOTS);
+	return mach_read_from_4(rsegf + TRX_RSEG_UNDO_SLOTS
+				+ n * TRX_RSEG_SLOT_SIZE);
+}
+
+#ifdef WITH_WSREP
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out]	rseg_header	rollback segment header
+@param[in]	xid		WSREP XID
+@param[in,out]	mtr		mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+	trx_rsegf_t*	rseg_header,
+	const XID*	xid,
+	mtr_t*		mtr);
+
+/** Update WSREP checkpoint XID in first rollback segment header
+as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
+are no wsrep transactions committing.
+If the UUID part of the WSREP XID does not match the UUIDs of the XIDs already
+stored in the rollback segments, the WSREP XID in all the remaining rollback
+segments will be reset.
+@param[in]	xid		WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid);
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out]	xid	WSREP XID
+@return	whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid);
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out]	rseg_header	rollback segment header page
+@param[in,out]	mtr		mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr);
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out]	rseg_header	rollback segment header
+@param[in]	trx		committing transaction
+@param[in,out]	mtr		mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr);
+
 #include "trx0rseg.ic"
 
 #endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index dac7dadfb68..9edfe897155 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,28 +34,18 @@ Created 3/26/1996 Heikki Tuuri
 @return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
-trx_rsegf_get(
-	ulint			space,
-	ulint			page_no,
-	mtr_t*			mtr)
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr)
 {
-	buf_block_t*	block;
-	trx_rsegf_t*	header;
-
-	ut_ad(space <= srv_undo_space_id_start + srv_undo_tablespaces_active
-	      || space == SRV_TMP_SPACE_ID
+	ut_ad(space == fil_system.sys_space || space == fil_system.temp_space
+	      || srv_is_undo_tablespace(space->id)
 	      || !srv_was_started);
-	ut_ad(space <= srv_undo_space_id_start + TRX_SYS_MAX_UNDO_SPACES
-	      || space == SRV_TMP_SPACE_ID);
 
-	block = buf_page_get(
-		page_id_t(space, page_no), univ_page_size, RW_X_LATCH, mtr);
+	buf_block_t* block = buf_page_get(page_id_t(space->id, page_no),
+					  univ_page_size, RW_X_LATCH, mtr);
 
 	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
 
-	header = TRX_RSEG + buf_block_get_frame(block);
-
-	return(header);
+	return TRX_RSEG + block->frame;
 }
 
 /** Gets a newly created rollback segment header.
@@ -88,23 +78,6 @@ trx_rsegf_get_new(
 }
 
 /***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
-	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
-	ulint		n,	/*!< in: index of slot */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	ut_a(n < TRX_RSEG_N_SLOTS);
-
-	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
-			      + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/***************************************************************//**
 Sets the file page number of the nth undo log slot. */
 UNIV_INLINE
 void
@@ -126,10 +99,7 @@ Looks for a free slot for an undo log segment.
 @return slot index or ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
-	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
-	mtr_t*		mtr)	/*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf)
 {
 	ulint		i;
 	ulint		page_no;
@@ -143,7 +113,7 @@ trx_rsegf_undo_find_free(
 #endif
 
 	for (i = 0; i < max_slots; i++) {
-		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+		page_no = trx_rsegf_get_nth_undo(rsegf, i);
 
 		if (page_no == FIL_NULL) {
 			return(i);
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index ebe70a1c70e..6af212d35ff 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,6 @@ Created 3/26/1996 Heikki Tuuri
 #include "mem0mem.h"
 #include "mtr0mtr.h"
 #include "ut0byte.h"
-#include "mem0mem.h"
 #include "ut0lst.h"
 #include "read0types.h"
 #include "page0types.h"
@@ -47,185 +46,84 @@ Created 3/26/1996 Heikki Tuuri
 
 typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
 
-// Forward declaration
-class MVCC;
-class ReadView;
-
-/** The transaction system */
-extern trx_sys_t*	trx_sys;
-
 /** Checks if a page address is the trx sys header page.
 @param[in]	page_id	page id
 @return true if trx sys header page */
-UNIV_INLINE
+inline
 bool
-trx_sys_hdr_page(
-	const page_id_t&	page_id);
-
-/** Initialize the transaction system main-memory data structures. */
-void trx_sys_init_at_db_start();
+trx_sys_hdr_page(const page_id_t& page_id)
+{
+	return(page_id.space() == TRX_SYS_SPACE
+	       && page_id.page_no() == TRX_SYS_PAGE_NO);
+}
 
 /*****************************************************************//**
-Creates the trx_sys instance and initializes purge_queue and mutex. */
-void
-trx_sys_create(void);
-/*================*/
-/*****************************************************************//**
 Creates and initializes the transaction system at the database creation. */
 void
 trx_sys_create_sys_pages(void);
 /*==========================*/
-/** @return an unallocated rollback segment slot in the TRX_SYS header
+/** Find an available rollback segment.
+@param[in]	sys_header	the TRX_SYS page
+@return an unallocated rollback segment slot in the TRX_SYS header
 @retval ULINT_UNDEFINED if not found */
 ulint
-trx_sysf_rseg_find_free(mtr_t* mtr);
-/**********************************************************************//**
-Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
-	mtr_t*	mtr);	/*!< in: mtr */
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
-	ulint		i,		/*!< in: slot index == rseg id */
-	mtr_t*		mtr);		/*!< in: mtr */
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-file copy.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
-	ulint		i,		/*!< in: slot index == rseg id */
-	mtr_t*		mtr);		/*!< in: mtr */
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
-	ulint		i,		/*!< in: slot index == rseg id */
-	ulint		space,		/*!< in: space id */
-	mtr_t*		mtr);		/*!< in: mtr */
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
-	ulint		i,		/*!< in: slot index == rseg id */
-	ulint		page_no,	/*!< in: page number, FIL_NULL if
-					the slot is reset to unused */
-	mtr_t*		mtr);		/*!< in: mtr */
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id();
-/*===================*/
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void);
-/*========================*/
+trx_sys_rseg_find_free(const buf_block_t* sys_header);
+/** Request the TRX_SYS page.
+@param[in]	rw	whether to lock the page for writing
+@return the TRX_SYS page
+@retval	NULL	if the page cannot be read */
+inline
+buf_block_t*
+trx_sysf_get(mtr_t* mtr, bool rw = true)
+{
+	buf_block_t* block = buf_page_get(
+		page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+		univ_page_size, rw ? RW_X_LATCH : RW_S_LATCH, mtr);
+	if (block) {
+		buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+	}
+	return block;
+}
 
 #ifdef UNIV_DEBUG
 /* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
 extern uint			trx_rseg_n_slots_debug;
 #endif
 
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
+/** Write DB_TRX_ID.
+@param[out]	db_trx_id	the DB_TRX_ID field to be written to
+@param[in]	id		transaction ID */
 UNIV_INLINE
 void
-trx_write_trx_id(
-/*=============*/
-	byte*		ptr,	/*!< in: pointer to memory where written */
-	trx_id_t	id);	/*!< in: id */
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
+trx_write_trx_id(byte* db_trx_id, trx_id_t id)
+{
+	compile_time_assert(DATA_TRX_ID_LEN == 6);
+	ut_ad(id);
+	mach_write_to_6(db_trx_id, id);
+}
+
+/** Read a transaction identifier.
 @return id */
-UNIV_INLINE
+inline
 trx_id_t
-trx_read_trx_id(
-/*============*/
-	const byte*	ptr);	/*!< in: pointer to memory from where to read */
-/****************************************************************//**
-Looks for the trx instance with the given id in the rw trx_list.
-@return	the trx handle or NULL if not found */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
-	trx_id_t	trx_id);/*!< in: trx id to search for */
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state to
-find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void);
-/*===================*/
-/****************************************************************//**
-Checks if a rw transaction with the given id is active.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
-	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt);	/*!< in: NULL or pointer to a flag
-					that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding trx_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
-	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt,	/*!< in: NULL or pointer to a flag
-					that will be set if corrupt */
-	bool		do_ref_count);	/*!< in: if true then increment the
-					trx_t::n_ref_count */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
-	trx_id_t	trx_id)		/*!< in: transaction identifier */
-	MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+trx_read_trx_id(const byte* ptr)
+{
+	compile_time_assert(DATA_TRX_ID_LEN == 6);
+	return(mach_read_from_6(ptr));
+}
+
+#ifdef UNIV_DEBUG
+/** Check that the DB_TRX_ID in a record is valid.
+@param[in]	db_trx_id	the DB_TRX_ID column to validate
+@param[in]	trx_id		the id of the ALTER TABLE transaction */
+inline bool trx_id_check(const void* db_trx_id, trx_id_t trx_id)
+{
+	trx_id_t id = trx_read_trx_id(static_cast<const byte*>(db_trx_id));
+	ut_ad(id == 0 || id > trx_id);
+	return true;
+}
+#endif
+
 /*****************************************************************//**
 Updates the offset information about the end of the MySQL binlog entry
 which corresponds to the transaction just being committed. In a MySQL
@@ -236,138 +134,17 @@ trx_sys_update_mysql_binlog_offset(
 /*===============================*/
 	const char*	file_name,/*!< in: MySQL log file name */
 	int64_t		offset,	/*!< in: position in that log file */
-        trx_sysf_t*     sys_header, /*!< in: trx sys header */
-	mtr_t*		mtr);	/*!< in: mtr */
+	buf_block_t*	sys_header, /*!< in,out: trx sys header */
+	mtr_t*		mtr);	/*!< in,out: mini-transaction */
 /** Display the MySQL binlog offset info if it is present in the trx
 system header. */
 void
 trx_sys_print_mysql_binlog_offset();
-#ifdef WITH_WSREP
 
-/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
-@param[in]	xid		Transaction XID
-@param[in,out]	sys_header	sys_header
-@param[in]	mtr		minitransaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
-	const XID*	xid,
-	trx_sysf_t*	sys_header,
-	mtr_t*		mtr);
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out]	xid	WSREP XID
-@return	whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid);
-#endif /* WITH_WSREP */
-
-/** Initializes the tablespace tag system. */
-void
-trx_sys_file_format_init(void);
-/*==========================*/
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-void
-trx_sys_file_format_close(void);
-/*===========================*/
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-void
-trx_sys_file_format_tag_init(void);
-/*==============================*/
-
-/*****************************************************************//**
-Shutdown/Close the transaction system. */
-void
-trx_sys_close(void);
-/*===============*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
-	const ulint	id);		/*!< in: id of the file format */
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
-	ulint		format_id,	/*!< in: file format id */
-	const char**	name);		/*!< out: max file format name or
-					NULL if not needed. */
 /** Create the rollback segments.
 @return	whether the creation succeeded */
 bool
 trx_sys_create_rsegs();
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void);
-/*======================*/
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-@return total number of active transactions or 0 if none */
-ulint
-trx_sys_any_active_transactions(void);
-/*=================================*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
-/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
-	ulint		max_format_id);	/*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
-	const char**	name,		/*!< out: max file format name */
-	ulint		format_id);	/*!< in: file format identifier */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
-	const ulint	id);	/*!< in: id of the file format */
-
-/**
-Add the transaction to the RW transaction set
-@param trx		transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx);
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_sys_t::rw_trx_list.
-@return true if the list is valid */
-bool
-trx_sys_validate_trx_list();
-/*========================*/
-#endif /* UNIV_DEBUG */
 
 /** The automatically created system rollback segment has this id */
 #define TRX_SYS_SYSTEM_RSEG_ID	0
@@ -377,18 +154,13 @@ trx_sys_validate_trx_list();
 
 /** Transaction system header */
 /*------------------------------------------------------------- @{ */
-#define	TRX_SYS_TRX_ID_STORE	0	/*!< the maximum trx id or trx
-					number modulo
-					TRX_SYS_TRX_ID_UPDATE_MARGIN
-					written to a file page by any
-					transaction; the assignment of
-					transaction ids continues from
-					this number rounded up by
-					TRX_SYS_TRX_ID_UPDATE_MARGIN
-					plus
-					TRX_SYS_TRX_ID_UPDATE_MARGIN
-					when the database is
-					started */
+/** In old versions of InnoDB, this persisted the value of
+trx_sys.get_max_trx_id(). Starting with MariaDB 10.3.5,
+the field TRX_RSEG_MAX_TRX_ID in rollback segment header pages
+and the fields TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO in undo log pages
+are used instead. The field only exists for the purpose of upgrading
+from older MySQL or MariaDB versions. */
+#define	TRX_SYS_TRX_ID_STORE	0
 #define TRX_SYS_FSEG_HEADER	8	/*!< segment header for the
 					tablespace segment the trx
 					system is created into */
@@ -398,16 +170,52 @@ trx_sys_validate_trx_list();
 					slots */
 /*------------------------------------------------------------- @} */
 
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one (signed)
-byte, therefore 128; each slot is currently 8 bytes in size. If you want
-to raise the level to 256 then you will need to fix some assertions that
-impose the 7 bit restriction. e.g., mach_write_to_3() */
+/** The number of rollback segments; rollback segment id must fit in
+the 7 bits reserved for it in DB_ROLL_PTR. */
 #define	TRX_SYS_N_RSEGS			128
 /** Maximum number of undo tablespaces (not counting the system tablespace) */
 #define TRX_SYS_MAX_UNDO_SPACES		(TRX_SYS_N_RSEGS - 1)
 
-/** Maximum length of MySQL binlog file name, in bytes. */
+/* Rollback segment specification slot offsets */
+
+/** the tablespace ID of an undo log header; starting with
+MySQL/InnoDB 5.1.7, this is FIL_NULL if the slot is unused */
+#define	TRX_SYS_RSEG_SPACE	0
+/** the page number of an undo log header, or FIL_NULL if unused */
+#define	TRX_SYS_RSEG_PAGE_NO	4
+/** Size of a rollback segment specification slot */
+#define TRX_SYS_RSEG_SLOT_SIZE	8
+
+/** Read the tablespace ID of a rollback segment slot.
+@param[in]	sys_header	TRX_SYS page
+@param[in]	rseg_id		rollback segment identifier
+@return	undo tablespace id */
+inline
+uint32_t
+trx_sysf_rseg_get_space(const buf_block_t* sys_header, ulint rseg_id)
+{
+	ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+	return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE
+				+ rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+				+ sys_header->frame);
+}
+
+/** Read the page number of a rollback segment slot.
+@param[in]	sys_header	TRX_SYS page
+@param[in]	rseg_id		rollback segment identifier
+@return	undo page number */
+inline
+uint32_t
+trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id)
+{
+	ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+	return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO
+				+ rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+				+ sys_header->frame);
+}
+
+/** Maximum length of MySQL binlog file name, in bytes.
+(Used before MariaDB 10.3.5.) */
 #define TRX_SYS_MYSQL_LOG_NAME_LEN	512
 /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
 #define TRX_SYS_MYSQL_LOG_MAGIC_N	873422344
@@ -416,7 +224,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
 # error "UNIV_PAGE_SIZE_MIN < 4096"
 #endif
 /** The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO		(UNIV_PAGE_SIZE - 1000)
+#define TRX_SYS_MYSQL_LOG_INFO		(srv_page_size - 1000)
 #define	TRX_SYS_MYSQL_LOG_MAGIC_N_FLD	0	/*!< magic number which is
 						TRX_SYS_MYSQL_LOG_MAGIC_N
 						if we have valid data in the
@@ -425,7 +233,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
 						within that file */
 #define TRX_SYS_MYSQL_LOG_NAME		12	/*!< MySQL log file name */
 
-/** Memory map TRX_SYS_PAGE_NO = 5 when UNIV_PAGE_SIZE = 4096
+/** Memory map TRX_SYS_PAGE_NO = 5 when srv_page_size = 4096
 
 0...37 FIL_HEADER
 38...45 TRX_SYS_TRX_ID_STORE
@@ -441,7 +249,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
 ...
   ...1063  TRX_SYS_RSEG_PAGE_NO     for slot 126
 
-(UNIV_PAGE_SIZE-3500 WSREP ::: FAIL would overwrite undo tablespace
+(srv_page_size-3500 WSREP ::: FAIL would overwrite undo tablespace
 space_id, page_no pairs :::)
 596 TRX_SYS_WSREP_XID_INFO             TRX_SYS_WSREP_XID_MAGIC_N_FLD
 600 TRX_SYS_WSREP_XID_FORMAT
@@ -451,7 +259,7 @@ space_id, page_no pairs :::)
 739 TRX_SYS_WSREP_XID_DATA_END
 
 FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
-(UNIV_PAGE_SIZE-2500)
+(srv_page_size-2500)
 1596 TRX_SYS_WSREP_XID_INFO             TRX_SYS_WSREP_XID_MAGIC_N_FLD
 1600 TRX_SYS_WSREP_XID_FORMAT
 1604 TRX_SYS_WSREP_XID_GTRID_LEN
@@ -459,19 +267,19 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
 1612 TRX_SYS_WSREP_XID_DATA   (len = 128)
 1739 TRX_SYS_WSREP_XID_DATA_END
 
-(UNIV_PAGE_SIZE - 2000 MYSQL MASTER LOG)
+(srv_page_size - 2000 MYSQL MASTER LOG)
 2096   TRX_SYS_MYSQL_MASTER_LOG_INFO   TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
 2100   TRX_SYS_MYSQL_LOG_OFFSET_HIGH
 2104   TRX_SYS_MYSQL_LOG_OFFSET_LOW
 2108   TRX_SYS_MYSQL_LOG_NAME
 
-(UNIV_PAGE_SIZE - 1000 MYSQL LOG)
+(srv_page_size - 1000 MYSQL LOG)
 3096   TRX_SYS_MYSQL_LOG_INFO          TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
 3100   TRX_SYS_MYSQL_LOG_OFFSET_HIGH
 3104   TRX_SYS_MYSQL_LOG_OFFSET_LOW
 3108   TRX_SYS_MYSQL_LOG_NAME
 
-(UNIV_PAGE_SIZE - 200 DOUBLEWRITE)
+(srv_page_size - 200 DOUBLEWRITE)
 3896   TRX_SYS_DOUBLEWRITE		TRX_SYS_DOUBLEWRITE_FSEG
 3906         TRX_SYS_DOUBLEWRITE_MAGIC
 3910         TRX_SYS_DOUBLEWRITE_BLOCK1
@@ -479,12 +287,12 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
 3918         TRX_SYS_DOUBLEWRITE_REPEAT
 3930         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N
 
-(UNIV_PAGE_SIZE - 8, TAILER)
+(srv_page_size - 8, TAILER)
 4088..4096	FIL_TAILER
 
 */
 #ifdef WITH_WSREP
-/** The offset to WSREP XID headers */
+/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */
 #define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
 #define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
 #define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -500,7 +308,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
 /** Doublewrite buffer */
 /* @{ */
 /** The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE		(UNIV_PAGE_SIZE - 200)
+#define TRX_SYS_DOUBLEWRITE		(srv_page_size - 200)
 /*-------------------------------------------------------------*/
 #define TRX_SYS_DOUBLEWRITE_FSEG	0	/*!< fseg header of the fseg
 						containing the doublewrite
@@ -548,83 +356,477 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
 #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE
 /* @} */
 
-/** File format tag */
-/* @{ */
-/** The offset of the file format tag on the trx system header page
-(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
-#define TRX_SYS_FILE_FORMAT_TAG		(UNIV_PAGE_SIZE - 16)
-
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW	3645922177UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH	2745987765UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this 64-bit constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N					\
-	((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32	\
-	 | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
-/* @} */
+trx_t* current_trx();
+
+struct rw_trx_hash_element_t
+{
+  rw_trx_hash_element_t(): trx(0)
+  {
+    mutex_create(LATCH_ID_RW_TRX_HASH_ELEMENT, &mutex);
+  }
+
+
+  ~rw_trx_hash_element_t()
+  {
+    mutex_free(&mutex);
+  }
+
+
+  trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
+  trx_id_t no;
+  trx_t *trx;
+  ib_mutex_t mutex;
+};
+
+
+/**
+  Wrapper around LF_HASH to store set of in memory read-write transactions.
+*/
+
+class rw_trx_hash_t
+{
+  LF_HASH hash;
+
+
+  /**
+    Constructor callback for lock-free allocator.
+
+    Object is just allocated and is not yet accessible via rw_trx_hash by
+    concurrent threads. Object can be reused multiple times before it is freed.
+    Every time object is being reused initializer() callback is called.
+  */
+
+  static void rw_trx_hash_constructor(uchar *arg)
+  {
+    new(arg + LF_HASH_OVERHEAD) rw_trx_hash_element_t();
+  }
+
+
+  /**
+    Destructor callback for lock-free allocator.
+
+    Object is about to be freed and is not accessible via rw_trx_hash by
+    concurrent threads.
+  */
+
+  static void rw_trx_hash_destructor(uchar *arg)
+  {
+    reinterpret_cast<rw_trx_hash_element_t*>
+      (arg + LF_HASH_OVERHEAD)->~rw_trx_hash_element_t();
+  }
+
+
+  /**
+    Destructor callback for lock-free allocator.
+
+    This destructor is used at shutdown. It frees remaining transaction
+    objects.
+
+    XA PREPARED transactions may remain if they haven't been committed or
+    rolled back. ACTIVE transactions may remain if startup was interrupted or
+    server is running in read-only mode or for certain srv_force_recovery
+    levels.
+  */
+
+  static void rw_trx_hash_shutdown_destructor(uchar *arg)
+  {
+    rw_trx_hash_element_t *element=
+      reinterpret_cast<rw_trx_hash_element_t*>(arg + LF_HASH_OVERHEAD);
+    if (trx_t *trx= element->trx)
+    {
+      ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED) ||
+            (trx_state_eq(trx, TRX_STATE_ACTIVE) &&
+             (!srv_was_started ||
+              srv_read_only_mode ||
+              srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
+      trx_free_at_shutdown(trx);
+    }
+    element->~rw_trx_hash_element_t();
+  }
+
+
+  /**
+    Initializer callback for lock-free hash.
+
+    Object is not yet accessible via rw_trx_hash by concurrent threads, but is
+    about to become such. Object id can be changed only by this callback and
+    remains the same until all pins to this object are released.
+
+    Object trx can be changed to 0 by erase() under object mutex protection,
+    which indicates it is about to be removed from lock-free hash and become
+    not accessible by concurrent threads.
+  */
+
+  static void rw_trx_hash_initializer(LF_HASH *,
+                                      rw_trx_hash_element_t *element,
+                                      trx_t *trx)
+  {
+    ut_ad(element->trx == 0);
+    element->trx= trx;
+    element->id= trx->id;
+    element->no= TRX_ID_MAX;
+    trx->rw_trx_hash_element= element;
+  }
+
+
+  /**
+    Gets LF_HASH pins.
+
+    Pins are used to protect object from being destroyed or reused. They are
+    normally stored in trx object for quick access. If caller doesn't have trx
+    available, we try to get it using current_trx(). If caller doesn't have trx
+    at all, temporary pins are allocated.
+  */
+
+  LF_PINS *get_pins(trx_t *trx)
+  {
+    if (!trx->rw_trx_hash_pins)
+    {
+      trx->rw_trx_hash_pins= lf_hash_get_pins(&hash);
+      ut_a(trx->rw_trx_hash_pins);
+    }
+    return trx->rw_trx_hash_pins;
+  }
+
+
+  struct eliminate_duplicates_arg
+  {
+    trx_ids_t ids;
+    my_hash_walk_action action;
+    void *argument;
+    eliminate_duplicates_arg(size_t size, my_hash_walk_action act, void* arg):
+      action(act), argument(arg) { ids.reserve(size); }
+  };
+
+
+  static my_bool eliminate_duplicates(rw_trx_hash_element_t *element,
+                                      eliminate_duplicates_arg *arg)
+  {
+    for (trx_ids_t::iterator it= arg->ids.begin(); it != arg->ids.end(); it++)
+    {
+      if (*it == element->id)
+        return 0;
+    }
+    arg->ids.push_back(element->id);
+    return arg->action(element, arg->argument);
+  }
+
 
-/** The transaction system central memory data structure. */
-struct trx_sys_t {
-
-	TrxSysMutex	mutex;		/*!< mutex protecting most fields in
-					this structure except when noted
-					otherwise */
-
-	MVCC*		mvcc;		/*!< Multi version concurrency control
-					manager */
-	volatile trx_id_t
-			max_trx_id;	/*!< The smallest number not yet
-					assigned as a transaction id or
-					transaction number. This is declared
-					volatile because it can be accessed
-					without holding any mutex during
-					AC-NL-RO view creation. */
-	trx_ut_list_t	serialisation_list;
-					/*!< Ordered on trx_t::no of all the
-					currenrtly active RW transactions */
 #ifdef UNIV_DEBUG
-	trx_id_t	rw_max_trx_id;	/*!< Max trx id of read-write
-					transactions which exist or existed */
-#endif /* UNIV_DEBUG */
-
-	/** Avoid false sharing */
-	const char	pad1[CACHE_LINE_SIZE];
-	trx_ut_list_t	rw_trx_list;	/*!< List of active and committed in
-					memory read-write transactions, sorted
-					on trx id, biggest first. Recovered
-					transactions are always on this list. */
-
-	/** Avoid false sharing */
-	const char	pad2[CACHE_LINE_SIZE];
-	trx_ut_list_t	mysql_trx_list;	/*!< List of transactions created
-					for MySQL. All user transactions are
-					on mysql_trx_list. The rw_trx_list
-					can contain system transactions and
-					recovered transactions that will not
-					be in the mysql_trx_list.
-					mysql_trx_list may additionally contain
-					transactions that have not yet been
-					started in InnoDB. */
-
-	trx_ids_t	rw_trx_ids;	/*!< Array of Read write transaction IDs
-					for MVCC snapshot. A ReadView would take
-					a snapshot of these transactions whose
-					changes are not visible to it. We should
-					remove transactions from the list before
-					committing in memory and releasing locks
-					to ensure right order of removal and
-					consistent snapshot. */
-
-	/** Avoid false sharing */
-	const char	pad3[CACHE_LINE_SIZE];
+  static void validate_element(trx_t *trx)
+  {
+    ut_ad(!trx->read_only || !trx->rsegs.m_redo.rseg);
+    ut_ad(!trx_is_autocommit_non_locking(trx));
+    mutex_enter(&trx->mutex);
+    ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+          trx_state_eq(trx, TRX_STATE_PREPARED));
+    mutex_exit(&trx->mutex);
+  }
+
+
+  struct debug_iterator_arg
+  {
+    my_hash_walk_action action;
+    void *argument;
+  };
+
+
+  static my_bool debug_iterator(rw_trx_hash_element_t *element,
+                                debug_iterator_arg *arg)
+  {
+    mutex_enter(&element->mutex);
+    if (element->trx)
+      validate_element(element->trx);
+    mutex_exit(&element->mutex);
+    return arg->action(element, arg->argument);
+  }
+#endif
+
+
+public:
+  void init()
+  {
+    lf_hash_init(&hash, sizeof(rw_trx_hash_element_t), LF_HASH_UNIQUE, 0,
+                 sizeof(trx_id_t), 0, &my_charset_bin);
+    hash.alloc.constructor= rw_trx_hash_constructor;
+    hash.alloc.destructor= rw_trx_hash_destructor;
+    hash.initializer=
+      reinterpret_cast<lf_hash_initializer>(rw_trx_hash_initializer);
+  }
+
+
+  void destroy()
+  {
+    hash.alloc.destructor= rw_trx_hash_shutdown_destructor;
+    lf_hash_destroy(&hash);
+  }
+
+
+  /**
+    Releases LF_HASH pins.
+
+    Must be called by thread that owns trx_t object when the latter is being
+    "detached" from thread (e.g. released to the pool by trx_free()). Can be
+    called earlier if thread is expected not to use rw_trx_hash.
+
+    Since pins are not allowed to be transferred to another thread,
+    initialisation thread calls this for recovered transactions.
+  */
+
+  void put_pins(trx_t *trx)
+  {
+    if (trx->rw_trx_hash_pins)
+    {
+      lf_hash_put_pins(trx->rw_trx_hash_pins);
+      trx->rw_trx_hash_pins= 0;
+    }
+  }
+
+
+  /**
+    Finds trx object in lock-free hash with given id.
+
+    Only ACTIVE or PREPARED trx objects may participate in hash. Nevertheless
+    the transaction may get committed before this method returns.
+
+    With do_ref_count == false the caller may dereference returned trx pointer
+    only if lock_sys.mutex was acquired before calling find().
+
+    With do_ref_count == true caller may dereference trx even if it is not
+    holding lock_sys.mutex. Caller is responsible for calling
+    trx->release_reference() when it is done playing with trx.
+
+    Ideally this method should get caller rw_trx_hash_pins along with trx
+    object as a parameter, similar to insert() and erase(). However most
+    callers lose trx early in their call chains and it is not that easy to pass
+    them through.
+
+    So we take more expensive approach: get trx through current_thd()->ha_data.
+    Some threads don't have trx attached to THD, and at least server
+    initialisation thread, fts_optimize_thread, srv_master_thread,
+    dict_stats_thread, srv_monitor_thread, btr_defragment_thread don't even
+    have THD at all. For such cases we allocate pins only for duration of
+    search and free them immediately.
+
+    This has negative performance impact and should be fixed eventually (by
+    passing caller_trx as a parameter). Still stream of DML is more or less Ok.
+
+    @return
+      @retval 0 not found
+      @retval pointer to trx
+  */
+
+  trx_t *find(trx_t *caller_trx, trx_id_t trx_id, bool do_ref_count)
+  {
+    /*
+      In MariaDB 10.3, purge will reset DB_TRX_ID to 0
+      when the history is lost. Read/write transactions will
+      always have a nonzero trx_t::id; there the value 0 is
+      reserved for transactions that did not write or lock
+      anything yet.
+
+      The caller should already have handled trx_id==0 specially.
+    */
+    ut_ad(trx_id);
+    if (caller_trx && caller_trx->id == trx_id)
+    {
+      if (do_ref_count)
+        caller_trx->reference();
+      return caller_trx;
+    }
+
+    trx_t *trx= 0;
+    LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+    ut_a(pins);
+
+    rw_trx_hash_element_t *element= reinterpret_cast<rw_trx_hash_element_t*>
+      (lf_hash_search(&hash, pins, reinterpret_cast<const void*>(&trx_id),
+                      sizeof(trx_id_t)));
+    if (element)
+    {
+      mutex_enter(&element->mutex);
+      lf_hash_search_unpin(pins);
+      trx= element->trx;
+      if (!trx);
+      else if (UNIV_UNLIKELY(trx_id != trx->id))
+        trx= NULL;
+      else {
+        if (do_ref_count)
+          trx->reference();
+        ut_d(validate_element(trx));
+      }
+      mutex_exit(&element->mutex);
+    }
+    if (!caller_trx)
+      lf_hash_put_pins(pins);
+    return trx;
+  }
+
+
+  /**
+    Inserts trx to lock-free hash.
+
+    Object becomes accessible via rw_trx_hash.
+  */
+
+  void insert(trx_t *trx)
+  {
+    ut_d(validate_element(trx));
+    int res= lf_hash_insert(&hash, get_pins(trx),
+                            reinterpret_cast<void*>(trx));
+    ut_a(res == 0);
+  }
+
+
+  /**
+    Removes trx from lock-free hash.
+
+    Object becomes not accessible via rw_trx_hash. But it still can be pinned
+    by concurrent find(), which is supposed to release it immediately after
+    it sees object trx is 0.
+  */
+
+  void erase(trx_t *trx)
+  {
+    ut_d(validate_element(trx));
+    mutex_enter(&trx->rw_trx_hash_element->mutex);
+    trx->rw_trx_hash_element->trx= 0;
+    mutex_exit(&trx->rw_trx_hash_element->mutex);
+    int res= lf_hash_delete(&hash, get_pins(trx),
+                            reinterpret_cast<const void*>(&trx->id),
+                            sizeof(trx_id_t));
+    ut_a(res == 0);
+  }
+
+
+  /**
+    Returns the number of elements in the hash.
+
+    The number is exact only if hash is protected against concurrent
+    modifications (e.g. single threaded startup or hash is protected
+    by some mutex). Otherwise the number may be used as a hint only,
+    because it may change even before this method returns.
+  */
+
+  uint32_t size()
+  {
+    return uint32_t(my_atomic_load32_explicit(&hash.count,
+					      MY_MEMORY_ORDER_RELAXED));
+  }
+
+
+  /**
+    Iterates the hash.
+
+    @param caller_trx  used to get/set pins
+    @param action      called for every element in hash
+    @param argument    opaque argument passed to action
+
+    May return the same element multiple times if hash is under contention.
+    If caller doesn't like to see the same transaction multiple times, it has
+    to call iterate_no_dups() instead.
+
+    May return element with committed transaction. If caller doesn't like to
+    see committed transactions, it has to skip those under element mutex:
+
+      mutex_enter(&element->mutex);
+      if (trx_t *trx= element->trx)
+      {
+        // trx is protected against commit in this branch
+      }
+      mutex_exit(&element->mutex);
+
+    May miss concurrently inserted transactions.
+
+    @return
+      @retval 0 iteration completed successfully
+      @retval 1 iteration was interrupted (action returned 1)
+  */
+
+  int iterate(trx_t *caller_trx, my_hash_walk_action action, void *argument)
+  {
+    LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+    ut_a(pins);
+#ifdef UNIV_DEBUG
+    debug_iterator_arg debug_arg= { action, argument };
+    action= reinterpret_cast<my_hash_walk_action>(debug_iterator);
+    argument= &debug_arg;
+#endif
+    int res= lf_hash_iterate(&hash, pins, action, argument);
+    if (!caller_trx)
+      lf_hash_put_pins(pins);
+    return res;
+  }
+
+
+  int iterate(my_hash_walk_action action, void *argument)
+  {
+    return iterate(current_trx(), action, argument);
+  }
+
+
+  /**
+    Iterates the hash and eliminates duplicate elements.
+
+    @sa iterate()
+  */
+
+  int iterate_no_dups(trx_t *caller_trx, my_hash_walk_action action,
+                      void *argument)
+  {
+    eliminate_duplicates_arg arg(size() + 32, action, argument);
+    return iterate(caller_trx, reinterpret_cast<my_hash_walk_action>
+                   (eliminate_duplicates), &arg);
+  }
+
+
+  int iterate_no_dups(my_hash_walk_action action, void *argument)
+  {
+    return iterate_no_dups(current_trx(), action, argument);
+  }
+};
+
+
+/** The transaction system central memory data structure. */
+class trx_sys_t
+{
+  /**
+    The smallest number not yet assigned as a transaction id or transaction
+    number. Accessed and updated with atomic operations.
+  */
+  MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_max_trx_id;
+
+
+  /**
+    Solves race conditions between register_rw() and snapshot_ids() as well as
+    race condition between assign_new_trx_no() and snapshot_ids().
+
+    @sa register_rw()
+    @sa assign_new_trx_no()
+    @sa snapshot_ids()
+  */
+  MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_rw_trx_hash_version;
+
+
+  /**
+    TRX_RSEG_HISTORY list length (number of committed transactions to purge)
+  */
+  MY_ALIGNED(CACHE_LINE_SIZE) int32 rseg_history_len;
+
+  bool m_initialised;
+
+public:
+  /** Mutex protecting trx_list. */
+  MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex;
+
+  /** List of all transactions. */
+  MY_ALIGNED(CACHE_LINE_SIZE) trx_ut_list_t trx_list;
+
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	/** Temporary rollback segments */
 	trx_rseg_t*	temp_rsegs[TRX_SYS_N_RSEGS];
-	/** Avoid false sharing */
-	const char	pad4[CACHE_LINE_SIZE];
 
+	MY_ALIGNED(CACHE_LINE_SIZE)
 	trx_rseg_t*	rseg_array[TRX_SYS_N_RSEGS];
 					/*!< Pointer array to rollback
 					segments; NULL if slot not in use;
@@ -632,46 +834,378 @@ struct trx_sys_t {
 					single-threaded mode; not protected
 					by any mutex, because it is read-only
 					during multi-threaded operation */
-	ulint		rseg_history_len;
-					/*!< Length of the TRX_RSEG_HISTORY
-					list (update undo logs for committed
-					transactions), protected by
-					rseg->mutex */
-
-	TrxIdSet	rw_trx_set;	/*!< Mapping from transaction id
-					to transaction instance */
-
-	ulint		n_prepared_trx;	/*!< Number of transactions currently
-					in the XA PREPARED state */
-
-	ulint		n_prepared_recovered_trx; /*!< Number of transactions
-					currently in XA PREPARED state that are
-					also recovered. Such transactions cannot
-					be added during runtime. They can only
-					occur after recovery if mysqld crashed
-					while there were XA PREPARED
-					transactions. We disable query cache
-					if such transactions exist. */
-};
 
-/** When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN	((trx_id_t) 256)
+  /**
+    Lock-free hash of in memory read-write transactions.
+    Works faster when it is on its own cache line (tested).
+  */
+
+  MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;
+
+
+#ifdef WITH_WSREP
+  /** Latest recovered XID during startup */
+  XID recovered_wsrep_xid;
+#endif
+  /** Latest recovered binlog offset */
+  uint64_t recovered_binlog_offset;
+  /** Latest recovered binlog file name */
+  char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
+
+
+  /**
+    Constructor.
+
+    Some members may require late initialisation, thus we just mark object as
+    uninitialised. Real initialisation happens in create().
+  */
+
+  trx_sys_t(): m_initialised(false) {}
+
+
+  /**
+    Returns the minimum trx id in rw trx list.
 
-/** Test if trx_sys->mutex is owned. */
-#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
+    This is the smallest id for which the trx can possibly be active. (But, you
+    must look at the trx->state to find out if the minimum trx id transaction
+    itself is active, or already committed.)
 
-/** Acquire the trx_sys->mutex. */
-#define trx_sys_mutex_enter() do {			\
-	mutex_enter(&trx_sys->mutex);			\
-} while (0)
+    @return the minimum trx id, or m_max_trx_id if the trx list is empty
+  */
 
-/** Release the trx_sys->mutex. */
-#define trx_sys_mutex_exit() do {			\
-	trx_sys->mutex.exit();				\
-} while (0)
+  trx_id_t get_min_trx_id()
+  {
+    trx_id_t id= get_max_trx_id();
+    rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+                        (get_min_trx_id_callback), &id);
+    return id;
+  }
 
-#include "trx0sys.ic"
+
+  /**
+    Determines the maximum transaction id.
+
+    @return maximum currently allocated trx id; will be stale after the
+            next call to trx_sys.get_new_trx_id()
+  */
+
+  trx_id_t get_max_trx_id()
+  {
+    return static_cast<trx_id_t>
+           (my_atomic_load64_explicit(reinterpret_cast<int64*>(&m_max_trx_id),
+                                      MY_MEMORY_ORDER_RELAXED));
+  }
+
+
+  /**
+    Allocates a new transaction id.
+    @return new, allocated trx id
+  */
+
+  trx_id_t get_new_trx_id()
+  {
+    trx_id_t id= get_new_trx_id_no_refresh();
+    refresh_rw_trx_hash_version();
+    return id;
+  }
+
+
+  /**
+    Allocates and assigns new transaction serialisation number.
+
+    There's a gap between m_max_trx_id increment and transaction serialisation
+    number becoming visible through rw_trx_hash. While we're in this gap
+    concurrent thread may come and do MVCC snapshot without seeing allocated
+    but not yet assigned serialisation number. Then at some point purge thread
+    may clone this view. As a result it won't see newly allocated serialisation
+    number and may remove "unnecessary" history data of this transaction from
+    rollback segments.
+
+    m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+    to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+    means that all transaction serialisation numbers up to m_max_trx_id are
+    available through rw_trx_hash.
+
+    We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+    that m_rw_trx_hash_version increment happens after
+    trx->rw_trx_hash_element->no becomes visible through rw_trx_hash.
+
+    @param trx transaction
+  */
+  void assign_new_trx_no(trx_t *trx)
+  {
+    trx->no= get_new_trx_id_no_refresh();
+    my_atomic_store64_explicit(reinterpret_cast<int64*>
+                               (&trx->rw_trx_hash_element->no),
+                               trx->no, MY_MEMORY_ORDER_RELAXED);
+    refresh_rw_trx_hash_version();
+  }
+
+
+  /**
+    Takes MVCC snapshot.
+
+    To reduce malloc probability we reserve rw_trx_hash.size() + 32 elements
+    in ids.
+
+    For details about get_rw_trx_hash_version() != get_max_trx_id() spin
+    @sa register_rw() and @sa assign_new_trx_no().
+
+    We rely on get_rw_trx_hash_version() to issue ACQUIRE memory barrier so
+    that loading of m_rw_trx_hash_version happens before accessing rw_trx_hash.
+
+    To optimise snapshot creation rw_trx_hash.iterate() is being used instead
+    of rw_trx_hash.iterate_no_dups(). It means that some transaction
+    identifiers may appear multiple times in ids.
+
+    @param[in,out] caller_trx used to get access to rw_trx_hash_pins
+    @param[out]    ids        array to store registered transaction identifiers
+    @param[out]    max_trx_id variable to store m_max_trx_id value
+    @param[out]    min_trx_no variable to store min(trx->no) value
+  */
+
+  void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
+                    trx_id_t *min_trx_no)
+  {
+    ut_ad(!mutex_own(&mutex));
+    snapshot_ids_arg arg(ids);
+
+    while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id())
+      ut_delay(1);
+    arg.m_no= arg.m_id;
+
+    ids->clear();
+    ids->reserve(rw_trx_hash.size() + 32);
+    rw_trx_hash.iterate(caller_trx,
+                        reinterpret_cast<my_hash_walk_action>(copy_one_id),
+                        &arg);
+
+    *max_trx_id= arg.m_id;
+    *min_trx_no= arg.m_no;
+  }
+
+
+  /** Initialiser for m_max_trx_id and m_rw_trx_hash_version. */
+  void init_max_trx_id(trx_id_t value)
+  {
+    m_max_trx_id= m_rw_trx_hash_version= value;
+  }
+
+
+  bool is_initialised() { return m_initialised; }
+
+
+  /** Initialise the transaction subsystem. */
+  void create();
+
+  /** Close the transaction subsystem on shutdown. */
+  void close();
+
+  /** @return total number of active (non-prepared) transactions */
+  ulint any_active_transactions();
+
+
+  /**
+    Registers read-write transaction.
+
+    Transaction becomes visible to MVCC.
+
+    There's a gap between m_max_trx_id increment and transaction becoming
+    visible through rw_trx_hash. While we're in this gap concurrent thread may
+    come and do MVCC snapshot. As a result concurrent read view will be able to
+    observe records owned by this transaction even before it was committed.
+
+    m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+    to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+    means that all transactions up to m_max_trx_id are available through
+    rw_trx_hash.
+
+    We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+    that m_rw_trx_hash_version increment happens after transaction becomes
+    visible through rw_trx_hash.
+  */
+
+  void register_rw(trx_t *trx)
+  {
+    trx->id= get_new_trx_id_no_refresh();
+    rw_trx_hash.insert(trx);
+    refresh_rw_trx_hash_version();
+  }
+
+
+  /**
+    Deregisters read-write transaction.
+
+    Transaction is removed from rw_trx_hash, which releases all implicit locks.
+    MVCC snapshot won't see this transaction anymore.
+  */
+
+  void deregister_rw(trx_t *trx)
+  {
+    rw_trx_hash.erase(trx);
+  }
+
+
+  bool is_registered(trx_t *caller_trx, trx_id_t id)
+  {
+    return id && find(caller_trx, id, false);
+  }
+
+
+  trx_t *find(trx_t *caller_trx, trx_id_t id, bool do_ref_count= true)
+  {
+    return rw_trx_hash.find(caller_trx, id, do_ref_count);
+  }
+
+
+  /**
+    Registers transaction in trx_sys.
+
+    @param trx transaction
+  */
+  void register_trx(trx_t *trx)
+  {
+    mutex_enter(&mutex);
+    UT_LIST_ADD_FIRST(trx_list, trx);
+    mutex_exit(&mutex);
+  }
+
+
+  /**
+    Deregisters transaction in trx_sys.
+
+    @param trx transaction
+  */
+  void deregister_trx(trx_t *trx)
+  {
+    mutex_enter(&mutex);
+    UT_LIST_REMOVE(trx_list, trx);
+    mutex_exit(&mutex);
+  }
+
+
+  /**
+    Clones the oldest view and stores it in view.
+
+    No need to call ReadView::close(). The caller owns the view that is passed
+    in. This function is called by purge thread to determine whether it should
+    purge the delete marked record or not.
+  */
+  void clone_oldest_view();
+
+
+  /** @return the number of active views */
+  size_t view_count() const
+  {
+    size_t count= 0;
+
+    mutex_enter(&mutex);
+    for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
+         trx= UT_LIST_GET_NEXT(trx_list, trx))
+    {
+      if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN)
+        ++count;
+    }
+    mutex_exit(&mutex);
+    return count;
+  }
+
+  /** @return number of committed transactions waiting for purge */
+  ulint history_size() const
+  {
+    return uint32(my_atomic_load32(&const_cast<trx_sys_t*>(this)
+                                   ->rseg_history_len));
+  }
+  /** Add to the TRX_RSEG_HISTORY length (on database startup). */
+  void history_add(int32 len)
+  {
+    my_atomic_add32(&rseg_history_len, len);
+  }
+  /** Register a committed transaction. */
+  void history_insert() { history_add(1); }
+  /** Note that a committed transaction was purged. */
+  void history_remove() { history_add(-1); }
+
+private:
+  static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element,
+                                         trx_id_t *id)
+  {
+    if (element->id < *id)
+    {
+      mutex_enter(&element->mutex);
+      /* We don't care about read-only transactions here. */
+      if (element->trx && element->trx->rsegs.m_redo.rseg)
+        *id= element->id;
+      mutex_exit(&element->mutex);
+    }
+    return 0;
+  }
+
+
+  struct snapshot_ids_arg
+  {
+    snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {}
+    trx_ids_t *m_ids;
+    trx_id_t m_id;
+    trx_id_t m_no;
+  };
+
+
+  static my_bool copy_one_id(rw_trx_hash_element_t *element,
+                             snapshot_ids_arg *arg)
+  {
+    if (element->id < arg->m_id)
+    {
+      trx_id_t no= static_cast<trx_id_t>(my_atomic_load64_explicit(
+        reinterpret_cast<int64*>(&element->no), MY_MEMORY_ORDER_RELAXED));
+      arg->m_ids->push_back(element->id);
+      if (no < arg->m_no)
+        arg->m_no= no;
+    }
+    return 0;
+  }
+
+
+  /** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
+  trx_id_t get_rw_trx_hash_version()
+  {
+    return static_cast<trx_id_t>
+           (my_atomic_load64_explicit(reinterpret_cast<int64*>
+                                      (&m_rw_trx_hash_version),
+                                      MY_MEMORY_ORDER_ACQUIRE));
+  }
+
+
+  /** Increments m_rw_trx_hash_version, must issue RELEASE memory barrier. */
+  void refresh_rw_trx_hash_version()
+  {
+    my_atomic_add64_explicit(reinterpret_cast<int64*>(&m_rw_trx_hash_version),
+                             1, MY_MEMORY_ORDER_RELEASE);
+  }
+
+
+  /**
+    Allocates new transaction id without refreshing rw_trx_hash version.
+
+    This method is extracted for exclusive use by register_rw() and
+    assign_new_trx_no() where new id must be allocated atomically with
+    payload of these methods from MVCC snapshot point of view.
+
+    @sa get_new_trx_id()
+    @sa assign_new_trx_no()
+
+    @return new transaction id
+  */
+
+  trx_id_t get_new_trx_id_no_refresh()
+  {
+    return static_cast<trx_id_t>(my_atomic_add64_explicit(
+      reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED));
+  }
+};
+
+
+/** The transaction system */
+extern trx_sys_t trx_sys;
 
 #endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
deleted file mode 100644
index 861800ef40e..00000000000
--- a/storage/innobase/include/trx0sys.ic
+++ /dev/null
@@ -1,464 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.ic
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-#include "data0type.h"
-#include "srv0srv.h"
-#include "mtr0log.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte	trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define	TRX_SYS_RSEG_SPACE	0	/* space where the segment
-					header is placed; starting with
-					MySQL/InnoDB 5.1.7, this is
-					UNIV_UNDEFINED if the slot is unused */
-#define	TRX_SYS_RSEG_PAGE_NO	4	/*  page number where the segment
-					header is placed; this is FIL_NULL
-					if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE	8
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/** Checks if a page address is the trx sys header page.
-@param[in]	page_id	page id
-@return true if trx sys header page */
-UNIV_INLINE
-bool
-trx_sys_hdr_page(
-	const page_id_t&	page_id)
-{
-	return(page_id.space() == TRX_SYS_SPACE
-	       && page_id.page_no() == TRX_SYS_PAGE_NO);
-}
-
-/**********************************************************************//**
-Gets a pointer to the transaction system header and x-latches its page.
-@return pointer to system header, page x-latched. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	buf_block_t*	block = NULL;
-	trx_sysf_t*	header = NULL;
-
-	ut_ad(mtr);
-
-	block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
-			     univ_page_size, RW_X_LATCH, mtr);
-
-	if (block) {
-		buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
-		header = TRX_SYS + buf_block_get_frame(block);
-	}
-
-	return(header);
-}
-
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
-	ulint		i,		/*!< in: slot index == rseg id */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ut_ad(sys_header);
-	ut_ad(i < TRX_SYS_N_RSEGS);
-
-	return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
-			      + i * TRX_SYS_RSEG_SLOT_SIZE
-			      + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-header.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx system header */
-	ulint		i,		/*!< in: slot index == rseg id */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ut_ad(sys_header);
-	ut_ad(i < TRX_SYS_N_RSEGS);
-
-	return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
-			      + i * TRX_SYS_RSEG_SLOT_SIZE
-			      + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
-	ulint		i,		/*!< in: slot index == rseg id */
-	ulint		space,		/*!< in: space id */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ut_ad(sys_header);
-	ut_ad(i < TRX_SYS_N_RSEGS);
-
-	mlog_write_ulint(sys_header + TRX_SYS_RSEGS
-			 + i * TRX_SYS_RSEG_SLOT_SIZE
-			 + TRX_SYS_RSEG_SPACE,
-			 space,
-			 MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
-	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
-	ulint		i,		/*!< in: slot index == rseg id */
-	ulint		page_no,	/*!< in: page number, FIL_NULL if the
-					slot is reset to unused */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ut_ad(sys_header);
-	ut_ad(i < TRX_SYS_N_RSEGS);
-
-	mlog_write_ulint(sys_header + TRX_SYS_RSEGS
-			 + i * TRX_SYS_RSEG_SLOT_SIZE
-			 + TRX_SYS_RSEG_PAGE_NO,
-			 page_no,
-			 MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
-	byte*		ptr,	/*!< in: pointer to memory where written */
-	trx_id_t	id)	/*!< in: id */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
-	ut_ad(id > 0);
-	mach_write_to_6(ptr, id);
-}
-
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
-	const byte*	ptr)	/*!< in: pointer to memory from where to read */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
-	return(mach_read_from_6(ptr));
-}
-
-/****************************************************************//**
-Looks for the trx handle with the given id in rw_trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
-	trx_id_t	trx_id)	/*!< in: trx id to search for */
-{
-	ut_ad(trx_id > 0);
-	ut_ad(trx_sys_mutex_own());
-
-	if (trx_sys->rw_trx_set.empty()) {
-		return(NULL);
-	}
-
-	TrxIdSet::iterator	it;
-
-	it = trx_sys->rw_trx_set.find(TrxTrack(trx_id));
-
-	return(it == trx_sys->rw_trx_set.end() ? NULL : it->m_trx);
-}
-
-/****************************************************************//**
-Returns the minimum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id_low(void)
-/*=======================*/
-{
-	trx_id_t	id;
-
-	ut_ad(trx_sys_mutex_own());
-
-	const trx_t*	trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
-
-	if (trx == NULL) {
-		id = trx_sys->max_trx_id;
-	} else {
-		assert_trx_in_rw_list(trx);
-		id = trx->id;
-	}
-
-	return(id);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
-	trx_id_t	trx_id)		/*!< in: transaction identifier */
-{
-	const trx_t*	trx;
-
-	trx_sys_mutex_enter();
-
-	trx = trx_get_rw_trx_by_id(trx_id);
-	ut_a(trx->is_recovered);
-
-	trx_sys_mutex_exit();
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the rw trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void)
-/*===================*/
-{
-	trx_sys_mutex_enter();
-
-	trx_id_t	id = trx_rw_min_trx_id_low();
-
-	trx_sys_mutex_exit();
-
-	return(id);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active.  If the caller is
-not holding lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
-	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt)	/*!< in: NULL or pointer to a flag
-					that will be set if corrupt */
-{
-	trx_t*		trx;
-
-	ut_ad(trx_sys_mutex_own());
-
-	if (trx_id < trx_rw_min_trx_id_low()) {
-
-		trx = NULL;
-	} else if (trx_id >= trx_sys->max_trx_id) {
-
-		/* There must be corruption: we let the caller handle the
-		diagnostic prints in this case. */
-
-		trx = NULL;
-		if (corrupt != NULL) {
-			*corrupt = TRUE;
-		}
-	} else {
-		trx = trx_get_rw_trx_by_id(trx_id);
-
-		if (trx != NULL
-		    && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-
-			trx = NULL;
-		}
-	}
-
-	return(trx);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
-	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt,	/*!< in: NULL or pointer to a flag
-					that will be set if corrupt */
-	bool		do_ref_count)	/*!< in: if true then increment the
-					trx_t::n_ref_count */
-{
-	ut_ad(trx_id);
-
-	trx_sys_mutex_enter();
-
-	trx_t* trx = trx_rw_is_active_low(trx_id, corrupt);
-
-	if (trx) {
-		trx = trx_reference(do_ref_count ? trx_id : 0, trx);
-	}
-
-	trx_sys_mutex_exit();
-
-	return(trx);
-}
-
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id()
-/*====================*/
-{
-	/* wsrep_fake_trx_id  violates this assert */
-	ut_ad(trx_sys_mutex_own());
-
-	/* VERY important: after the database is started, max_trx_id value is
-	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
-	will evaluate to TRUE when this function is first time called,
-	and the value for trx id will be written to disk-based header!
-	Thus trx id values will not overlap when the database is
-	repeatedly started! */
-
-	if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) {
-
-		trx_sys_flush_max_trx_id();
-	}
-
-	return(trx_sys->max_trx_id++);
-}
-
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void)
-/*========================*/
-{
-	ut_ad(!trx_sys_mutex_own());
-
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
-	/* Avoid torn reads. */
-
-	trx_sys_mutex_enter();
-
-	trx_id_t	max_trx_id = trx_sys->max_trx_id;
-
-	trx_sys_mutex_exit();
-
-	return(max_trx_id);
-#else
-	/* Perform a dirty read. Callers should be prepared for stale
-	values, and we know that the value fits in a machine word, so
-	that it will be read and written atomically. */
-	return(trx_sys->max_trx_id);
-#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */
-}
-
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::rw_trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void)
-/*======================*/
-{
-	ulint	n_trx;
-
-	trx_sys_mutex_enter();
-
-	n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
-	trx_sys_mutex_exit();
-
-	return(n_trx);
-}
-
-/**
-Add the transaction to the RW transaction set
-@param trx		transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx)
-{
-	ut_ad(trx->id != 0);
-
-	trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
-	ut_d(trx->in_rw_trx_list = true);
-}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 917222477b1..d6a8b8c771b 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -41,16 +41,16 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0xa.h"
 #include "ut0vec.h"
 #include "fts0fts.h"
+#include "read0types.h"
 
 // Forward declaration
 struct mtr_t;
 
 // Forward declaration
-class ReadView;
-
-// Forward declaration
 class FlushObserver;
 
+struct rw_trx_hash_element_t;
+
 /** Set flush observer for the transaction
 @param[in/out]	trx		transaction struct
 @param[in]	observer	flush observer */
@@ -82,45 +82,19 @@ const dict_index_t*
 trx_get_error_info(
 /*===============*/
 	const trx_t*	trx);	/*!< in: trx object */
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
-
-/** Frees and initialize a transaction object instantinated during recovery.
-@param trx trx object to free and initialize during recovery */
-void
-trx_free_resurrected(trx_t* trx);
-
-/** Free a transaction that was allocated by background or user threads.
-@param trx trx object to free */
-void
-trx_free_for_background(trx_t* trx);
 
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
-void
-trx_free_prepared(
-/*==============*/
-	trx_t*	trx);	/*!< in, own: trx object */
+/** @return a trx_t instance from trx_pools. */
+trx_t *trx_create();
 
-/** Free a transaction object for MySQL.
-@param[in,out]	trx	transaction */
-void
-trx_free_for_mysql(trx_t*	trx);
+/**
+  Release a trx_t instance back to the pool.
+  @param trx the instance to release.
+*/
+void trx_free(trx_t*& trx);
 
-/** Disconnect a transaction from MySQL.
-@param[in,out]	trx	transaction */
+/** At shutdown, frees a transaction object. */
 void
-trx_disconnect_plain(trx_t*	trx);
+trx_free_at_shutdown(trx_t *trx);
 
 /** Disconnect a prepared transaction from MySQL.
 @param[in,out]	trx	transaction */
@@ -229,22 +203,10 @@ trx_commit(
 /*=======*/
 	trx_t*	trx);	/*!< in/out: transaction */
 
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-void
-trx_commit_low(
-/*===========*/
-	trx_t*	trx,	/*!< in/out: transaction */
-	mtr_t*	mtr);	/*!< in/out: mini-transaction (will be committed),
-			or NULL if trx made no modifications */
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-void
-trx_cleanup_at_db_startup(
-/*======================*/
-	trx_t*	trx);	/*!< in: transaction */
+/** Commit a transaction and a mini-transaction.
+@param[in,out]	trx	transaction
+@param[in,out]	mtr	mini-transaction (NULL if no modifications) */
+void trx_commit_low(trx_t* trx, mtr_t* mtr);
 /**********************************************************************//**
 Does the transaction commit for MySQL.
 @return DB_SUCCESS or error number */
@@ -263,13 +225,13 @@ int
 trx_recover_for_mysql(
 /*==================*/
 	XID*	xid_list,	/*!< in/out: prepared transactions */
-	ulint	len);		/*!< in: number of slots in xid_list */
+	uint	len);		/*!< in: number of slots in xid_list */
 /*******************************************************************//**
 This function is used to find one X/Open XA distributed transaction
 which is in the prepared state
 @return trx or NULL; on match, the trx->xid will be invalidated;
 note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
+holding lock_sys.mutex */
 trx_t *
 trx_get_trx_by_xid(
 /*===============*/
@@ -287,31 +249,6 @@ void
 trx_mark_sql_stat_end(
 /*==================*/
 	trx_t*	trx);	/*!< in: trx handle */
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-ReadView*
-trx_assign_read_view(
-/*=================*/
-	trx_t*	trx);	/*!< in: active transaction */
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
-/*==============*/
-	trx_t*	trx);
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
-/*==============*/
-	const trx_t*	trx);
-
 /****************************************************************//**
 Prepares a transaction for commit/rollback. */
 void
@@ -335,7 +272,7 @@ trx_commit_step(
 
 /**********************************************************************//**
 Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
+Caller must hold trx_sys.mutex. */
 void
 trx_print_low(
 /*==========*/
@@ -355,7 +292,7 @@ trx_print_low(
 
 /**********************************************************************//**
 Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
+The caller must hold lock_sys.mutex and trx_sys.mutex.
 When possible, use trx_print() instead. */
 void
 trx_print_latched(
@@ -365,25 +302,9 @@ trx_print_latched(
 	ulint		max_query_len);	/*!< in: max query length to print,
 					or 0 to use the default max length */
 
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
-	FILE*		f,		/*!< in: output stream */
-	const trx_t*	trx,		/*!< in: transaction */
-	ulint		max_query_len)	/*!< in: max query length to print,
-					or 0 to use the default max length */
-	MY_ATTRIBUTE((nonnull));
-#endif /* WITH_WSREP */
 /**********************************************************************//**
 Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+Acquires and releases lock_sys.mutex. */
 void
 trx_print(
 /*======*/
@@ -413,9 +334,9 @@ trx_set_dict_operation(
 
 /**********************************************************************//**
 Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
 that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
 @return TRUE if trx->state == state */
 UNIV_INLINE
 bool
@@ -431,22 +352,11 @@ trx_state_eq(
 				trx->state == TRX_STATE_NOT_STARTED
 				after an error has been reported */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-ibool
-trx_assert_started(
-/*===============*/
-	const trx_t*	trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((warn_unused_result));
-# endif /* UNIV_DEBUG */
 
 /**********************************************************************//**
 Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-ibool
+@return true if interrupted */
+bool
 trx_is_interrupted(
 /*===============*/
 	const trx_t*	trx);	/*!< in: transaction */
@@ -519,18 +429,6 @@ trx_set_rw_mode(
 	trx_t*		trx);
 
 /**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
-	trx_t*		trx);
-
-/**
-Check if the transaction is being referenced. */
-#define trx_is_referenced(t)	((t)->n_ref > 0)
-
-/**
 Transactions that aren't started by the MySQL server don't set
 the trx_t::mysql_thd field. For such transactions we set the lock
 wait timeout to 0 instead of the user configured value that comes
@@ -559,15 +457,6 @@ with an explicit check for the read-only status.
 ((t)->read_only && trx_is_autocommit_non_locking((t)))
 
 /**
-Assert that the transaction is in the trx_sys_t::rw_trx_list */
-#define assert_trx_in_rw_list(t) do {					\
-	ut_ad(!(t)->read_only);						\
-	ut_ad((t)->in_rw_trx_list					\
-	      == !((t)->read_only || !(t)->rsegs.m_redo.rseg));		\
-	check_trx_state(t);						\
-} while (0)
-
-/**
 Check transaction state */
 #define check_trx_state(t) do {						\
 	ut_ad(!trx_is_autocommit_non_locking((t)));			\
@@ -589,8 +478,8 @@ Check transaction state */
 	ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED));		\
 	ut_ad(!(t)->id);						\
 	ut_ad(!(t)->has_logged());					\
-	ut_ad(!(t)->n_ref);						\
-	ut_ad(!MVCC::is_view_active((t)->read_view));			\
+	ut_ad(!(t)->is_referenced());					\
+	ut_ad(!(t)->read_view.is_open());				\
 	ut_ad((t)->lock.wait_thr == NULL);				\
 	ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0);		\
 	ut_ad((t)->dict_operation == TRX_DICT_OP_NONE);			\
@@ -607,16 +496,15 @@ transaction pool.
 #ifdef UNIV_DEBUG
 /*******************************************************************//**
 Assert that an autocommit non-locking select cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
 # define assert_trx_nonlocking_or_in_list(t)				\
 	do {								\
 		if (trx_is_autocommit_non_locking(t)) {			\
 			trx_state_t	t_state = (t)->state;		\
 			ut_ad((t)->read_only);				\
 			ut_ad(!(t)->is_recovered);			\
-			ut_ad(!(t)->in_rw_trx_list);			\
-			ut_ad((t)->in_mysql_trx_list);			\
+			ut_ad((t)->mysql_thd);				\
 			ut_ad(t_state == TRX_STATE_NOT_STARTED		\
 			      || t_state == TRX_STATE_ACTIVE);		\
 		} else {						\
@@ -626,8 +514,8 @@ The tranasction must be in the mysql_trx_list. */
 #else /* UNIV_DEBUG */
 /*******************************************************************//**
-Assert that an autocommit non-locking slect cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+Assert that an autocommit non-locking select cannot be in the
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
 # define assert_trx_nonlocking_or_in_list(trx) ((void)0)
 #endif /* UNIV_DEBUG */
 
@@ -654,7 +542,7 @@ To query the state either of the mutexes is sufficient within the locking
 code and no mutex is required when the query thread is no longer waiting. */
 
 /** The locks and state of an active transaction. Protected by
-lock_sys->mutex, trx->mutex or both. */
+lock_sys.mutex, trx->mutex or both. */
 struct trx_lock_t {
 	ulint		n_active_thrs;	/*!< number of active query threads */
 
@@ -666,10 +554,10 @@ struct trx_lock_t {
 					TRX_QUE_LOCK_WAIT, this points to
 					the lock request, otherwise this is
 					NULL; set to non-NULL when holding
-					both trx->mutex and lock_sys->mutex;
+					both trx->mutex and lock_sys.mutex;
 					set to NULL when holding
-					lock_sys->mutex; readers should
-					hold lock_sys->mutex, except when
+					lock_sys.mutex; readers should
+					hold lock_sys.mutex, except when
 					they are holding trx->mutex and
 					wait_lock==NULL */
 	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
@@ -683,13 +571,13 @@ struct trx_lock_t {
 					resolution, it sets this to true.
 					Protected by trx->mutex. */
 	time_t		wait_started;	/*!< lock wait started at this time,
-					protected only by lock_sys->mutex */
+					protected only by lock_sys.mutex */
 
 	que_thr_t*	wait_thr;	/*!< query thread belonging to this
 					trx that is in QUE_THR_LOCK_WAIT
 					state. For threads suspended in a
 					lock wait, this is protected by
-					lock_sys->mutex. Otherwise, this may
+					lock_sys.mutex. Otherwise, this may
 					only be modified by the thread that is
 					serving the running transaction. */
 
@@ -708,12 +596,12 @@ struct trx_lock_t {
 	unsigned	table_cached;
 
 	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
-					protected by lock_sys->mutex */
+					protected by lock_sys.mutex */
 
 	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
 					insertions are protected by trx->mutex
-					and lock_sys->mutex; removals are
-					protected by lock_sys->mutex */
+					and lock_sys.mutex; removals are
+					protected by lock_sys.mutex */
 
 	lock_list	table_locks;	/*!< All table locks requested by this
 					transaction, including AUTOINC locks */
@@ -732,14 +620,73 @@ struct trx_lock_t {
 	ulint		n_rec_locks;	/*!< number of rec locks in this trx */
 };
 
-/** Type used to store the list of tables that are modified by a given
-transaction. We store pointers to the table objects in memory because
+/** Logical first modification time of a table in a transaction */
+class trx_mod_table_time_t
+{
+	/** First modification of the table */
+	undo_no_t	first;
+	/** First modification of a system versioned column */
+	undo_no_t	first_versioned;
+
+	/** Magic value signifying that a system versioned column of a
+	table was never modified in a transaction. */
+	static const undo_no_t UNVERSIONED = IB_ID_MAX;
+
+public:
+	/** Constructor
+	@param[in]	rows	number of modified rows so far */
+	trx_mod_table_time_t(undo_no_t rows)
+		: first(rows), first_versioned(UNVERSIONED) {}
+
+#ifdef UNIV_DEBUG
+	/** Validation
+	@param[in]	rows	number of modified rows so far
+	@return	whether the object is valid */
+	bool valid(undo_no_t rows = UNVERSIONED) const
+	{
+		return first <= first_versioned && first <= rows;
+	}
+#endif /* UNIV_DEBUG */
+	/** @return if versioned columns were modified */
+	bool is_versioned() const { return first_versioned != UNVERSIONED; }
+
+	/** After writing an undo log record, set is_versioned() if needed
+	@param[in]	rows	number of modified rows so far */
+	void set_versioned(undo_no_t rows)
+	{
+		ut_ad(!is_versioned());
+		first_versioned = rows;
+		ut_ad(valid());
+	}
+
+	/** Invoked after partial rollback
+	@param[in]	limit	number of surviving modified rows
+	@return	whether this should be erased from trx_t::mod_tables */
+	bool rollback(undo_no_t limit)
+	{
+		ut_ad(valid());
+		if (first >= limit) {
+			return true;
+		}
+		/* A first versioned change at or after limit was undone */
+		if (first_versioned >= limit && is_versioned()) {
+			first_versioned = UNVERSIONED;
+		}
+
+		return false;
+	}
+};
+
+/** Collection of persistent tables and their first modification
+in a transaction.
+We store pointers to the table objects in memory because
 we know that a table object will not be destroyed while a transaction
 that modified it is running. */
-typedef std::set<
-	dict_table_t*,
+typedef std::map<
+	dict_table_t*, trx_mod_table_time_t,
 	std::less<dict_table_t*>,
-	ut_allocator<dict_table_t*> >	trx_mod_tables_t;
+	ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
+	trx_mod_tables_t;
 
 /** The transaction handle
 
@@ -769,30 +716,31 @@ so without holding any mutex. The following are exceptions to this:
 
 * trx_rollback_resurrected() may access resurrected (connectionless)
 transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
+transactions. The trx_sys.mutex prevents a race condition between it
 and lock_trx_release_locks() [invoked by trx_commit()].
 
 * trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
+thread. The caller must be holding lock_sys.mutex.
 
-* When a transaction handle is in the trx_sys->mysql_trx_list or
-trx_sys->trx_list, some of its fields must not be modified without
-holding trx_sys->mutex exclusively.
+* When a transaction handle is in the trx_sys.trx_list, some of its fields
+must not be modified without holding trx->mutex.
 
 * The locking code (in particular, lock_deadlock_recursive() and
 lock_rec_convert_impl_to_expl()) will access transactions associated
 to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
+lock_sys.mutex and sometimes by trx->mutex. */
 
 /** Represents an instance of rollback segment along with its state variables.*/
 struct trx_undo_ptr_t {
 	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
 					transaction, or NULL if not assigned
 					yet */
-	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
-					NULL if no inserts performed yet */
-	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
-					NULL if no update performed yet */
+	trx_undo_t*	undo;		/*!< pointer to the undo log, or
+					NULL if nothing logged yet */
+	trx_undo_t*     old_insert;	/*!< pointer to recovered
+					insert undo log, or NULL if no
+					INSERT transactions were
+					recovered from old-format undo logs */
 };
 
 /** An instance of temporary rollback segment. */
@@ -816,10 +764,23 @@ struct trx_rsegs_t {
 };
 
 struct trx_t {
+private:
+  /**
+    Count of references.
+
+    We can neither release the locks nor commit the transaction until this
+    count is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to
+    signify that it is no longer "active".
+  */
+
+  int32_t n_ref;
+
+
+public:
 	TrxMutex	mutex;		/*!< Mutex protecting the fields
 					state and lock (except some fields
 					of lock, which are protected by
-					lock_sys->mutex) */
+					lock_sys.mutex) */
 
 	trx_id_t	id;		/*!< transaction id */
 
@@ -828,7 +789,7 @@ struct trx_t {
 					transaction is moved to
 					COMMITTED_IN_MEMORY state.
 					Protected by trx_sys_t::mutex
-					when trx->in_rw_trx_list. Initially
+					when trx is in rw_trx_hash. Initially
 					set to TRX_ID_MAX. */
 
 	/** State of the trx from the point of view of concurrency control
@@ -855,6 +816,9 @@ struct trx_t {
 	Recovered XA:
 	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
 
+	Recovered XA followed by XA ROLLBACK:
+	* NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
+
 	XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
 	* NOT_STARTED -> PREPARED -> (freed)
 
@@ -865,11 +829,11 @@ struct trx_t {
 
 	XA (2PC) transactions are always treated as non-autocommit.
 
-	Transitions to ACTIVE or NOT_STARTED occur when
-	!in_rw_trx_list (no trx_sys->mutex needed).
+	Transitions to ACTIVE or NOT_STARTED occur when transaction
+	is not in rw_trx_hash (no trx_sys.mutex needed).
 
 	Autocommit non-locking read-only transactions move between states
-	without holding any mutex. They are !in_rw_trx_list.
+	without holding any mutex. They are not in rw_trx_hash.
 
 	All transactions, unless they are determined to be ac-nl-ro,
 	explicitly tagged as read-only or read-write, will first be put
@@ -878,16 +842,16 @@ struct trx_t {
 	do we remove it from the read-only list and put it on the read-write
 	list. During this switch we assign it a rollback segment.
 
-	When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
-	it is a user transaction. It cannot be in rw_trx_list.
+	When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
+	in rw_trx_hash.
 
-	ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
-	The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
+	ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
+	The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
 
 	ACTIVE->COMMITTED is possible when the transaction is in
-	rw_trx_list.
+	rw_trx_hash.
 
-	Transitions to COMMITTED are protected by both lock_sys->mutex
+	Transitions to COMMITTED are protected by both lock_sys.mutex
 	and trx->mutex.
 
 	NOTE: Some of these state change constraints are an overkill,
@@ -896,25 +860,16 @@ struct trx_t {
 
 	trx_state_t	state;
 
-	ReadView*	read_view;	/*!< consistent read view used in the
+	ReadView	read_view;	/*!< consistent read view used in the
 					transaction, or NULL if not yet set */
-
-	UT_LIST_NODE_T(trx_t)
-			trx_list;	/*!< list of transactions;
-					protected by trx_sys->mutex. */
-	UT_LIST_NODE_T(trx_t)
-			no_list;	/*!< Required during view creation
-					to check for the view limit for
-					transactions that are committing */
-
 	trx_lock_t	lock;		/*!< Information about the transaction
 					locks and state. Protected by
-					trx->mutex or lock_sys->mutex
+					trx->mutex or lock_sys.mutex
 					or both */
 	bool		is_recovered;	/*!< 0=normal transaction,
 					1=recovered, must be rolled back,
-					protected by trx_sys->mutex when
-					trx->in_rw_trx_list holds */
+					protected by trx_sys.mutex when
+					trx is in rw_trx_hash */
 
 
 	/* These fields are not protected by any mutex. */
@@ -993,7 +948,7 @@ struct trx_t {
 					contains a pointer to the latest file
 					name; this is NULL if binlog is not
 					used */
-	int64_t		mysql_log_offset;
+	ulonglong	mysql_log_offset;
 					/*!< if MySQL binlog is used, this
 					field contains the end offset of the
 					binlog entry */
@@ -1006,21 +961,8 @@ struct trx_t {
 					statement uses, except those
 					in consistent read */
 	/*------------------------------*/
-#ifdef UNIV_DEBUG
-	/** The following two fields are mutually exclusive. */
-	/* @{ */
-
-	bool		in_rw_trx_list;	/*!< true if in trx_sys->rw_trx_list */
-	/* @} */
-#endif /* UNIV_DEBUG */
-	UT_LIST_NODE_T(trx_t)
-			mysql_trx_list;	/*!< list of transactions created for
-					MySQL; protected by trx_sys->mutex */
-#ifdef UNIV_DEBUG
-	bool		in_mysql_trx_list;
-					/*!< true if in
-					trx_sys->mysql_trx_list */
-#endif /* UNIV_DEBUG */
+	UT_LIST_NODE_T(trx_t) trx_list;	/*!< list of all transactions;
+					protected by trx_sys.mutex */
 	/*------------------------------*/
 	dberr_t		error_state;	/*!< 0 if no error, otherwise error
 					number; NOTE That ONLY the thread
@@ -1044,12 +986,6 @@ struct trx_t {
 			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
 					oldest first */
 	/*------------------------------*/
-	UndoMutex	undo_mutex;	/*!< mutex protecting the fields in this
-					section (down to undo_no_arr), EXCEPT
-					last_sql_stat_start, which can be
-					accessed only when we know that there
-					cannot be any activity in the undo
-					logs! */
 	undo_no_t	undo_no;	/*!< next undo log record number to
 					assign; since the undo log is
 					private for a transaction, this
@@ -1057,21 +993,15 @@ struct trx_t {
 					with no gaps; thus it represents
 					the number of modified/inserted
 					rows in a transaction */
-	ulint		undo_rseg_space;
-					/*!< space id where last undo record
-					was written */
 	trx_savept_t	last_sql_stat_start;
 					/*!< undo_no when the last sql statement
 					was started: in case of an error, trx
-					is rolled back down to this undo
-					number; see note at undo_mutex! */
+					is rolled back down to this number */
 	trx_rsegs_t	rsegs;		/* rollback segments for undo logging */
 	undo_no_t	roll_limit;	/*!< least undo number to undo during
 					a partial rollback; 0 otherwise */
-#ifdef UNIV_DEBUG
 	bool		in_rollback;	/*!< true when the transaction is
 					executing a partial or full rollback */
-#endif /* UNIV_DEBUG */
 	ulint		pages_undone;	/*!< number of undo log pages undone
 					since the last undo log truncation */
 	/*------------------------------*/
@@ -1083,7 +1013,7 @@ struct trx_t {
 					also in the lock list trx_locks. This
 					vector needs to be freed explicitly
 					when the trx instance is destroyed.
-					Protected by lock_sys->mutex. */
+					Protected by lock_sys.mutex. */
 	/*------------------------------*/
 	bool		read_only;	/*!< true if transaction is flagged
 					as a READ-ONLY transaction.
@@ -1120,14 +1050,6 @@ struct trx_t {
 	const char*	start_file;	/*!< Filename where it was started */
 #endif /* UNIV_DEBUG */
 
-	lint		n_ref;		/*!< Count of references, protected
-					by trx_t::mutex. We can't release the
-					locks nor commit the transaction until
-					this reference is 0.  We can change
-					the state to COMMITTED_IN_MEMORY to
-					signify that it is no longer
-					"active". */
-
 	XID*		xid;		/*!< X/Open XA transaction
 					identification to identify a
 					transaction branch */
@@ -1156,12 +1078,14 @@ struct trx_t {
 	os_event_t	wsrep_event;	/* event waited for in srv_conc_slot */
 #endif /* WITH_WSREP */
 
+	rw_trx_hash_element_t *rw_trx_hash_element;
+	LF_PINS *rw_trx_hash_pins;
 	ulint		magic_n;
 
 	/** @return whether any persistent undo log has been generated */
 	bool has_logged_persistent() const
 	{
-		return(rsegs.m_redo.insert_undo || rsegs.m_redo.update_undo);
+		return(rsegs.m_redo.undo);
 	}
 
 	/** @return whether any undo log has been generated */
@@ -1170,6 +1094,13 @@ struct trx_t {
 		return(has_logged_persistent() || rsegs.m_noredo.undo);
 	}
 
+	/** @return whether any undo log has been generated or
+	recovered */
+	bool has_logged_or_recovered() const
+	{
+		return(has_logged() || rsegs.m_redo.old_insert);
+	}
+
 	/** @return rollback segment for modifying temporary tables */
 	trx_rseg_t* get_temp_rseg()
 	{
@@ -1181,6 +1112,33 @@ struct trx_t {
 		return(assign_temp_rseg());
 	}
 
+  /** @return whether the reference count n_ref is positive */
+  bool is_referenced()
+  {
+    return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
+  }
+
+  /** Atomically increment the reference count n_ref. */
+  void reference()
+  {
+#ifdef UNIV_DEBUG
+  int32_t old_n_ref=
+#endif
+    my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
+    ut_ad(old_n_ref >= 0);
+  }
+
+  /** Atomically decrement the reference count n_ref. */
+  void release_reference()
+  {
+#ifdef UNIV_DEBUG
+  int32_t old_n_ref=
+#endif
+    my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
+    ut_ad(old_n_ref > 0);
+  }
+
+
 private:
 	/** Assign a rollback segment for modifying temporary tables.
 	@return the assigned rollback segment */
@@ -1266,32 +1224,6 @@ struct commit_node_t{
 	mutex_exit(&t->mutex);			\
 } while (0)
 
-/**
-Increase the reference count. If the transaction is in state
-TRX_STATE_COMMITTED_IN_MEMORY then the transaction is considered
-committed and the reference count is not incremented.
-@param id the transaction ID; 0 if not to increment the reference count
-@param trx Transaction that is being referenced
-@return trx
-@retval	NULL	if the transaction is no longer active */
-inline trx_t* trx_reference(trx_id_t id, trx_t* trx)
-{
-	trx_mutex_enter(trx);
-
-	if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-		trx = NULL;
-	} else if (!id) {
-	} else if (trx->id != id) {
-		trx = NULL;
-	} else {
-		ut_ad(trx->n_ref >= 0);
-		++trx->n_ref;
-	}
-
-	trx_mutex_exit(trx);
-	return(trx);
-}
-
 #include "trx0trx.ic"
 
 #endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index dd42c8b8368..6589aca4e77 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -24,13 +24,11 @@ The transaction
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
-#include "read0read.h"
-
 /**********************************************************************//**
 Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
 that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
 @return TRUE if trx->state == state */
 UNIV_INLINE
 bool
@@ -69,8 +67,6 @@ trx_state_eq(
 		     || (relaxed
 			 && thd_get_error_number(trx->mysql_thd)));
 
-		ut_ad(!trx->in_rw_trx_list);
-
 		return(true);
 	}
 	ut_error;
@@ -209,42 +205,3 @@ ok:
 	trx->ddl = true;
 	trx->dict_operation = op;
 }
-
-/**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
-	trx_t*		trx)
-{
-	trx_mutex_enter(trx);
-
-	ut_ad(trx->n_ref > 0);
-	--trx->n_ref;
-
-	trx_mutex_exit(trx);
-}
-
-
-/**
-@param trx		Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
-	trx_t*		trx)
-{
-	return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
-
-/**
-@param trx		Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
-	const trx_t*	trx)
-{
-	return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index b42871bef31..abc92a6edec 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -31,12 +31,9 @@ Created 3/26/1996 Heikki Tuuri
 #include "ut0mutex.h"
 #include "ut0new.h"
 
-#include <set>
 #include <queue>
 #include <vector>
 
-//#include <unordered_set>
-
 /** printf(3) format used for printing DB_TRX_ID and other system fields */
 #define TRX_ID_FMT	IB_ID_FMT
 
@@ -95,8 +92,6 @@ enum trx_dict_op_t {
 struct trx_t;
 /** The locks and state of an active transaction */
 struct trx_lock_t;
-/** Transaction system */
-struct trx_sys_t;
 /** Signal */
 struct trx_sig_t;
 /** Rollback segment */
@@ -120,9 +115,6 @@ typedef ib_id_t	roll_ptr_t;
 /** Undo number */
 typedef ib_id_t	undo_no_t;
 
-/** Maximum transaction identifier */
-#define TRX_ID_MAX	IB_ID_MAX
-
 /** Transaction savepoint */
 struct trx_savept_t{
 	undo_no_t	least_undo_no;	/*!< least undo number to undo */
@@ -130,8 +122,6 @@ struct trx_savept_t{
 
 /** File objects */
 /* @{ */
-/** Transaction system header */
-typedef byte	trx_sysf_t;
 /** Rollback segment header */
 typedef byte	trx_rsegf_t;
 /** Undo segment header */
@@ -148,56 +138,8 @@ typedef	byte	trx_undo_rec_t;
 
 typedef ib_mutex_t RsegMutex;
 typedef ib_mutex_t TrxMutex;
-typedef ib_mutex_t UndoMutex;
 typedef ib_mutex_t PQMutex;
 typedef ib_mutex_t TrxSysMutex;
 
 typedef std::vector<trx_id_t, ut_allocator<trx_id_t> >	trx_ids_t;
-
-/** Mapping read-write transactions from id to transaction instance, for
-creating read views and during trx id lookup for MVCC and locking. */
-struct TrxTrack {
-	explicit TrxTrack(trx_id_t id, trx_t* trx = NULL)
-		:
-		m_id(id),
-		m_trx(trx)
-	{
-		// Do nothing
-	}
-
-	trx_id_t	m_id;
-	trx_t*		m_trx;
-};
-
-struct TrxTrackHash {
-	size_t operator()(const TrxTrack& key) const
-	{
-		return(size_t(key.m_id));
-	}
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackHashCmp {
-
-	bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
-	{
-		return(lhs.m_id == rhs.m_id);
-	}
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackCmp {
-
-	bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
-	{
-		return(lhs.m_id < rhs.m_id);
-	}
-};
-
-//typedef std::unordered_set<TrxTrack, TrxTrackHash, TrxTrackHashCmp> TrxIdSet;
-typedef std::set<TrxTrack, TrxTrackCmp, ut_allocator<TrxTrack> >
-	TrxIdSet;
-
 #endif /* trx0types_h */
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index f738af4b454..16e2a384424 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -118,17 +118,6 @@ page_t*
 trx_undo_page_get_s_latched(const page_id_t& page_id, mtr_t* mtr);
 
 /******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
-	trx_undo_rec_t*	rec,	/*!< in: undo log record */
-	ulint		page_no,/*!< in: undo log header page number */
-	ulint		offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
 Returns the next undo log record on the page in the specified log, or
 NULL if none exists.
 @return pointer to record, NULL if none */
@@ -139,28 +128,6 @@ trx_undo_page_get_next_rec(
 	trx_undo_rec_t*	rec,	/*!< in: undo log record */
 	ulint		page_no,/*!< in: undo log header page number */
 	ulint		offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset);	/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset);/*!< in: undo log header offset on page */
 /***********************************************************************//**
 Gets the previous record in an undo log.
 @return undo log record, the page s-latched, NULL if none */
@@ -192,20 +159,18 @@ trx_undo_get_next_rec(
 @return undo log record, the page latched, NULL if none */
 trx_undo_rec_t*
 trx_undo_get_first_rec(
-	ulint			space,
+	fil_space_t*		space,
 	ulint			page_no,
 	ulint			offset,
 	ulint			mode,
 	mtr_t*			mtr);
 
 /** Allocate an undo log page.
-@param[in,out]	trx	transaction
 @param[in,out]	undo	undo log
 @param[in,out]	mtr	mini-transaction that does not hold any page latch
 @return	X-latched block if success
 @retval	NULL	on failure */
-buf_block_t*
-trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /** Free the last undo log page. The caller must hold the rseg mutex.
@@ -238,32 +203,28 @@ trx_undo_truncate_start(
 	ulint		hdr_page_no,
 	ulint		hdr_offset,
 	undo_no_t	limit);
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created.
-@return the combined size of undo log segments in pages */
-ulint
-trx_undo_lists_init(
-/*================*/
-	trx_rseg_t*	rseg);	/*!< in: rollback segment memory object */
+/** Assign an undo log for a persistent transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out]	trx	transaction
+@param[out]	err	error code
+@param[in,out]	mtr	mini-transaction
+@return	the undo log block
+@retval	NULL	on error */
+buf_block_t*
+trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
+	MY_ATTRIBUTE((nonnull));
 /** Assign an undo log for a transaction.
 A new undo log is created or a cached undo log reused.
 @param[in,out]	trx	transaction
 @param[in]	rseg	rollback segment
 @param[out]	undo	the undo log
-@param[in]	type	TRX_UNDO_INSERT or TRX_UNDO_UPDATE
-@retval	DB_SUCCESS	on success
-@retval	DB_TOO_MANY_CONCURRENT_TRXS
-@retval	DB_OUT_OF_FILE_SPACE
-@retval	DB_READ_ONLY
-@retval DB_OUT_OF_MEMORY */
-dberr_t
-trx_undo_assign_undo(
-	trx_t*		trx,
-	trx_rseg_t*	rseg,
-	trx_undo_t**	undo,
-	ulint		type)
+@param[out]	err	error code
+@param[in,out]	mtr	mini-transaction
+@return	the undo log block
+@retval	NULL	on error */
+buf_block_t*
+trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+		    dberr_t* err, mtr_t* mtr)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /******************************************************************//**
 Sets the state of the undo log segment at a transaction finish.
@@ -276,7 +237,7 @@ trx_undo_set_state_at_finish(
 
 /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
 @param[in,out]	trx		transaction
-@param[in,out]	undo		insert_undo or update_undo log
+@param[in,out]	undo		undo log
 @param[in]	rollback	false=XA PREPARE, true=XA ROLLBACK
 @param[in,out]	mtr		mini-transaction
 @return undo log segment header page, x-latched */
@@ -287,20 +248,7 @@ trx_undo_set_state_at_prepare(
 	bool		rollback,
 	mtr_t*		mtr);
 
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-void
-trx_undo_update_cleanup(
-/*====================*/
-	trx_t*		trx,		/*!< in: trx owning the update
-					undo log */
-	page_t*		undo_page,	/*!< in: update undo log header page,
-					x-latched */
-	mtr_t*		mtr);		/*!< in: mtr */
-
-/** Free an insert or temporary undo log after commit or rollback.
+/** Free an old insert or temporary undo log after commit or rollback.
 The information is not needed after a commit or rollback, therefore
 the data can be discarded.
 @param[in,out]	undo	undo log
@@ -308,26 +256,31 @@ the data can be discarded.
 void
 trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp);
 
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
+/** At shutdown, frees the undo logs of a transaction. */
 void
-trx_undo_free_prepared(
-/*===================*/
-	trx_t*	trx)	/*!< in/out: PREPARED transaction */
-	ATTRIBUTE_COLD __attribute__((nonnull));
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
+trx_undo_free_at_shutdown(trx_t *trx);
+
+/** Parse MLOG_UNDO_INIT
+(the redo log entry of an undo log page initialization).
+@param[in]	ptr	log record
+@param[in]	end_ptr	end of log record buffer
+@param[in,out]	page	page or NULL
+@return	end of log record
+@retval	NULL	if the log record is incomplete */
+byte*
+trx_undo_parse_page_init(const byte* ptr, const byte* end_ptr, page_t* page);
+/** Parse MLOG_UNDO_HDR_REUSE for crash-upgrade from MariaDB 10.2.
+@param[in]	ptr	redo log record
+@param[in]	end_ptr	end of log buffer
+@param[in,out]	page	undo page or NULL
 @return end of log record or NULL */
 byte*
-trx_undo_parse_page_init(
-/*=====================*/
-	const byte*	ptr,	/*!< in: buffer */
-	const byte*	end_ptr,/*!< in: buffer end */
-	page_t*		page,	/*!< in: page or NULL */
-	mtr_t*		mtr);	/*!< in: mtr or NULL */
-/** Parse the redo log entry of an undo log page header create or reuse.
-@param[in]	type	MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+trx_undo_parse_page_header_reuse(
+	const byte*	ptr,
+	const byte*	end_ptr,
+	page_t*		page);
+
+/** Parse the redo log entry of an undo log page header create.
 @param[in]	ptr	redo log record
 @param[in]	end_ptr	end of log buffer
 @param[in,out]	page	page frame or NULL
@@ -335,17 +288,19 @@ trx_undo_parse_page_init(
 @return end of log record or NULL */
 byte*
 trx_undo_parse_page_header(
-	mlog_id_t	type,
 	const byte*	ptr,
 	const byte*	end_ptr,
 	page_t*		page,
 	mtr_t*		mtr);
-/************************************************************************
-Frees an undo log memory copy. */
-void
-trx_undo_mem_free(
-/*==============*/
-	trx_undo_t*	undo);		/* in: the undo object to be freed */
+/** Read an undo log when starting up the database.
+@param[in,out]	rseg		rollback segment
+@param[in]	id		rollback segment slot
+@param[in]	page_no		undo log segment page number
+@param[in,out]	max_trx_id	the largest observed transaction ID
+@return	size of the undo log in pages */
+ulint
+trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
+				trx_id_t& max_trx_id);
 
 #endif /* !UNIV_INNOCHECKSUM */
 
@@ -368,25 +323,15 @@ trx_undo_mem_free(
 
 #ifndef UNIV_INNOCHECKSUM
 
-/** Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
+/** Transaction undo log memory object; modified by the thread associated
+with the transaction. */
 
 struct trx_undo_t {
 	/*-----------------------------*/
 	ulint		id;		/*!< undo log slot number within the
 					rollback segment */
-	ulint		type;		/*!< TRX_UNDO_INSERT or
-					TRX_UNDO_UPDATE */
 	ulint		state;		/*!< state of the corresponding undo log
 					segment */
-	ibool		del_marks;	/*!< relevant only in an update undo
-					log: this is TRUE if the transaction may
-					have delete marked records, because of
-					a delete of a row or an update of an
-					indexed field; purge is then
-					necessary; also TRUE if the transaction
-					has updated an externally stored
-					field */
 	trx_id_t	trx_id;		/*!< id of the trx assigned to the undo
 					log */
 	XID		xid;		/*!< X/Open XA transaction
@@ -396,8 +341,6 @@ struct trx_undo_t {
 					id */
 	trx_rseg_t*	rseg;		/*!< rseg where the undo log belongs */
 	/*-----------------------------*/
-	ulint		space;		/*!< space id where the undo log
-					placed */
 	ulint		hdr_page_no;	/*!< page number of the header page in
 					the undo log */
 	ulint		hdr_offset;	/*!< header offset of the undo log on
@@ -407,8 +350,6 @@ struct trx_undo_t {
 					top_page_no during a rollback */
 	ulint		size;		/*!< current size in pages */
 	/*-----------------------------*/
-	ulint		empty;		/*!< TRUE if the stack of undo log
-					records is currently empty */
 	ulint		top_page_no;	/*!< page number where the latest undo
 					log record was catenated; during
 					rollback the page from which the latest
@@ -416,11 +357,16 @@ struct trx_undo_t {
 	ulint		top_offset;	/*!< offset of the latest undo record,
 					i.e., the topmost element in the undo
 					log if we think of it as a stack */
-	undo_no_t	top_undo_no;	/*!< undo number of the latest record */
+	undo_no_t	top_undo_no;	/*!< undo number of the latest record
+					(IB_ID_MAX if the undo log is empty) */
 	buf_block_t*	guess_block;	/*!< guess for the buffer block where
 					the top page might reside */
 	ulint		withdraw_clock;	/*!< the withdraw clock value of the
 					buffer pool when guess_block was stored */
+
+	/** @return whether the undo log is empty */
+	bool empty() const { return top_undo_no == IB_ID_MAX; }
+
 	/*-----------------------------*/
 	UT_LIST_NODE_T(trx_undo_t) undo_list;
 					/*!< undo log objects in the rollback
@@ -433,8 +379,8 @@ struct trx_undo_t {
 /*-------------------------------------------------------------*/
 /** Transaction undo log page header offsets */
 /* @{ */
-#define	TRX_UNDO_PAGE_TYPE	0	/*!< TRX_UNDO_INSERT or
-					TRX_UNDO_UPDATE */
+#define	TRX_UNDO_PAGE_TYPE	0	/*!< unused; 0 (before MariaDB 10.3.1:
+					TRX_UNDO_INSERT or TRX_UNDO_UPDATE) */
 #define	TRX_UNDO_PAGE_START	2	/*!< Byte offset where the undo log
 					records for the LATEST transaction
 					start on this page (remember that
@@ -455,7 +401,7 @@ struct trx_undo_t {
 at most this many bytes used; we must leave space at least for one new undo
 log header on the page */
 
-#define TRX_UNDO_PAGE_REUSE_LIMIT	(3 * UNIV_PAGE_SIZE / 4)
+#define TRX_UNDO_PAGE_REUSE_LIMIT	(3 << (srv_page_size_shift - 2))
 
 /* An update undo log segment may contain several undo logs on its first page
 if the undo logs took so little space that the segment could be cached and
@@ -495,14 +441,23 @@ log segment */
 page of an update undo log segment. */
 /* @{ */
 /*-------------------------------------------------------------*/
-#define	TRX_UNDO_TRX_ID		0	/*!< Transaction id */
-#define	TRX_UNDO_TRX_NO		8	/*!< Transaction number of the
-					transaction; defined only if the log
-					is in a history list */
-#define TRX_UNDO_DEL_MARKS	16	/*!< Defined only in an update undo
-					log: TRUE if the transaction may have
-					done delete markings of records, and
-					thus purge is necessary */
+/** Transaction start identifier, or 0 if the undo log segment has been
+completely purged and trx_purge_free_segment() has started freeing it */
+#define	TRX_UNDO_TRX_ID		0
+/** Transaction end identifier (if the log is in a history list),
+or 0 if the transaction has not been committed */
+#define	TRX_UNDO_TRX_NO		8
+/** Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
+surviving user records, this used to be called TRX_UNDO_DEL_MARKS.
+
+The value 1 indicates that purge needs to process the undo log segment.
+The value 0 indicates that all of it has been processed, and
+trx_purge_free_segment() has been invoked, so the log is not safe to access.
+
+Before MariaDB 10.3.1, a log segment may carry the value 0 even before
+trx_purge_free_segment() was called, for those undo log records for
+which purge would not result in removing delete-marked records. */
+#define	TRX_UNDO_NEEDS_PURGE	16
 #define	TRX_UNDO_LOG_START	18	/*!< Offset of the first undo log record
 					of this log on the header page; purge
 					may remove undo log record from the
@@ -532,7 +487,7 @@ page of an update undo log segment. */
 #define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
 
 /* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+MLOG_UNDO_HDR_CREATE. The appending of an XID to the
 header is logged separately. In this sense, the XID is not really a member
 of the undo log header. TODO: do not append the XID to the log header if XA
 is not needed by the user. The XID wastes about 150 bytes of space in every
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 0285c212bdd..630638f6b7b 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -40,9 +40,7 @@ trx_undo_build_roll_ptr(
 	ulint	offset)		/*!< in: offset of the undo entry within page */
 {
 	roll_ptr_t	roll_ptr;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
 	ut_ad(is_insert == 0 || is_insert == 1);
 	ut_ad(rseg_id < TRX_SYS_N_RSEGS);
 	ut_ad(offset < 65536);
@@ -67,12 +65,7 @@ trx_undo_decode_roll_ptr(
 	ulint*		offset)		/*!< out: offset of the undo
 					entry within page */
 {
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
+	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
 	ut_ad(roll_ptr < (1ULL << 56));
 	*offset = (ulint) roll_ptr & 0xFFFF;
 	roll_ptr >>= 16;
@@ -92,14 +85,9 @@ trx_undo_roll_ptr_is_insert(
 /*========================*/
 	roll_ptr_t	roll_ptr)	/*!< in: roll pointer */
 {
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-	ut_ad(roll_ptr < (1ULL << 56));
-	return((ibool) (roll_ptr >> 55));
+	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
+	ut_ad(roll_ptr < (1ULL << (ROLL_PTR_INSERT_FLAG_POS + 1)));
+	return((ibool) (roll_ptr >> ROLL_PTR_INSERT_FLAG_POS));
 }
 
 /***********************************************************************//**
@@ -111,10 +99,8 @@ trx_undo_trx_id_is_insert(
 /*======================*/
 	const byte*	trx_id)	/*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
 {
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error
-#endif
-	return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+	compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+	return bool(trx_id[DATA_TRX_ID_LEN] >> 7);
 }
 
 /*****************************************************************//**
@@ -129,9 +115,7 @@ trx_write_roll_ptr(
 					written */
 	roll_ptr_t	roll_ptr)	/*!< in: roll ptr */
 {
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
 	mach_write_to_7(ptr, roll_ptr);
 }
 
@@ -146,9 +130,7 @@ trx_read_roll_ptr(
 /*==============*/
 	const byte*	ptr)	/*!< in: pointer to memory from where to read */
 {
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
 	return(mach_read_from_7(ptr));
 }
 
@@ -184,89 +166,24 @@ trx_undo_page_get_s_latched(const page_id_t& page_id, mtr_t* mtr)
 	return(buf_block_get_frame(block));
 }
 
-/******************************************************************//**
-Returns the start offset of the undo log records of the specified undo
-log on the page.
-@return start offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset)	/*!< in: undo log header offset on page */
-{
-	ulint	start;
-
-	if (page_no == page_get_page_no(undo_page)) {
-
-		start = mach_read_from_2(offset + undo_page
-					 + TRX_UNDO_LOG_START);
-	} else {
-		start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
-	}
-
-	return(start);
-}
-
-/******************************************************************//**
-Returns the end offset of the undo log records of the specified undo
-log on the page.
+/** Determine the end offset of undo log records of an undo log page.
+@param[in]	undo_page	undo log page
+@param[in]	page_no		undo log header page number
+@param[in]	offset		undo log header offset
 @return end offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset)	/*!< in: undo log header offset on page */
+inline
+uint16_t
+trx_undo_page_get_end(const page_t* undo_page, ulint page_no, ulint offset)
 {
-	trx_ulogf_t*	log_hdr;
-	ulint		end;
-
 	if (page_no == page_get_page_no(undo_page)) {
-
-		log_hdr = undo_page + offset;
-
-		end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
-		if (end == 0) {
-			end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
-					       + TRX_UNDO_PAGE_FREE);
+		if (uint16_t end = mach_read_from_2(TRX_UNDO_NEXT_LOG
+						    + offset + undo_page)) {
+			return end;
 		}
-	} else {
-		end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
-				       + TRX_UNDO_PAGE_FREE);
-	}
-
-	return(end);
-}
-
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
-	trx_undo_rec_t*	rec,	/*!< in: undo log record */
-	ulint		page_no,/*!< in: undo log header page number */
-	ulint		offset)	/*!< in: undo log header offset on page */
-{
-	page_t*	undo_page;
-	ulint	start;
-
-	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
-	start = trx_undo_page_get_start(undo_page, page_no, offset);
-
-	if (start + undo_page == rec) {
-
-		return(NULL);
 	}
 
-	return(undo_page + mach_read_from_2(rec - 2));
+	return mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+				+ undo_page);
 }
 
 /******************************************************************//**
@@ -285,7 +202,7 @@ trx_undo_page_get_next_rec(
 	ulint	end;
 	ulint	next;
 
-	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
+	undo_page = (page_t*) ut_align_down(rec, srv_page_size);
 
 	end = trx_undo_page_get_end(undo_page, page_no, offset);
 
@@ -298,55 +215,3 @@ trx_undo_page_get_next_rec(
 
 	return(undo_page + next);
 }
-
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset)	/*!< in: undo log header offset on page */
-{
-	ulint	start;
-	ulint	end;
-
-	start = trx_undo_page_get_start(undo_page, page_no, offset);
-	end = trx_undo_page_get_end(undo_page, page_no, offset);
-
-	if (start == end) {
-
-		return(NULL);
-	}
-
-	return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
-	page_t*	undo_page,/*!< in: undo log page */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset)	/*!< in: undo log header offset on page */
-{
-	ulint	start;
-	ulint	end;
-
-	start = trx_undo_page_get_start(undo_page, page_no, offset);
-	end = trx_undo_page_get_end(undo_page, page_no, offset);
-
-	if (start == end) {
-
-		return(NULL);
-	}
-
-	return(undo_page + start);
-}
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 10eb83289da..bf1d245a65e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -50,16 +50,14 @@ calculated in make_version_string() in sql/sql_show.cc like this:
 because the version is shown with only one dot, we skip the last
 component, i.e. we show M.N.P as M.N */
 #define INNODB_VERSION_SHORT	\
-	(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+	(MYSQL_VERSION_MAJOR << 8 | MYSQL_VERSION_MINOR)
 
 #define INNODB_VERSION_STR			\
-	IB_TO_STR(INNODB_VERSION_MAJOR) "."	\
-	IB_TO_STR(INNODB_VERSION_MINOR) "."	\
-	IB_TO_STR(INNODB_VERSION_BUGFIX)
+	IB_TO_STR(MYSQL_VERSION_MAJOR) "."	\
+	IB_TO_STR(MYSQL_VERSION_MINOR) "."	\
+	IB_TO_STR(MYSQL_VERSION_PATCH)
 
-#define REFMAN "http://dev.mysql.com/doc/refman/"	\
-	IB_TO_STR(INNODB_VERSION_MAJOR) "."		\
-	IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
+#define REFMAN "http://dev.mysql.com/doc/refman/5.7/en/"
 
 /** How far ahead should we tell the service manager the timeout
 (time in seconds) */
@@ -172,9 +170,8 @@ for all cases. This is used by ut0lst.h related code. */
 /* When this macro is defined then additional test functions will be
 compiled. These functions live at the end of each relevant source file
 and have "test_" prefix. These functions can be called from the end of
-innobase_init() or they can be called from gdb after
-innobase_start_or_create_for_mysql() has executed using the call
-command. */
+innodb_init() or they can be called from gdb after srv_start() has executed
+using the call command. */
 /*
 #define UNIV_COMPILE_TEST_FUNCS
 #define UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
@@ -275,33 +272,6 @@ management to ensure correct alignment for doubles etc. */
 			========================
 */
 
-/** There are currently two InnoDB file formats which are used to group
-features with similar restrictions and dependencies. Using an enum allows
-switch statements to give a compiler warning when a new one is introduced. */
-enum innodb_file_formats_enum {
-	/** Antelope File Format: InnoDB/MySQL up to 5.1.
-	This format includes REDUNDANT and COMPACT row formats */
-	UNIV_FORMAT_A		= 0,
-
-	/** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
-	This format includes COMPRESSED and DYNAMIC row formats.  It
-	includes the ability to create secondary indexes from data that
-	is not on the clustered index page and the ability to store more
-	data off the clustered index page. */
-	UNIV_FORMAT_B		= 1
-};
-
-typedef enum innodb_file_formats_enum innodb_file_formats_t;
-
-/** Minimum supported file format */
-#define UNIV_FORMAT_MIN		UNIV_FORMAT_A
-
-/** Maximum supported file format */
-#define UNIV_FORMAT_MAX		UNIV_FORMAT_B
-
-/** The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT	srv_page_size_shift
-
 #ifdef HAVE_LZO
 #define IF_LZO(A,B) A
 #else
@@ -338,32 +308,29 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
 #define IF_PUNCH_HOLE(A,B) B
 #endif
 
-/** The universal page size of the database */
-#define UNIV_PAGE_SIZE		((ulint) srv_page_size)
-
 /** log2 of smallest compressed page size (1<<10 == 1024 bytes)
 Note: This must never change! */
-#define UNIV_ZIP_SIZE_SHIFT_MIN		10
+#define UNIV_ZIP_SIZE_SHIFT_MIN		10U
 
 /** log2 of largest compressed page size (1<<14 == 16384 bytes).
 A compressed page directory entry reserves 14 bits for the start offset
 and 2 bits for flags. This limits the uncompressed page size to 16k.
 */
-#define UNIV_ZIP_SIZE_SHIFT_MAX		14
+#define UNIV_ZIP_SIZE_SHIFT_MAX		14U
 
 /* Define the Min, Max, Default page sizes. */
 /** Minimum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MIN	12
+#define UNIV_PAGE_SIZE_SHIFT_MIN	12U
 /** log2 of largest page size (1<<16 == 64436 bytes). */
 /** Maximum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MAX	16
+#define UNIV_PAGE_SIZE_SHIFT_MAX	16U
 /** log2 of default page size (1<<14 == 16384 bytes). */
 /** Default Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_DEF	14
+#define UNIV_PAGE_SIZE_SHIFT_DEF	14U
 /** Original 16k InnoDB Page Size Shift, in case the default changes */
-#define UNIV_PAGE_SIZE_SHIFT_ORIG	14
+#define UNIV_PAGE_SIZE_SHIFT_ORIG	14U
 /** Original 16k InnoDB Page Size as an ssize (log2 - 9) */
-#define UNIV_PAGE_SSIZE_ORIG		(UNIV_PAGE_SIZE_SHIFT_ORIG - 9)
+#define UNIV_PAGE_SSIZE_ORIG		(UNIV_PAGE_SIZE_SHIFT_ORIG - 9U)
 
 /** Minimum page size InnoDB currently supports. */
 #define UNIV_PAGE_SIZE_MIN	(1U << UNIV_PAGE_SIZE_SHIFT_MIN)
@@ -383,13 +350,13 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
 /** Largest possible ssize for an uncompressed page.
 (The convention 'ssize' is used for 'log2 minus 9' or the number of
 shifts starting with 512.)
-This max number varies depending on UNIV_PAGE_SIZE. */
+This max number varies depending on srv_page_size. */
 #define UNIV_PAGE_SSIZE_MAX	\
-	static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+	ulint(srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
 
 /** Smallest possible ssize for an uncompressed page. */
 #define UNIV_PAGE_SSIZE_MIN	\
-	static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+	ulint(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
 
 /** Maximum number of parallel threads in a parallelized operation */
 #define UNIV_MAX_PARALLELISM	32
@@ -494,7 +461,7 @@ typedef	ib_uint64_t		lsn_t;
 #define UINT64_UNDEFINED	((ib_uint64_t)(-1))
 
 /** The bitmask of 32-bit unsigned integer */
-#define ULINT32_MASK		0xFFFFFFFF
+#define ULINT32_MASK		0xFFFFFFFFU
 /** The undefined 32-bit unsigned integer */
 #define	ULINT32_UNDEFINED	ULINT32_MASK
 
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 9c0cd6ee3c3..1ef90eca416 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -144,9 +144,6 @@ ut_bit_get_nth(
 	ulint	n)	/*!< in: nth bit requested */
 {
 	ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
 	return(1 & (a >> n));
 }
 
@@ -162,9 +159,6 @@ ut_bit_set_nth(
 	ibool	val)	/*!< in: value for the bit to set */
 {
 	ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
 	if (val) {
 		return(((ulint) 1 << n) | a);
 	} else {
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 36b389b5bd2..32ad066f85a 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -47,14 +47,11 @@ typedef uint32_t	(*ut_crc32_func_t)(const byte* ptr, ulint len);
 /** Pointer to CRC32 calculation function. */
 extern ut_crc32_func_t	ut_crc32;
 
-/** Pointer to CRC32 calculation function, which uses big-endian byte order
+/** CRC32 calculation function, which uses big-endian byte order
 when converting byte strings to integers internally. */
-extern ut_crc32_func_t	ut_crc32_legacy_big_endian;
-
-/** Pointer to CRC32-byte-by-byte calculation function (byte order agnostic,
-but very slow). */
-extern ut_crc32_func_t	ut_crc32_byte_by_byte;
+extern uint32_t ut_crc32_legacy_big_endian(const byte* buf, ulint len);
 
+/** Text description of CRC32 implementation */
 extern const char*	ut_crc32_implementation;
 
 #endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index fd9a064ba35..6672be62617 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -61,8 +61,8 @@ ut_dbg_assertion_failed(
 	ut_dbg_assertion_failed(0, __FILE__, __LINE__)
 
 /** Debug assertion */
-#define ut_ad	DBUG_ASSERT
-#ifdef UNIV_DEBUG
+#define ut_ad	DBUG_SLOW_ASSERT
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
 /** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
 #define ut_d(EXPR)	EXPR
 #else
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 09733da20a0..f62d3744b96 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -426,7 +426,7 @@ Gets the last node in a two-way list.
 @return last node, or NULL if the list is empty */
 #define UT_LIST_GET_LAST(BASE)		(BASE).end
 
-struct	NullValidate { void operator()(const void* elem) { } };
+struct	NullValidate { void operator()(const void*) { } };
 
 /********************************************************************//**
 Iterate over all the elements and call the functor for each element.
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
index 955e7b026c7..5dcb25271c5 100644
--- a/storage/innobase/include/ut0new.h
+++ b/storage/innobase/include/ut0new.h
@@ -129,6 +129,10 @@ InnoDB:
 #include <string.h> /* strlen(), strrchr(), strncmp() */
 
 #include "my_global.h" /* needed for headers from mysql/psi/ */
+#if defined(HAVE_MADVISE)
+#include <sys/mman.h> /* madvise(), MADV_DONTDUMP, MADV_DODUMP */
+#endif
+
 /* JAN: TODO: missing 5.7 header */
 #ifdef HAVE_MYSQL_MEMORY_H
 #include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
@@ -172,7 +176,6 @@ extern PSI_memory_key	mem_key_other;
 extern PSI_memory_key	mem_key_row_log_buf;
 extern PSI_memory_key	mem_key_row_merge_sort;
 extern PSI_memory_key	mem_key_std;
-extern PSI_memory_key	mem_key_trx_sys_t_rw_trx_ids;
 extern PSI_memory_key	mem_key_partitioning;
 
 /** Setup the internal objects needed for UT_NEW() to operate.
@@ -235,6 +238,51 @@ struct ut_new_pfx_t {
 #endif
 };
 
+static inline void ut_allocate_trace_dontdump(void *ptr, size_t	bytes,
+					      bool
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+					      dontdump
+#endif
+					      , ut_new_pfx_t* pfx,
+					      const char*
+#ifdef UNIV_PFS_MEMORY
+					      file
+#endif
+
+					      )
+{
+	ut_a(ptr != NULL);
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+	if (dontdump && madvise(ptr, bytes, MADV_DONTDUMP)) {
+		ib::warn() << "Failed to set memory to DONTDUMP: "
+			   << strerror(errno)
+			   << " ptr " << ptr
+			   << " size " << bytes;
+	}
+#endif
+	if (pfx != NULL) {
+#ifdef UNIV_PFS_MEMORY
+		allocate_trace(bytes, file, pfx);
+#endif /* UNIV_PFS_MEMORY */
+		pfx->m_size = bytes;
+	}
+}
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
+static inline void ut_dodump(void* ptr, size_t m_size)
+{
+	if (ptr && madvise(ptr, m_size, MADV_DODUMP)) {
+		ib::warn() << "Failed to set memory to DODUMP: "
+			   << strerror(errno)
+			   << " ptr " << ptr
+			   << " size " << m_size;
+	}
+}
+#else
+static inline void ut_dodump(void*, size_t) {}
+#endif
+
 /** Allocator class for allocating memory from inside std::* containers.
 @tparam	T		type of allocated object
 @tparam oom_fatal	whether to commit suicide when running out of memory */
@@ -249,19 +297,25 @@ public:
 	typedef size_t		size_type;
 	typedef ptrdiff_t	difference_type;
 
+#ifdef UNIV_PFS_MEMORY
 	/** Default constructor. */
 	explicit
 	ut_allocator(PSI_memory_key key = PSI_NOT_INSTRUMENTED)
-#ifdef UNIV_PFS_MEMORY
 		: m_key(key)
-#endif /* UNIV_PFS_MEMORY */
 	{
 	}
+#else
+	ut_allocator() {}
+	ut_allocator(PSI_memory_key) {}
+#endif /* UNIV_PFS_MEMORY */
 
 	/** Constructor from allocator of another type. */
 	template <class U>
-	ut_allocator(
-		const ut_allocator<U>&	other)
+	ut_allocator(const ut_allocator<U>&
+#ifdef UNIV_PFS_MEMORY
+		     other
+#endif
+		     )
 #ifdef UNIV_PFS_MEMORY
 		: m_key(other.m_key)
 #endif /* UNIV_PFS_MEMORY */
@@ -282,6 +336,8 @@ public:
 #endif /* UNIV_PFS_MEMORY */
 	}
 
+	pointer allocate(size_type n) { return allocate(n, NULL, NULL); }
+
 	/** Allocate a chunk of memory that can hold 'n_elements' objects of
 	type 'T' and trace the allocation.
 	If the allocation fails this method may throw an exception. This
@@ -290,17 +346,19 @@ public:
 	After successfull allocation the returned pointer must be passed
 	to ut_allocator::deallocate() when no longer needed.
 	@param[in]	n_elements	number of elements
-	@param[in]	hint		pointer to a nearby memory location,
-	unused by this implementation
-	@param[in]	file		file name of the caller
 	@param[in]	set_to_zero	if true, then the returned memory is
 	initialized with 0x0 bytes.
+	@param[in]	throw_on_error	if true, raise exception if too big
 	@return pointer to the allocated memory */
 	pointer
 	allocate(
 		size_type	n_elements,
-		const_pointer	hint = NULL,
-		const char*	file = NULL,
+		const_pointer,
+		const char*
+#ifdef UNIV_PFS_MEMORY
+		file /*!< file name of the caller */
+#endif
+		,
 		bool		set_to_zero = false,
 		bool		throw_on_error = true)
 	{
@@ -567,6 +625,8 @@ public:
 	/** Allocate a large chunk of memory that can hold 'n_elements'
 	objects of type 'T' and trace the allocation.
 	@param[in]	n_elements	number of elements
+	@param[in]	dontdump	if true, advise the OS not to include
+	this memory in core dumps.
 	@param[out]	pfx		storage for the description of the
 	allocated memory. The caller must provide space for this one and keep
 	it until the memory is no longer needed and then pass it to
@@ -575,7 +635,8 @@ public:
 	pointer
 	allocate_large(
 		size_type	n_elements,
-		ut_new_pfx_t*	pfx)
+		ut_new_pfx_t*	pfx,
+		bool		dontdump = false)
 	{
 		if (n_elements == 0 || n_elements > max_size()) {
 			return(NULL);
@@ -586,13 +647,11 @@ public:
 		pointer	ptr = reinterpret_cast<pointer>(
 			os_mem_alloc_large(&n_bytes));
 
-#ifdef UNIV_PFS_MEMORY
-		if (ptr != NULL) {
-			allocate_trace(n_bytes, NULL, pfx);
+		if (ptr == NULL) {
+			return NULL;
 		}
-#else
-		pfx->m_size = n_bytes;
-#endif /* UNIV_PFS_MEMORY */
+
+		ut_allocate_trace_dontdump(ptr, n_bytes, dontdump, pfx, NULL);
 
 		return(ptr);
 	}
@@ -601,17 +660,30 @@ public:
 	deallocation.
 	@param[in,out]	ptr	pointer to memory to free
 	@param[in]	pfx	descriptor of the memory, as returned by
-	allocate_large(). */
+	allocate_large().
+	@param[in]      dodump  if true, advise the OS to include this
+	memory again if a core dump occurs. */
 	void
 	deallocate_large(
 		pointer			ptr,
-		const ut_new_pfx_t*	pfx)
+		const ut_new_pfx_t*
+#ifdef UNIV_PFS_MEMORY
+		pfx
+#endif
+		,
+		size_t			size,
+		bool			dodump = false)
 	{
+		if (dodump) {
+			ut_dodump(ptr, size);
+		}
 #ifdef UNIV_PFS_MEMORY
-		deallocate_trace(pfx);
+		if (pfx) {
+			deallocate_trace(pfx);
+		}
 #endif /* UNIV_PFS_MEMORY */
 
-		os_mem_free_large(ptr, pfx->m_size);
+		os_mem_free_large(ptr, size);
 	}
 
 #ifdef UNIV_PFS_MEMORY
@@ -725,12 +797,7 @@ could be freed by A2 even if the pfs mem key is different. */
 template <typename T>
 inline
 bool
-operator==(
-	const ut_allocator<T>&	lhs,
-	const ut_allocator<T>&	rhs)
-{
-	return(true);
-}
+operator==(const ut_allocator<T>&, const ut_allocator<T>&) { return(true); }
 
 /** Compare two allocators of the same type. */
 template <typename T>
@@ -843,6 +910,10 @@ ut_delete_array(
 	ut_allocator<byte>(key).allocate( \
 		n_bytes, NULL, __FILE__, false, false))
 
+#define ut_malloc_dontdump(n_bytes) static_cast<void*>( \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate_large( \
+		n_bytes, NULL, true)) /* pfx=NULL (untracked), dontdump=true */
+
 #define ut_zalloc(n_bytes, key)		static_cast<void*>( \
 	ut_allocator<byte>(key).allocate( \
 		n_bytes, NULL, __FILE__, true, false))
@@ -866,6 +937,10 @@ ut_delete_array(
 #define ut_free(ptr)	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
 	reinterpret_cast<byte*>(ptr))
 
+#define ut_free_dodump(ptr, size) \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate_large( \
+		reinterpret_cast<byte*>(ptr), NULL, size, true) /* dodump=true */
+
 #else /* UNIV_PFS_MEMORY */
 
 /* Fallbacks when memory tracing is disabled at compile time. */
@@ -888,6 +963,14 @@ ut_delete_array(
 
 #define ut_malloc_nokey(n_bytes)	::malloc(n_bytes)
 
+static inline void *ut_malloc_dontdump(size_t n_bytes)
+{
+	void *ptr = os_mem_alloc_large(&n_bytes);
+
+	ut_allocate_trace_dontdump(ptr, n_bytes, true, NULL, NULL);
+	return ptr;
+}
+
 #define ut_zalloc_nokey(n_bytes)	::calloc(1, n_bytes)
 
 #define ut_zalloc_nokey_nofatal(n_bytes)	::calloc(1, n_bytes)
@@ -896,6 +979,12 @@ ut_delete_array(
 
 #define ut_free(ptr)			::free(ptr)
 
+static inline void ut_free_dodump(void *ptr, size_t size)
+{
+	ut_dodump(ptr, size);
+	os_mem_free_large(ptr, size);
+}
+
 #endif /* UNIV_PFS_MEMORY */
 
 #endif /* ut0new_h */
diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h
index c0237158ce5..d3ea733a440 100644
--- a/storage/innobase/include/ut0pool.h
+++ b/storage/innobase/include/ut0pool.h
@@ -115,7 +115,7 @@ struct Pool {
 		} else if (m_last < m_end) {
 
 			/* Initialise the remaining elements. */
-			init(m_end - m_last);
+			init(size_t(m_end - m_last));
 
 			ut_ad(!m_pqueue.empty());
 
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 49ae3c81356..5baf8684d23 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -61,16 +61,6 @@ UNIV_INLINE
 ulint
 ut_rnd_gen_ulint(void);
 /*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
-	ulint	low,	/*!< in: low limit; can generate also this value */
-	ulint	high);	/*!< in: high limit; can generate also this value */
-
 /*******************************************************//**
 The following function generates a hash value for a ulint integer
 to a hash table of size table_size, which should be a prime or some
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index 16dccb545d8..1e4915dd0f9 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -97,30 +97,6 @@ ut_rnd_gen_ulint(void)
 	return(rnd);
 }
 
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
-	ulint	low,	/*!< in: low limit; can generate also this value */
-	ulint	high)	/*!< in: high limit; can generate also this value */
-{
-	ulint	rnd;
-
-	ut_ad(high >= low);
-
-	if (low == high) {
-
-		return(low);
-	}
-
-	rnd = ut_rnd_gen_ulint();
-
-	return(low + (rnd % (high - low)));
-}
-
 /*******************************************************//**
 The following function generates a hash value for a ulint integer
 to a hash table of size table_size, which should be a prime
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
index 1d5457a3ab0..4b96fad3c21 100644
--- a/storage/innobase/include/ut0stage.h
+++ b/storage/innobase/include/ut0stage.h
@@ -529,65 +529,28 @@ ut_stage_alter_t::change_phase(
 
 class ut_stage_alter_t {
 public:
-	explicit
-	ut_stage_alter_t(
-		const dict_index_t*	pk)
-	{
-	}
+	explicit ut_stage_alter_t(const dict_index_t*) {}
 
-	void
-	begin_phase_read_pk(
-		ulint	n_sort_indexes)
-	{
-	}
+	void begin_phase_read_pk(ulint)	{}
 
-	void
-	n_pk_recs_inc()
-	{
-	}
+	void n_pk_recs_inc() {}
 
-	void
-	inc(
-		ulint	inc_val = 1)
-	{
-	}
+	void inc() {}
+	void inc(ulint) {}
 
-	void
-	end_phase_read_pk()
-	{
-	}
+	void end_phase_read_pk() {}
 
-	void
-	begin_phase_sort(
-		double	sort_multi_factor)
-	{
-	}
+	void begin_phase_sort(double) {}
 
-	void
-	begin_phase_insert()
-	{
-	}
+	void begin_phase_insert() {}
 
-	void
-	begin_phase_flush(
-		ulint	n_flush_pages)
-	{
-	}
+	void begin_phase_flush(ulint) {}
 
-	void
-	begin_phase_log_index()
-	{
-	}
+	void begin_phase_log_index() {}
 
-	void
-	begin_phase_log_table()
-	{
-	}
+	void begin_phase_log_table() {}
 
-	void
-	begin_phase_end()
-	{
-	}
+	void begin_phase_end() {}
 };
 
 #endif /* HAVE_PSI_STAGE_INTERFACE */
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..1614d3ead6d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -45,6 +45,7 @@ Created 1/20/1994 Heikki Tuuri
 #include <stdarg.h>
 
 #include <string>
+#include <my_atomic.h>
 
 /** Index name prefix in fast index creation, as a string constant */
 #define TEMP_INDEX_PREFIX_STR	"\377"
@@ -52,35 +53,6 @@ Created 1/20/1994 Heikki Tuuri
 /** Time stamp */
 typedef time_t	ib_time_t;
 
-#ifdef HAVE_PAUSE_INSTRUCTION
-   /* According to the gcc info page, asm volatile means that the
-   instruction has important side-effects and must not be removed.
-   Also asm volatile may trigger a memory barrier (spilling all registers
-   to memory). */
-# ifdef __SUNPRO_CC
-#  define UT_RELAX_CPU() asm ("pause" )
-# else
-#  define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
-   /* In the Win32 API, the x86 PAUSE instruction is executed by calling
-   the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
-   independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
-     volatile int32	volatile_var; \
-     int32 oldval= 0; \
-     my_atomic_cas32(&volatile_var, &oldval, 1); \
-   } while (0)
-#endif
-
 #if defined (__GNUC__)
 # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
 #elif defined (_MSC_VER)
@@ -89,15 +61,6 @@ typedef time_t	ib_time_t;
 # define UT_COMPILER_BARRIER()
 #endif
 
-#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
-# include <sys/platform/ppc.h>
-# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
-# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
-#else
-# define UT_LOW_PRIORITY_CPU() ((void)0)
-# define UT_RESUME_PRIORITY_CPU() ((void)0)
-#endif
-
 /*********************************************************************//**
 Delays execution for at most max_wait_us microseconds or returns earlier
 if cond becomes true.