227 files changed, 32550 insertions, 19328 deletions
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
index 500bf4fe3b2..ec02febee74 100644
--- a/storage/innobase/include/api0api.h
+++ b/storage/innobase/include/api0api.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,16 +30,11 @@ InnoDB Native API
 #include "db0err.h"
 #include <stdio.h>
 
-#ifdef _MSC_VER
-#define strncasecmp		_strnicmp
-#define strcasecmp		_stricmp
-#endif
-
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#if defined(__GNUC__)
 #define UNIV_NO_IGNORE		MY_ATTRIBUTE ((warn_unused_result))
 #else
 #define UNIV_NO_IGNORE
-#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
+#endif /* __GNUC__ */
 
 /* See comment about ib_bool_t as to why the two macros are unsigned long. */
 /** The boolean value of "true" used internally within InnoDB */
@@ -53,7 +48,11 @@ typedef enum dberr_t		ib_err_t;
 /** Representation of a byte within InnoDB */
 typedef unsigned char		ib_byte_t;
 /** Representation of an unsigned long int within InnoDB */
+#ifdef _WIN64
+typedef unsigned __int64	ib_ulint_t;
+#else
 typedef unsigned long int	ib_ulint_t;
+#endif /* _WIN64 */
 
 /* We assume C99 support except when using VisualStudio. */
 #if !defined(_MSC_VER)
@@ -325,25 +324,6 @@ typedef struct {
 } ib_col_meta_t;
 
 /* Note: Must be in sync with trx0trx.h */
-/** @enum ib_trx_state_t The transaction state can be queried using the
-ib_trx_state() function. The InnoDB deadlock monitor can roll back a
-transaction and users should be prepared for this, especially where there
-is high contention. The way to determine the state of the transaction is to
-query it's state and check. */
-typedef enum {
-	IB_TRX_NOT_STARTED,		/*!< Has not started yet, the
-					transaction has not ben started yet.*/
-
-	IB_TRX_ACTIVE,			/*!< The transaction is currently
-					active and needs to be either
-					committed or rolled back. */
-
-	IB_TRX_COMMITTED_IN_MEMORY,	/*!< Not committed to disk yet */
-
-	IB_TRX_PREPARED			/*!< Support for 2PC/XA */
-} ib_trx_state_t;
-
-/* Note: Must be in sync with trx0trx.h */
 /** @enum ib_trx_level_t Transaction isolation levels */
 typedef enum {
 	IB_TRX_READ_UNCOMMITTED = 0,	/*!< Dirty read: non-locking SELECTs are
@@ -416,11 +396,11 @@ typedef struct ib_cursor_t* ib_crsr_t;
 This function is used to compare two data fields for which the data type
 is such that we must use the client code to compare them.
 
-@param col_meta		column meta data
-@param p1		key
+@param col_meta column meta data
+@param p1 key
 @oaram p1_len		key length
-@param p2		second key
-@param p2_len		second key length
+@param p2 second key
+@param p2_len second key length
 @return 1, 0, -1, if a is greater, equal, less than b, respectively */
 
 typedef int (*ib_client_cmp_t)(
@@ -433,18 +413,6 @@ typedef int (*ib_client_cmp_t)(
 /* This should be the same as univ.i */
 /** Represents SQL_NULL length */
 #define	IB_SQL_NULL		0xFFFFFFFF
-/** The number of system columns in a row. */
-#define IB_N_SYS_COLS		3
-
-/** The maximum length of a text column. */
-#define MAX_TEXT_LEN		4096
-
-/* MySQL uses 3 byte UTF-8 encoding. */
-/** The maximum length of a column name in a table schema. */
-#define IB_MAX_COL_NAME_LEN	(64 * 3)
-
-/** The maximum length of a table name (plus database name). */
-#define IB_MAX_TABLE_NAME_LEN	(64 * 3) * 2
 
 /*****************************************************************//**
 Start a transaction that's been rolled back. This special function
@@ -453,8 +421,7 @@ a transaction. While the transaction has been rolled back the handle
 is still valid and can be reused by calling this function. If you
 don't want to reuse the transaction handle then you can free the handle
 by calling ib_trx_release().
-@return	innobase txn handle */
-
+@return innobase txn handle */
 ib_err_t
 ib_trx_start(
 /*=========*/
@@ -469,8 +436,7 @@ ib_trx_start(
 /*****************************************************************//**
 Begin a transaction. This will allocate a new transaction handle and
 put the transaction in the active state.
-@return	innobase txn handle */
-
+@return innobase txn handle */
 ib_trx_t
 ib_trx_begin(
 /*=========*/
@@ -481,21 +447,6 @@ ib_trx_begin(
 					single DML */
 
 /*****************************************************************//**
-Query the transaction's state. This function can be used to check for
-the state of the transaction in case it has been rolled back by the
-InnoDB deadlock detector. Note that when a transaction is selected as
-a victim for rollback, InnoDB will always return an appropriate error
-code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
-@see DB_LOCK_WAIT_TIMEOUT
-@return	transaction state */
-
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
-	ib_trx_t	ib_trx);	/*!< in: trx handle */
-
-
-/*****************************************************************//**
 Check if the transaction is read_only */
 ib_u32_t
 ib_trx_read_only(
@@ -506,8 +457,7 @@ ib_trx_read_only(
 Release the resources of the transaction. If the transaction was
 selected as a victim by InnoDB and rolled back then use this function
 to free the transaction handle.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_release(
 /*===========*/
@@ -516,8 +466,7 @@ ib_trx_release(
 /*****************************************************************//**
 Commit a transaction. This function will release the schema latches too.
 It will also free the transaction handle.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_commit(
 /*==========*/
@@ -526,8 +475,7 @@ ib_trx_commit(
 /*****************************************************************//**
 Rollback a transaction. This function will release the schema latches too.
 It will also free the transaction handle.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_rollback(
 /*============*/
@@ -535,8 +483,7 @@ ib_trx_rollback(
 
 /*****************************************************************//**
 Open an InnoDB table and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_open_table_using_id(
 /*==========================*/
@@ -546,21 +493,8 @@ ib_cursor_open_table_using_id(
 	ib_crsr_t*	ib_crsr);	/*!< out,own: InnoDB cursor */
 
 /*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
-	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
-	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
-					can be NULL */
-	ib_crsr_t*	ib_crsr);	/*!< out: InnoDB cursor */
-
-/*****************************************************************//**
 Open an InnoDB secondary index cursor and return a cursor handle to it.
 @return DB_SUCCESS or err code */
-
 ib_err_t
 ib_cursor_open_index_using_name(
 /*============================*/
@@ -572,8 +506,7 @@ ib_cursor_open_index_using_name(
 
 /*****************************************************************//**
 Open an InnoDB table by name and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_open_table(
 /*=================*/
@@ -584,26 +517,15 @@ ib_cursor_open_table(
 
 /*****************************************************************//**
 Reset the cursor.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_reset(
 /*============*/
 	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
 
-
-/*****************************************************************//**
-set a cursor trx to NULL*/
-
-void
-ib_cursor_clear_trx(
-/*================*/
-	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
-
 /*****************************************************************//**
 Close an InnoDB table and free the cursor.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_close(
 /*============*/
@@ -612,7 +534,6 @@ ib_cursor_close(
 /*****************************************************************//**
 Close the table, decrement n_ref_count count.
 @return DB_SUCCESS or err code */
-
 ib_err_t
 ib_cursor_close_table(
 /*==================*/
@@ -621,7 +542,6 @@ ib_cursor_close_table(
 /*****************************************************************//**
 update the cursor with new transactions and also reset the cursor
 @return DB_SUCCESS or err code */
-
 ib_err_t
 ib_cursor_new_trx(
 /*==============*/
@@ -631,26 +551,15 @@ ib_cursor_new_trx(
 /*****************************************************************//**
 Commit the transaction in a cursor
 @return DB_SUCCESS or err code */
-
 ib_err_t
 ib_cursor_commit_trx(
 /*=================*/
 	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
 	ib_trx_t	ib_trx);	/*!< in: transaction */
 
-/********************************************************************//**
-Open a table using the table name, if found then increment table ref count.
-@return table instance if found */
-
-void*
-ib_open_table_by_name(
-/*==================*/
-	const char*	name);		/*!< in: table name to lookup */
-
 /*****************************************************************//**
 Insert a row to a table.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_insert_row(
 /*=================*/
@@ -659,8 +568,7 @@ ib_cursor_insert_row(
 
 /*****************************************************************//**
 Update a row in a table.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_update_row(
 /*=================*/
@@ -670,8 +578,7 @@ ib_cursor_update_row(
 
 /*****************************************************************//**
 Delete a row in a table.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_delete_row(
 /*=================*/
@@ -679,8 +586,7 @@ ib_cursor_delete_row(
 
 /*****************************************************************//**
 Read current row.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_read_row(
 /*===============*/
@@ -691,26 +597,15 @@ ib_cursor_read_row(
 
 /*****************************************************************//**
 Move cursor to the first record in the table.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_first(
 /*============*/
 	ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
 
 /*****************************************************************//**
-Move cursor to the last record in the table.
-@return	DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_last(
-/*===========*/
-	ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
 Move cursor to the next record in the table.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_next(
 /*===========*/
@@ -718,8 +613,7 @@ ib_cursor_next(
 
 /*****************************************************************//**
 Search for key.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_moveto(
 /*=============*/
@@ -729,7 +623,6 @@ ib_cursor_moveto(
 
 /*****************************************************************//**
 Set the match mode for ib_cursor_move(). */
-
 void
 ib_cursor_set_match_mode(
 /*=====================*/
@@ -738,8 +631,7 @@ ib_cursor_set_match_mode(
 
 /*****************************************************************//**
 Set a column of the tuple. Make a copy using the tuple's heap.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_col_set_value(
 /*=============*/
@@ -752,8 +644,7 @@ ib_col_set_value(
 
 /*****************************************************************//**
 Get the size of the data available in the column the tuple.
-@return	bytes avail or IB_SQL_NULL */
-
+@return bytes avail or IB_SQL_NULL */
 ib_ulint_t
 ib_col_get_len(
 /*===========*/
@@ -762,8 +653,7 @@ ib_col_get_len(
 
 /*****************************************************************//**
 Copy a column value from the tuple.
-@return	bytes copied or IB_SQL_NULL */
-
+@return bytes copied or IB_SQL_NULL */
 ib_ulint_t
 ib_col_copy_value(
 /*==============*/
@@ -774,8 +664,7 @@ ib_col_copy_value(
 
 /*************************************************************//**
 Read a signed int 8 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i8(
 /*=============*/
@@ -785,8 +674,7 @@ ib_tuple_read_i8(
 
 /*************************************************************//**
 Read an unsigned int 8 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u8(
 /*=============*/
@@ -796,8 +684,7 @@ ib_tuple_read_u8(
 
 /*************************************************************//**
 Read a signed int 16 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i16(
 /*==============*/
@@ -807,8 +694,7 @@ ib_tuple_read_i16(
 
 /*************************************************************//**
 Read an unsigned int 16 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u16(
 /*==============*/
@@ -818,8 +704,7 @@ ib_tuple_read_u16(
 
 /*************************************************************//**
 Read a signed int 32 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i32(
 /*==============*/
@@ -829,8 +714,7 @@ ib_tuple_read_i32(
 
 /*************************************************************//**
 Read an unsigned int 32 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u32(
 /*==============*/
@@ -840,8 +724,7 @@ ib_tuple_read_u32(
 
 /*************************************************************//**
 Read a signed int 64 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i64(
 /*==============*/
@@ -851,8 +734,7 @@ ib_tuple_read_i64(
 
 /*************************************************************//**
 Read an unsigned int 64 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u64(
 /*==============*/
@@ -862,8 +744,7 @@ ib_tuple_read_u64(
 
 /*****************************************************************//**
 Get a column value pointer from the tuple.
-@return	NULL or pointer to buffer */
-
+@return NULL or pointer to buffer */
 const void*
 ib_col_get_value(
 /*=============*/
@@ -872,8 +753,7 @@ ib_col_get_value(
 
 /*****************************************************************//**
 Get a column type, length and attributes from the tuple.
-@return	len of column data */
-
+@return len of column data */
 ib_ulint_t
 ib_col_get_meta(
 /*============*/
@@ -883,8 +763,7 @@ ib_col_get_meta(
 
 /*****************************************************************//**
 "Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
-@return	new tuple, or NULL */
-
+@return new tuple, or NULL */
 ib_tpl_t
 ib_tuple_clear(
 /*============*/
@@ -894,8 +773,7 @@ ib_tuple_clear(
 Create a new cluster key search tuple and copy the contents of  the
 secondary index key tuple columns that refer to the cluster index record
 to the cluster key. It does a deep copy of the column data.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_tuple_get_cluster_key(
 /*=====================*/
@@ -904,20 +782,8 @@ ib_tuple_get_cluster_key(
 	const ib_tpl_t	ib_src_tpl);	/*!< in: source tuple */
 
 /*****************************************************************//**
-Copy the contents of  source tuple to destination tuple. The tuples
-must be of the same type and belong to the same table/index.
-@return	DB_SUCCESS or error code */
-
-ib_err_t
-ib_tuple_copy(
-/*==========*/
-	ib_tpl_t	ib_dst_tpl,	/*!< in: destination tuple */
-	const ib_tpl_t	ib_src_tpl);	/*!< in: source tuple */
-
-/*****************************************************************//**
 Create an InnoDB tuple used for index/table search.
 @return tuple for current index */
-
 ib_tpl_t
 ib_sec_search_tuple_create(
 /*=======================*/
@@ -925,8 +791,7 @@ ib_sec_search_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple used for index/table search.
-@return	tuple for current index */
-
+@return tuple for current index */
 ib_tpl_t
 ib_sec_read_tuple_create(
 /*=====================*/
@@ -934,8 +799,7 @@ ib_sec_read_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple used for table key operations.
-@return	tuple for current table */
-
+@return tuple for current table */
 ib_tpl_t
 ib_clust_search_tuple_create(
 /*=========================*/
@@ -943,8 +807,7 @@ ib_clust_search_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple for table row operations.
-@return	tuple for current table */
-
+@return tuple for current table */
 ib_tpl_t
 ib_clust_read_tuple_create(
 /*=======================*/
@@ -952,8 +815,7 @@ ib_clust_read_tuple_create(
 
 /*****************************************************************//**
 Return the number of user columns in the tuple definition.
-@return	number of user columns */
-
+@return number of user columns */
 ib_ulint_t
 ib_tuple_get_n_user_cols(
 /*=====================*/
@@ -961,8 +823,7 @@ ib_tuple_get_n_user_cols(
 
 /*****************************************************************//**
 Return the number of columns in the tuple definition.
-@return	number of columns */
-
+@return number of columns */
 ib_ulint_t
 ib_tuple_get_n_cols(
 /*================*/
@@ -970,7 +831,6 @@ ib_tuple_get_n_cols(
 
 /*****************************************************************//**
 Destroy an InnoDB tuple. */
-
 void
 ib_tuple_delete(
 /*============*/
@@ -979,8 +839,7 @@ ib_tuple_delete(
 /*****************************************************************//**
 Truncate a table. The cursor handle will be closed and set to NULL
 on success.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_truncate(
 /*===============*/
@@ -990,8 +849,7 @@ ib_cursor_truncate(
 
 /*****************************************************************//**
 Get a table id.
-@return	DB_SUCCESS if found */
-
+@return DB_SUCCESS if found */
 ib_err_t
 ib_table_get_id(
 /*============*/
@@ -999,20 +857,8 @@ ib_table_get_id(
 	ib_id_u64_t*	table_id);	/*!< out: table id if found */
 
 /*****************************************************************//**
-Get an index id.
-@return	DB_SUCCESS if found */
-
-ib_err_t
-ib_index_get_id(
-/*============*/
-	const char*	table_name,	/*!< in: find index for this table */
-	const char*	index_name,	/*!< in: index to find */
-	ib_id_u64_t*	index_id);	/*!< out: index id if found */
-
-/*****************************************************************//**
 Check if cursor is positioned.
-@return	IB_TRUE if positioned */
-
+@return IB_TRUE if positioned */
 ib_bool_t
 ib_cursor_is_positioned(
 /*====================*/
@@ -1022,7 +868,6 @@ ib_cursor_is_positioned(
 Checks if the data dictionary is latched in exclusive mode by a
 user transaction.
 @return TRUE if exclusive latch */
-
 ib_bool_t
 ib_schema_lock_is_exclusive(
 /*========================*/
@@ -1030,8 +875,7 @@ ib_schema_lock_is_exclusive(
 
 /*****************************************************************//**
 Lock an InnoDB cursor/table.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_lock(
 /*===========*/
@@ -1040,8 +884,7 @@ ib_cursor_lock(
 
 /*****************************************************************//**
 Set the Lock an InnoDB table using the table id.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_table_lock(
 /*===========*/
@@ -1051,8 +894,7 @@ ib_table_lock(
 
 /*****************************************************************//**
 Set the Lock mode of the cursor.
-@return	DB_SUCCESS or error code */
-
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_set_lock_mode(
 /*====================*/
@@ -1061,111 +903,13 @@ ib_cursor_set_lock_mode(
 
 /*****************************************************************//**
 Set need to access clustered index record flag. */
-
 void
 ib_cursor_set_cluster_access(
 /*=========================*/
 	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
 
 /*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i8_t		val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_i16(
-/*=================*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i16_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i32_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i64_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u8_t		val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u16_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_u32(
-/*=================*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u32_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u64_t	val);		/*!< in: value to write */
-
-/*****************************************************************//**
 Inform the cursor that it's the start of an SQL statement. */
-
 void
 ib_cursor_stmt_begin(
 /*=================*/
@@ -1173,8 +917,7 @@ ib_cursor_stmt_begin(
 
 /*****************************************************************//**
 Write a double value to a column.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_write_double(
 /*==================*/
@@ -1184,8 +927,7 @@ ib_tuple_write_double(
 
 /*************************************************************//**
 Read a double column value from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_double(
 /*=================*/
@@ -1195,8 +937,7 @@ ib_tuple_read_double(
 
 /*****************************************************************//**
 Write a float value to a column.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_write_float(
 /*=================*/
@@ -1206,8 +947,7 @@ ib_tuple_write_float(
 
 /*************************************************************//**
 Read a float value from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_float(
 /*================*/
@@ -1218,7 +958,6 @@ ib_tuple_read_float(
 /*****************************************************************//**
 Get a column type, length and attributes from the tuple.
 @return len of column data */
-
 const char*
 ib_col_get_name(
 /*============*/
@@ -1228,7 +967,6 @@ ib_col_get_name(
 /*****************************************************************//**
 Get an index field name from the cursor.
 @return name of the field */
-
 const char*
 ib_get_idx_field_name(
 /*==================*/
@@ -1238,7 +976,6 @@ ib_get_idx_field_name(
 /*****************************************************************//**
 Truncate a table.
 @return DB_SUCCESS or error code */
-
 ib_err_t
 ib_table_truncate(
 /*==============*/
@@ -1246,20 +983,8 @@ ib_table_truncate(
 	ib_id_u64_t*	table_id);	/*!< out: new table id */
 
 /*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return DB_SUCCESS or error number */
-
-ib_err_t
-ib_close_thd(
-/*=========*/
-	void*		thd);		/*!< in: handle to the MySQL
-					thread of the user whose resources
-					should be free'd */
-
-/*****************************************************************//**
 Get generic configure status
 @return configure status*/
-
 int
 ib_cfg_get_cfg();
 /*============*/
@@ -1275,27 +1000,15 @@ ib_cursor_set_memcached_sync(
 	ib_bool_t	flag);		/*!< in: true for increasing */
 
 /*****************************************************************//**
-Check whether the table name conforms to our requirements. Currently
-we only do a simple check for the presence of a '/'.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_table_name_check(
-/*================*/
-	const char*	name);		/*!< in: table name to check */
-
-/*****************************************************************//**
 Return isolation configuration set by "innodb_api_trx_level"
 @return trx isolation level*/
-
-ib_trx_state_t
+ib_trx_level_t
 ib_cfg_trx_level();
 /*==============*/
 
 /*****************************************************************//**
 Return configure value for background commit interval (in seconds)
 @return background commit interval (in seconds) */
-
 ib_ulint_t
 ib_cfg_bk_commit_interval();
 /*=======================*/
@@ -1303,10 +1016,25 @@ ib_cfg_bk_commit_interval();
 /*****************************************************************//**
 Get a trx start time.
 @return trx start_time */
-
 ib_u64_t
 ib_trx_get_start_time(
 /*==================*/
 	ib_trx_t	ib_trx);	/*!< in: transaction */
 
+/*****************************************************************//**
+Wrapper of ut_strerr() which converts an InnoDB error number to a
+human-readable text message.
+@return string describing the error */
+const char*
+ib_ut_strerr(
+/*=========*/
+	ib_err_t	num);		/*!< in: error number */
+
+/** Check whether the table contains virtual columns.
+@param[in]	crsr	InnoDB cursor
+@return true if the table contains a virtual column, else false. */
+ib_bool_t
+ib_is_virtual_table(
+        ib_crsr_t	crsr);
+
 #endif /* api0api_h */
diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h
index fcd748390d1..84ac3d622a9 100644
--- a/storage/innobase/include/api0misc.h
+++ b/storage/innobase/include/api0misc.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -49,9 +49,8 @@ extern ulong			ib_bk_commit_interval;
 
 /********************************************************************
 Handles user errors and lock waits detected by the database engine.
-@return	TRUE if it was a lock wait and we should continue running
+@return TRUE if it was a lock wait and we should continue running
 the query thread */
-UNIV_INTERN
 ibool
 ib_handle_errors(
 /*=============*/
@@ -66,8 +65,7 @@ ib_handle_errors(
 
 /*************************************************************************
 Sets a lock on a table.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 ib_trx_lock_table_with_retry(
 /*=========================*/
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a1882cdd0ad..48c5eb42724 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -35,6 +35,7 @@ Created 6/2/1994 Heikki Tuuri
 #include "page0cur.h"
 #include "mtr0mtr.h"
 #include "btr0types.h"
+#include "gis0type.h"
 
 #ifndef UNIV_HOTBACKUP
 /** Maximum record size which can be stored on a page, without using the
@@ -67,7 +68,11 @@ enum btr_latch_mode {
 	/** Search the previous record. */
 	BTR_SEARCH_PREV = 35,
 	/** Modify the previous record. */
-	BTR_MODIFY_PREV = 36
+	BTR_MODIFY_PREV = 36,
+	/** Start searching the entire B-tree. */
+	BTR_SEARCH_TREE = 37,
+	/** Continue searching the entire B-tree. */
+	BTR_CONT_SEARCH_TREE = 38
 };
 
 /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
@@ -98,18 +103,47 @@ buffer when the record is not in the buffer pool. */
 already holding an S latch on the index tree */
 #define BTR_ALREADY_S_LATCHED	16384
 
+/** In the case of BTR_MODIFY_TREE, the caller specifies the intention
+to insert record only. It is used to optimize block->lock range.*/
+#define BTR_LATCH_FOR_INSERT	32768
+
+/** In the case of BTR_MODIFY_TREE, the caller specifies the intention
+to delete record only. It is used to optimize block->lock range.*/
+#define BTR_LATCH_FOR_DELETE	65536
+
+/** This flag is for undo insert of rtree. For rtree, we need this flag
+to find proper rec to undo insert.*/
+#define BTR_RTREE_UNDO_INS	131072
+
+/** In the case of BTR_MODIFY_LEAF, the caller intends to allocate or
+free the pages of externally stored fields. */
+#define BTR_MODIFY_EXTERNAL	262144
+
+/** Try to delete mark the record at the searched position when the
+record is in spatial index */
+#define BTR_RTREE_DELETE_MARK	524288
+
 #define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode)	\
 	((latch_mode) & ~(BTR_INSERT			\
 			  | BTR_DELETE_MARK		\
+			  | BTR_RTREE_UNDO_INS		\
+			  | BTR_RTREE_DELETE_MARK	\
 			  | BTR_DELETE			\
 			  | BTR_ESTIMATE		\
 			  | BTR_IGNORE_SEC_UNIQUE	\
-			  | BTR_ALREADY_S_LATCHED))
+			  | BTR_ALREADY_S_LATCHED	\
+			  | BTR_LATCH_FOR_INSERT	\
+			  | BTR_LATCH_FOR_DELETE	\
+			  | BTR_MODIFY_EXTERNAL))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode)	\
+	((latch_mode) & ~(BTR_LATCH_FOR_INSERT		\
+			  | BTR_LATCH_FOR_DELETE	\
+			  | BTR_MODIFY_EXTERNAL))
 #endif /* UNIV_HOTBACKUP */
 
 /**************************************************************//**
 Report that an index page is corrupted. */
-UNIV_INTERN
 void
 btr_corruption_report(
 /*==================*/
@@ -128,95 +162,9 @@ btr_corruption_report(
 	}
 
 #ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "ut0rbt.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t
-{
-	unsigned	blob_page_no:32;	/*!< first BLOB page number */
-	unsigned	ref_page_no:32;		/*!< referring page number */
-	unsigned	ref_heap_no:16;		/*!< referring heap number */
-	unsigned	ref_field_no:10;	/*!< referring field number */
-	unsigned	owner:1;		/*!< TRUE if BLOB owner */
-	unsigned	always_owner:1;		/*!< TRUE if always
-						has been the BLOB owner;
-						reset to TRUE on B-tree
-						page splits and merges */
-	unsigned	del:1;			/*!< TRUE if currently
-						delete-marked */
-};
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
-	const rec_t*	rec,		/*!< in: clustered index record */
-	ulint		field_no,	/*!< in: number of off-page column */
-	ulint		page_no,	/*!< in: start page of the column */
-	dict_index_t*	index,		/*!< in/out: index tree */
-	const char*	ctx)		/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Display the references to off-page columns.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
-	const dict_index_t*	index)	/*!< in: index tree */
-	MY_ATTRIBUTE((nonnull));
 /**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
-	dict_index_t*	index,		/*!< in: index */
-	ulint		page_no)	/*!< in: page number */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
-	const rec_t*		rec,	/*!< in: record */
-	dict_index_t*		index,	/*!< in/out: index */
-	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
-	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Change the ownership of an off-page column. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
-	const rec_t*		rec,	/*!< in: record */
-	dict_index_t*		index,	/*!< in/out: index */
-	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
-	ulint			i,	/*!< in: ith field in rec */
-	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
-	MY_ATTRIBUTE((nonnull));
-/** Assert that there are no BLOB references to or from the given page. */
-# define btr_blob_dbg_assert_empty(index, page_no)	\
-	ut_a(btr_blob_dbg_is_empty(index, page_no))
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx)	((void) 0)
-# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
-# define btr_blob_dbg_owner(rec, index, offsets, i, val)	((void) 0)
-# define btr_blob_dbg_assert_empty(index, page_no)		((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return	root page, x-latched */
-UNIV_INTERN
+Gets the root node of a tree and sx-latches it for segment access.
+@return root page, sx-latched */
 page_t*
 btr_root_get(
 /*=========*/
@@ -227,150 +175,124 @@ btr_root_get(
 /**************************************************************//**
 Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
 @return error code, or DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 btr_root_adjust_on_import(
 /*======================*/
 	const dict_index_t*	index)	/*!< in: index tree */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /**************************************************************//**
 Gets the height of the B-tree (the level of the root, when the leaf
 level is assumed to be 0). The caller must hold an S or X latch on
 the index.
-@return	tree height (level of the root) */
-UNIV_INTERN
+@return tree height (level of the root) */
 ulint
 btr_height_get(
 /*===========*/
 	dict_index_t*	index,	/*!< in: index tree */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets a buffer page and declares its latching order level.
+@param[in]	page_id	page id
+@param[in]	page_size	page size
+@param[in]	mode	latch mode
+@param[in]	file	file name
+@param[in]	line	line where called
+@param[in]	index	index tree, may be NULL if not the insert buffer tree
+@param[in,out]	mtr	mini-transaction
+@return block */
 UNIV_INLINE
 buf_block_t*
 btr_block_get_func(
-/*===============*/
-	ulint		space,		/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
-	ulint		page_no,	/*!< in: page number */
-	ulint		mode,		/*!< in: latch mode */
-	const char*	file,		/*!< in: file name */
-	ulint		line,		/*!< in: line where called */
-	dict_index_t*	index,		/*!< in: index tree, may be NULL
-					if it is not an insert buffer tree */
-	mtr_t*		mtr);		/*!< in/out: mini-transaction */
-# ifdef UNIV_SYNC_DEBUG
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			mode,
+	const char*		file,
+	ulint			line,
+	dict_index_t*		index,
+	mtr_t*			mtr);
+
+# ifdef UNIV_DEBUG
 /** Gets a buffer page and declares its latching order level.
-@param space	tablespace identifier
-@param zip_size	compressed page size in bytes or 0 for uncompressed pages
-@param page_no	page number
-@param mode	latch mode
-@param index	index tree, may be NULL if not the insert buffer tree
-@param mtr	mini-transaction handle
+@param page_id tablespace/page identifier
+@param page_size page size
+@param mode latch mode
+@param index index tree, may be NULL if not the insert buffer tree
+@param mtr mini-transaction handle
 @return the block descriptor */
-#  define btr_block_get(space,zip_size,page_no,mode,index,mtr)	\
-	btr_block_get_func(space,zip_size,page_no,mode,		\
-			   __FILE__,__LINE__,index,mtr)
-# else /* UNIV_SYNC_DEBUG */
+#  define btr_block_get(page_id, page_size, mode, index, mtr)	\
+	btr_block_get_func(page_id, page_size, mode,		\
+		__FILE__, __LINE__, (dict_index_t*)index, mtr)
+# else /* UNIV_DEBUG */
 /** Gets a buffer page and declares its latching order level.
-@param space	tablespace identifier
-@param zip_size	compressed page size in bytes or 0 for uncompressed pages
-@param page_no	page number
-@param mode	latch mode
-@param idx	index tree, may be NULL if not the insert buffer tree
-@param mtr	mini-transaction handle
+@param page_id tablespace/page identifier
+@param page_size page size
+@param mode latch mode
+@param index index tree, may be NULL if not the insert buffer tree
+@param mtr mini-transaction handle
 @return the block descriptor */
-#  define btr_block_get(space,zip_size,page_no,mode,idx,mtr)		\
-		btr_block_get_func(space,zip_size,page_no,mode, \
-			__FILE__,__LINE__,idx,mtr)
-# endif /* UNIV_SYNC_DEBUG */
+#  define btr_block_get(page_id, page_size, mode, index, mtr)	\
+	btr_block_get_func(page_id, page_size, mode, __FILE__, __LINE__, (dict_index_t*)index, mtr)
+# endif /* UNIV_DEBUG */
 /** Gets a buffer page and declares its latching order level.
-@param space	tablespace identifier
-@param zip_size	compressed page size in bytes or 0 for uncompressed pages
-@param page_no	page number
-@param mode	latch mode
-@param idx	index tree, may be NULL if not the insert buffer tree
-@param mtr	mini-transaction handle
+@param page_id tablespace/page identifier
+@param page_size page size
+@param mode latch mode
+@param index index tree, may be NULL if not the insert buffer tree
+@param mtr mini-transaction handle
 @return the uncompressed page frame */
 UNIV_INLINE
 page_t*
 btr_page_get(
 /*=========*/
-	ulint		space,
-	ulint		zip_size,
-	ulint		root_page_no,
-	ulint		mode,
-	dict_index_t*	index,
-	mtr_t*		mtr)
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			mode,
+	dict_index_t*		index,
+	mtr_t*			mtr)
 	MY_ATTRIBUTE((warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 /**************************************************************//**
 Gets the index id field of a page.
-@return	index id */
+@return index id */
 UNIV_INLINE
 index_id_t
 btr_page_get_index_id(
 /*==================*/
 	const page_t*	page)	/*!< in: index page */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #ifndef UNIV_HOTBACKUP
 /********************************************************//**
 Gets the node level field in an index page.
-@return	level, leaf level == 0 */
+@return level, leaf level == 0 */
 UNIV_INLINE
 ulint
 btr_page_get_level_low(
 /*===================*/
 	const page_t*	page)	/*!< in: index page */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
 /********************************************************//**
 Gets the next index page number.
-@return	next page number */
+@return next page number */
 UNIV_INLINE
 ulint
 btr_page_get_next(
 /*==============*/
 	const page_t*	page,	/*!< in: index page */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************//**
 Gets the previous index page number.
-@return	prev page number */
+@return prev page number */
 UNIV_INLINE
 ulint
 btr_page_get_prev(
 /*==============*/
 	const page_t*	page,	/*!< in: index page */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return	previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
-	rec_t*	rec,	/*!< in: record on leaf level */
-	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
-			needed, also to the previous page */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return	next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
-	rec_t*	rec,	/*!< in: record on leaf level */
-	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
-			needed, also to the next page */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**************************************************************//**
 Releases the latch on a leaf page and bufferunfixes it. */
 UNIV_INLINE
@@ -388,60 +310,63 @@ NOTE: the offsets array must contain all offsets for the record since
 we read the last field according to offsets and assume that it contains
 the child page number. In other words offsets must have been retrieved
 with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return	child node address */
+@return child node address */
 UNIV_INLINE
 ulint
 btr_node_ptr_get_child_page_no(
 /*===========================*/
 	const rec_t*	rec,	/*!< in: node pointer record */
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/************************************************************//**
-Creates the root node for a new index tree.
-@return	page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Create the root node for a new index tree.
+@param[in]	type			type of the index
+@param[in]	space			space where created
+@param[in]	page_size		page size
+@param[in]	index_id		index id
+@param[in]	index			index, or NULL when applying TRUNCATE
+log record during recovery
+@param[in]	btr_redo_create_info	used for applying TRUNCATE log
+record during recovery
+@param[in]	mtr			mini-transaction handle
+@return page number of the created root, FIL_NULL if did not succeed */
 ulint
 btr_create(
-/*=======*/
-	ulint		type,	/*!< in: type of the index */
-	ulint		space,	/*!< in: space where created */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	index_id_t	index_id,/*!< in: index id */
-	dict_index_t*	index,	/*!< in: index */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle */
-	MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
+	ulint			type,
+	ulint			space,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	dict_index_t*		index,
+	const btr_create_t*	btr_redo_create_info,
+	mtr_t*			mtr);
+
+/** Free a persistent index tree if it exists.
+@param[in]	page_id		root page id
+@param[in]	page_size	page size
+@param[in]	index_id	PAGE_INDEX_ID contents
+@param[in,out]	mtr		mini-transaction */
 void
-btr_free_but_not_root(
-/*==================*/
-	ulint	space,		/*!< in: space where created */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	root_page_no);	/*!< in: root page number */
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
+btr_free_if_exists(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	mtr_t*			mtr);
+
+/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE.
+@param[in]	page_id		root page id
+@param[in]	page_size	page size */
 void
-btr_free_root(
-/*==========*/
-	ulint	space,		/*!< in: space where created */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	root_page_no,	/*!< in: root page number */
-	mtr_t*	mtr)		/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((nonnull));
+btr_free(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size);
+
 /*************************************************************//**
 Makes tree one level higher by splitting the root, and inserts
 the tuple. It is assumed that mtr contains an x-latch on the tree.
 NOTE that the operation of this function must always succeed,
 we cannot reverse it: therefore enough free disk space must be
 guaranteed to be available before this function is called.
-@return	inserted record */
-UNIV_INTERN
+@return inserted record */
 rec_t*
 btr_root_raise_and_insert(
 /*======================*/
@@ -456,7 +381,7 @@ btr_root_raise_and_insert(
 	const dtuple_t*	tuple,	/*!< in: tuple to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
 	mtr_t*		mtr)	/*!< in: mtr */
-	__attribute__((nonnull(2,3,4,7), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*************************************************************//**
 Reorganizes an index page.
 
@@ -468,7 +393,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
 
 @retval true if the operation was successful
 @retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
 bool
 btr_page_reorganize_low(
 /*====================*/
@@ -482,7 +406,7 @@ btr_page_reorganize_low(
 	page_cur_t*	cursor,	/*!< in/out: page cursor */
 	dict_index_t*	index,	/*!< in: the index tree of the page */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*************************************************************//**
 Reorganizes an index page.
 
@@ -494,7 +418,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
 
 @retval true if the operation was successful
 @retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
 bool
 btr_page_reorganize(
 /*================*/
@@ -505,8 +428,7 @@ btr_page_reorganize(
 /*************************************************************//**
 Decides if the page should be split at the convergence point of
 inserts converging to left.
-@return	TRUE if split recommended */
-UNIV_INTERN
+@return TRUE if split recommended */
 ibool
 btr_page_get_split_rec_to_left(
 /*===========================*/
@@ -514,12 +436,11 @@ btr_page_get_split_rec_to_left(
 	rec_t**		split_rec)/*!< out: if split recommended,
 				the first record on upper half page,
 				or NULL if tuple should be first */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*************************************************************//**
 Decides if the page should be split at the convergence point of
 inserts converging to right.
-@return	TRUE if split recommended */
-UNIV_INTERN
+@return TRUE if split recommended */
 ibool
 btr_page_get_split_rec_to_right(
 /*============================*/
@@ -527,7 +448,8 @@ btr_page_get_split_rec_to_right(
 	rec_t**		split_rec)/*!< out: if split recommended,
 				the first record on upper half page,
 				or NULL if tuple should be first */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*************************************************************//**
 Splits an index page to halves and inserts the tuple. It is assumed
 that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
@@ -537,7 +459,6 @@ free disk space (2 pages) must be guaranteed to be available before
 this function is called.
 
 @return inserted record */
-UNIV_INTERN
 rec_t*
 btr_page_split_and_insert(
 /*======================*/
@@ -551,11 +472,10 @@ btr_page_split_and_insert(
 	const dtuple_t*	tuple,	/*!< in: tuple to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
 	mtr_t*		mtr)	/*!< in: mtr */
-	__attribute__((nonnull(2,3,4,7), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************//**
 Inserts a data tuple to a tree on a non-leaf level. It is assumed
 that mtr holds an x-latch on the tree. */
-UNIV_INTERN
 void
 btr_insert_on_non_leaf_level_func(
 /*==============================*/
@@ -565,14 +485,12 @@ btr_insert_on_non_leaf_level_func(
 	dtuple_t*	tuple,	/*!< in: the record to be inserted */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mtr */
-	MY_ATTRIBUTE((nonnull));
+	mtr_t*		mtr);	/*!< in: mtr */
 # define btr_insert_on_non_leaf_level(f,i,l,t,m)			\
 	btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
 #endif /* !UNIV_HOTBACKUP */
 /****************************************************************//**
 Sets a record as the predefined minimum record. */
-UNIV_INTERN
 void
 btr_set_min_rec_mark(
 /*=================*/
@@ -582,7 +500,6 @@ btr_set_min_rec_mark(
 #ifndef UNIV_HOTBACKUP
 /*************************************************************//**
 Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
 void
 btr_node_ptr_delete(
 /*================*/
@@ -593,15 +510,14 @@ btr_node_ptr_delete(
 #ifdef UNIV_DEBUG
 /************************************************************//**
 Checks that the node pointer to a page is appropriate.
-@return	TRUE */
-UNIV_INTERN
+@return TRUE */
 ibool
 btr_check_node_ptr(
 /*===============*/
 	dict_index_t*	index,	/*!< in: index tree */
 	buf_block_t*	block,	/*!< in: index page */
 	mtr_t*		mtr)	/*!< in: mtr */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* UNIV_DEBUG */
 /*************************************************************//**
 Tries to merge the page first to the left immediate brother if such a
@@ -612,8 +528,7 @@ level lifts the records of the page to the father page, thus reducing the
 tree height. It is assumed that mtr holds an x-latch on the tree and on the
 page. If cursor is on the leaf level, mtr must also hold x-latches to
 the brothers, if they exist.
-@return	TRUE on success */
-UNIV_INTERN
+@return TRUE on success */
 ibool
 btr_compress(
 /*=========*/
@@ -629,20 +544,17 @@ btr_compress(
 Discards a page from a B-tree. This is used to remove the last record from
 a B-tree page: the whole page must be removed at the same time. This cannot
 be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
 void
 btr_discard_page(
 /*=============*/
 	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard: not on
 				the root page */
-	mtr_t*		mtr)	/*!< in: mtr */
-	MY_ATTRIBUTE((nonnull));
+	mtr_t*		mtr);	/*!< in: mtr */
 #endif /* !UNIV_HOTBACKUP */
 /****************************************************************//**
 Parses the redo log record for setting an index record as the predefined
 minimum record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_parse_set_min_rec_mark(
 /*=======================*/
@@ -654,8 +566,7 @@ btr_parse_set_min_rec_mark(
 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
 /***********************************************************//**
 Parses a redo log record of reorganizing a page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_parse_page_reorganize(
 /*======================*/
@@ -665,12 +576,11 @@ btr_parse_page_reorganize(
 	bool		compressed,/*!< in: true if compressed page */
 	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
 	mtr_t*		mtr)	/*!< in: mtr or NULL */
-	MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #ifndef UNIV_HOTBACKUP
 /**************************************************************//**
 Gets the number of pages in a B-tree.
-@return	number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
+@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
 ulint
 btr_get_size(
 /*=========*/
@@ -678,7 +588,7 @@ btr_get_size(
 	ulint		flag,	/*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction where index
 				is s-latched */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**************************************************************//**
 Gets the number of reserved and used pages in a B-tree.
 @return	number of pages reserved, or ULINT_UNDEFINED if the index
@@ -701,7 +611,6 @@ that the caller has made the reservation for free extents!
 @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
 (init_mtr == mtr, or the page was not previously freed in mtr)
 @retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
 buf_block_t*
 btr_page_alloc(
 /*===========*/
@@ -716,11 +625,10 @@ btr_page_alloc(
 	mtr_t*		init_mtr)	/*!< in/out: mini-transaction
 					for x-latching and initializing
 					the page */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**************************************************************//**
 Frees a file page used in an index tree. NOTE: cannot free field external
 storage pages because the page must contain info on its level. */
-UNIV_INTERN
 void
 btr_page_free(
 /*==========*/
@@ -729,19 +637,39 @@ btr_page_free(
 	mtr_t*		mtr)	/*!< in: mtr */
 	MY_ATTRIBUTE((nonnull));
 /**************************************************************//**
+Creates a new index page (not the root, and also not
+used in page reorganization).  @see btr_page_empty(). */
+void
+btr_page_create(
+/*============*/
+	buf_block_t*	block,	/*!< in/out: page to be created */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the B-tree level of the page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
 Frees a file page used in an index tree. Can be used also to BLOB
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
+external storage pages. */
 void
 btr_page_free_low(
 /*==============*/
 	dict_index_t*	index,	/*!< in: index tree */
 	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
-	ulint		level,	/*!< in: page level */
+	ulint		level,	/*!< in: page level (ULINT_UNDEFINED=BLOB) */
 	bool		blob,   /*!< in: blob page */
 	mtr_t*		mtr)	/*!< in: mtr */
-	__attribute__((nonnull));
+	MY_ATTRIBUTE((nonnull(1,2)));
+/**************************************************************//**
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
+buf_block_t*
+btr_root_block_get(
+/*===============*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	ulint			mode,	/*!< in: either RW_S_LATCH
+					or RW_X_LATCH */
+	mtr_t*			mtr);	/*!< in: mtr */
+
 /*************************************************************//**
 Reorganizes an index page.
 
@@ -772,7 +700,6 @@ btr_page_reorganize_block(
 #ifdef UNIV_BTR_PRINT
 /*************************************************************//**
 Prints size info of a B-tree. */
-UNIV_INTERN
 void
 btr_print_size(
 /*===========*/
@@ -780,7 +707,6 @@ btr_print_size(
 	MY_ATTRIBUTE((nonnull));
 /**************************************************************//**
 Prints directories and other info of all nodes in the index. */
-UNIV_INTERN
 void
 btr_print_index(
 /*============*/
@@ -792,8 +718,7 @@ btr_print_index(
 /************************************************************//**
 Checks the size and number of fields in a record based on the definition of
 the index.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 btr_index_rec_validate(
 /*===================*/
@@ -802,39 +727,17 @@ btr_index_rec_validate(
 	ibool			dump_on_error)	/*!< in: TRUE if the function
 						should print hex dump of record
 						and page on error */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**************************************************************//**
 Checks the consistency of an index tree.
 @return	DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
 dberr_t
 btr_validate_index(
 /*===============*/
-	dict_index_t*	index,			/*!< in: index */
-	const trx_t*	trx)			/*!< in: transaction or 0 */
-	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space	in: space where removed
-@param zip_size	in: compressed page size in bytes, or 0 for uncompressed
-@param page	in/out: page to remove
-@param index	in: index tree
-@param mtr	in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr)		\
-	btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#else /* UNIV_SYNC_DEBUG */
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space	in: space where removed
-@param zip_size	in: compressed page size in bytes, or 0 for uncompressed
-@param page	in/out: page to remove
-@param index	in: index tree
-@param mtr	in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr)		\
-	btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#endif /* UNIV_SYNC_DEBUG */
+	dict_index_t*	index,	/*!< in: index */
+	const trx_t*	trx,	/*!< in: transaction or 0 */
+	bool		lockout)/*!< in: true if X-latch index is intended */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*************************************************************//**
 Removes a page from the level list of pages. */
@@ -843,11 +746,19 @@ void
 btr_level_list_remove_func(
 /*=======================*/
 	ulint			space,	/*!< in: space where removed */
-	ulint			zip_size,/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
+	const page_size_t&	page_size,/*!< in: page size */
 	page_t*			page,	/*!< in/out: page to remove */
 	dict_index_t*		index,	/*!< in: index tree */
 	mtr_t*			mtr);	/*!< in/out: mini-transaction */
+/*************************************************************//**
+Removes a page from the level list of pages.
+@param space	in: space where removed
+@param zip_size	in: page size (forwarded as the page_size_t argument)
+@param page	in/out: page to remove
+@param index	in: index tree
+@param mtr	in/out: mini-transaction */
+# define btr_level_list_remove(space,zip_size,page,index,mtr)		\
+	btr_level_list_remove_func(space,zip_size,page,index,mtr)
 
 /*************************************************************//**
 If page is the only on its level, this function moves its records to the
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 5acbee1751e..d01e19b5202 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -36,28 +36,31 @@ Created 6/2/1994 Heikki Tuuri
 					in btr_page_set_level and
 					btr_page_get_level_low */
 
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+/** Gets a buffer page and declares its latching order level.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	mode		latch mode
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in]	index	index tree, may be NULL if it is not an insert buffer tree
+@param[in,out]	mtr	mini-transaction
+@return block */
 UNIV_INLINE
 buf_block_t*
 btr_block_get_func(
-/*===============*/
-	ulint		space,		/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
-	ulint		page_no,	/*!< in: page number */
-	ulint		mode,		/*!< in: latch mode */
-	const char*	file,		/*!< in: file name */
-	ulint		line,		/*!< in: line where called */
-	dict_index_t*	index,		/*!< in: index tree, may be NULL
-					if it is not an insert buffer tree */
-	mtr_t*		mtr)		/*!< in/out: mtr */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			mode,
+	const char*		file,
+	ulint			line,
+	dict_index_t*		index,
+	mtr_t*			mtr)
 {
 	buf_block_t*	block;
-	dberr_t		err;
+	dberr_t		err=DB_SUCCESS;
 
-	block = buf_page_get_gen(space, zip_size, page_no, mode,
-		NULL, BUF_GET, file, line, mtr, &err);
+	block = buf_page_get_gen(
+		page_id, page_size, mode, NULL, BUF_GET, file, line, mtr, &err);
 
 	if (err == DB_DECRYPTION_FAILED) {
 		if (index && index->table) {
@@ -111,17 +114,16 @@ UNIV_INLINE
 page_t*
 btr_page_get(
 /*=========*/
-	ulint		space,
-	ulint		zip_size,
-	ulint		root_page_no,
-	ulint		mode,
-	dict_index_t*	index,
-	mtr_t*		mtr)
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			mode,
+	dict_index_t*		index,
+	mtr_t*			mtr)
 {
 	buf_block_t* block=NULL;
 	buf_frame_t* frame=NULL;
 
-	block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
+	block = btr_block_get(page_id, page_size, mode, index, mtr);
 
 	if (block) {
 		frame = buf_block_get_frame(block);
@@ -134,7 +136,7 @@ btr_page_get(
 
 /**************************************************************//**
 Gets the index id field of a page.
-@return	index id */
+@return index id */
 UNIV_INLINE
 index_id_t
 btr_page_get_index_id(
@@ -147,7 +149,7 @@ btr_page_get_index_id(
 #ifndef UNIV_HOTBACKUP
 /********************************************************//**
 Gets the node level field in an index page.
-@return	level, leaf level == 0 */
+@return level, leaf level == 0 */
 UNIV_INLINE
 ulint
 btr_page_get_level_low(
@@ -177,7 +179,8 @@ btr_page_set_level(
 	ulint		level,	/*!< in: level, leaf level == 0 */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
-	ut_ad(page && mtr);
+	ut_ad(page != NULL);
+	ut_ad(mtr != NULL);
 	ut_ad(level <= BTR_MAX_NODE_LEVEL);
 
 	if (page_zip) {
@@ -193,7 +196,7 @@ btr_page_set_level(
 
 /********************************************************//**
 Gets the next index page number.
-@return	next page number */
+@return next page number */
 UNIV_INLINE
 ulint
 btr_page_get_next(
@@ -204,10 +207,7 @@ btr_page_get_next(
 {
 	ut_ad(page != NULL);
 	ut_ad(mtr != NULL);
-#ifndef UNIV_INNOCHECKSUM
-	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
-#endif /* UNIV_INNOCHECKSUM */
+
 	return(mach_read_from_4(page + FIL_PAGE_NEXT));
 }
 
@@ -236,7 +236,7 @@ btr_page_set_next(
 
 /********************************************************//**
 Gets the previous index page number.
-@return	prev page number */
+@return prev page number */
 UNIV_INLINE
 ulint
 btr_page_get_prev(
@@ -279,7 +279,7 @@ NOTE: the offsets array must contain all offsets for the record since
 we read the last field according to offsets and assume that it contains
 the child page number. In other words offsets must have been retrieved
 with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return	child node address */
+@return child node address */
 UNIV_INLINE
 ulint
 btr_node_ptr_get_child_page_no(
@@ -300,15 +300,7 @@ btr_node_ptr_get_child_page_no(
 	ut_ad(len == 4);
 
 	page_no = mach_read_from_4(field);
-
-	if (page_no == 0) {
-		fprintf(stderr,
-			"InnoDB: a nonsensical page number 0"
-			" in a node ptr record at offset %lu\n",
-			(ulong) page_offset(rec));
-		buf_page_print(page_align(rec), 0, 0);
-		ut_ad(0);
-	}
+	ut_ad(page_no > 1);
 
 	return(page_no);
 }
@@ -324,12 +316,27 @@ btr_leaf_page_release(
 					BTR_MODIFY_LEAF */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
-	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
+	ut_ad(latch_mode == BTR_SEARCH_LEAF
+	      || latch_mode == BTR_MODIFY_LEAF
+	      || latch_mode == BTR_NO_LATCHES);
+
 	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
 
-	mtr_memo_release(mtr, block,
-			 latch_mode == BTR_SEARCH_LEAF
-			 ? MTR_MEMO_PAGE_S_FIX
-			 : MTR_MEMO_PAGE_X_FIX);
+	ulint mode;
+	switch (latch_mode) {
+		case BTR_SEARCH_LEAF:
+			mode = MTR_MEMO_PAGE_S_FIX;
+			break;
+		case BTR_MODIFY_LEAF:
+			mode = MTR_MEMO_PAGE_X_FIX;
+			break;
+		case BTR_NO_LATCHES:
+			mode = MTR_MEMO_BUF_FIX;
+			break;
+		default:
+			ut_a(0);
+	}
+
+	mtr->memo_release(block, mode);
 }
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h
new file mode 100644
index 00000000000..a1887c3df2b
--- /dev/null
+++ b/storage/innobase/include/btr0bulk.h
@@ -0,0 +1,392 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0bulk.h
+The B-tree bulk load
+
+Created 03/11/2014 Shaohua Wang
+*************************************************************************/
+
+#ifndef btr0bulk_h
+#define btr0bulk_h
+
+#include "dict0dict.h"
+#include "page0cur.h"
+#include "ut0new.h"
+
+#include <vector>
+
+/** Innodb B-tree index fill factor for bulk load. */
+extern	long	innobase_fill_factor;
+
+/*
+The proper function call sequence of PageBulk is as below:
+-- PageBulk::init
+-- PageBulk::insert
+-- PageBulk::finish
+-- PageBulk::compress(COMPRESSED table only)
+-- BtrBulk::pageSplit(COMPRESSED table only)
+-- PageBulk::commit
+*/
+
+class PageBulk
+{
+public:
+	/** Constructor
+	@param[in]	index		B-tree index
+	@param[in]	trx_id		transaction id
+	@param[in]	page_no		page number
+	@param[in]	level		page level
+	@param[in]	observer	flush observer */
+	PageBulk(
+		dict_index_t*	index,
+		trx_id_t	trx_id,
+		ulint		page_no,
+		ulint		level,
+		FlushObserver*	observer)
+		:
+		m_heap(NULL),
+		m_index(index),
+		m_mtr(NULL),
+		m_trx_id(trx_id),
+		m_block(NULL),
+		m_page(NULL),
+		m_page_zip(NULL),
+		m_cur_rec(NULL),
+		m_page_no(page_no),
+		m_level(level),
+		m_is_comp(dict_table_is_comp(index->table)),
+		m_heap_top(NULL),
+		m_rec_no(0),
+		m_free_space(0),
+		m_reserved_space(0),
+#ifdef UNIV_DEBUG
+		m_total_data(0),
+#endif /* UNIV_DEBUG */
+		m_modify_clock(0),
+		m_flush_observer(observer),
+		m_err(DB_SUCCESS)
+	{
+		ut_ad(!dict_index_is_spatial(m_index));
+	}
+
+	/** Destructor; frees m_heap. */
+	~PageBulk()
+	{
+		mem_heap_free(m_heap);
+	}
+
+	/** Initialize members and allocate page if needed and start mtr.
+	Note: must be called and only once right after constructor.
+	@return error code */
+	dberr_t init();
+
+	/** Insert a record in the page.
+	@param[in]	rec		record
+	@param[in]	offsets		record offsets */
+	void insert(const rec_t* rec, ulint* offsets);
+
+	/** Mark end of insertion to the page. Scan all records to set page
+	dirs, and set page header members. */
+	void finish();
+
+	/** Commit mtr for a page
+	@param[in]	success		Flag whether all inserts succeed. */
+	void commit(bool success);
+
+	/** Compress if it is compressed table
+	@return	true	compress successfully or no need to compress
+	@return	false	compress failed. */
+	bool compress();
+
+	/** Check whether the record needs to be stored externally.
+	@return	true if the record needs to be stored externally
+	@return	false otherwise */
+	bool needExt(const dtuple_t* tuple, ulint rec_size);
+
+	/** Store external record
+	@param[in]	big_rec		external record
+	@param[in]	offsets		record offsets
+	@return	error code */
+	dberr_t storeExt(const big_rec_t* big_rec, ulint* offsets);
+
+	/** Get node pointer
+	@return node pointer */
+	dtuple_t* getNodePtr();
+
+	/** Get split rec in the page. We split a page in half when compression
+	fails, and the split rec should be copied to the new page.
+	@return split rec */
+	rec_t*	getSplitRec();
+
+	/** Copy all records after split rec including itself.
+	@param[in]	split_rec	split rec */
+	void copyIn(rec_t*	split_rec);
+
+	/** Remove all records after split rec including itself.
+	@param[in]	split_rec	split rec */
+	void copyOut(rec_t*	split_rec);
+
+	/** Set next page
+	@param[in]	next_page_no	next page no */
+	void setNext(ulint	next_page_no);
+
+	/** Set previous page
+	@param[in]	prev_page_no	previous page no */
+	void setPrev(ulint	prev_page_no);
+
+	/** Release block by committing mtr */
+	inline void release();
+
+	/** Start mtr and latch block */
+	inline dberr_t latch();
+
+	/** Check if required space is available in the page for the rec
+	to be inserted.	We check fill factor & padding here.
+	@param[in]	rec_size	required length
+	@return true	if space is available */
+	inline bool isSpaceAvailable(ulint	rec_size);
+
+	/** Get page no */
+	ulint	getPageNo()
+	{
+		return(m_page_no);
+	}
+
+	/** Get page level */
+	ulint	getLevel()
+	{
+		return(m_level);
+	}
+
+	/** Get record no */
+	ulint	getRecNo()
+	{
+		return(m_rec_no);
+	}
+
+	/** Get page */
+	page_t*	getPage()
+	{
+		return(m_page);
+	}
+
+	/** Get page zip */
+	page_zip_des_t*	getPageZip()
+	{
+		return(m_page_zip);
+	}
+
+	dberr_t getError()
+	{
+		return(m_err);
+	}
+
+	/** Memory heap for internal allocation (public member) */
+	mem_heap_t*	m_heap;
+
+private:
+	/** The index B-tree */
+	dict_index_t*	m_index;
+
+	/** The mini-transaction */
+	mtr_t*		m_mtr;
+
+	/** The transaction id */
+	trx_id_t	m_trx_id;
+
+	/** The buffer block */
+	buf_block_t*	m_block;
+
+	/** The page */
+	page_t*		m_page;
+
+	/** The page zip descriptor */
+	page_zip_des_t*	m_page_zip;
+
+	/** The current rec, just before the next insert rec */
+	rec_t*		m_cur_rec;
+
+	/** The page no */
+	ulint		m_page_no;
+
+	/** The page level in B-tree */
+	ulint		m_level;
+
+	/** Flag: is page in compact format */
+	const bool	m_is_comp;
+
+	/** The heap top in page for next insert */
+	byte*		m_heap_top;
+
+	/** User record no */
+	ulint		m_rec_no;
+
+	/** The free space left in the page */
+	ulint		m_free_space;
+
+	/** The reserved space for fill factor */
+	ulint		m_reserved_space;
+
+	/** The padding space for compressed page (not set by the constructor) */
+	ulint		m_padding_space;
+
+#ifdef UNIV_DEBUG
+	/** Total data in the page */
+	ulint		m_total_data;
+#endif /* UNIV_DEBUG */
+
+	/** The modify clock value of the buffer block
+	when the block is re-pinned */
+	ib_uint64_t     m_modify_clock;
+
+	/** Flush observer */
+	FlushObserver*	m_flush_observer;
+
+	/** Operation result DB_SUCCESS or error code */
+	dberr_t		m_err;
+};
+
+typedef std::vector<PageBulk*, ut_allocator<PageBulk*> >
+	page_bulk_vector;
+
+class BtrBulk
+{
+public:
+	/** Constructor
+	@param[in]	index		B-tree index
+	@param[in]	trx_id		transaction id
+	@param[in]	observer	flush observer */
+	BtrBulk(
+		dict_index_t*	index,
+		trx_id_t	trx_id,
+		FlushObserver*	observer)
+		:
+		m_heap(NULL),
+		m_index(index),
+		m_trx_id(trx_id),
+		m_flush_observer(observer)
+	{
+		ut_ad(m_flush_observer != NULL);
+#ifdef UNIV_DEBUG
+		fil_space_inc_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Destructor: frees m_heap and m_page_bulks */
+	~BtrBulk()
+	{
+		mem_heap_free(m_heap);
+		UT_DELETE(m_page_bulks);
+
+#ifdef UNIV_DEBUG
+		fil_space_dec_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Initialization: creates m_heap and allocates m_page_bulks.
+	Note: must be called right after constructor. */
+	void init()
+	{
+		ut_ad(m_heap == NULL);
+		m_heap = mem_heap_create(1000);
+
+		m_page_bulks = UT_NEW_NOKEY(page_bulk_vector());
+	}
+
+	/** Insert a tuple (forwards to the private insert() with level 0)
+	@param[in]	tuple	tuple to insert.
+	@return error code */
+	dberr_t	insert(dtuple_t*	tuple)
+	{
+		return(insert(tuple, 0));
+	}
+
+	/** Btree bulk load finish. We commit the last page in each level
+	and copy the last page in top level to the root page of the index
+	if no error occurs.
+	@param[in]	err	whether bulk load was successful until now
+	@return error code */
+	dberr_t finish(dberr_t	err);
+
+	/** Release all latches */
+	void release();
+
+	/** Re-latch all latches */
+	void latch();
+
+private:
+	/** Insert a tuple to a page in a level
+	@param[in]	tuple	tuple to insert
+	@param[in]	level	B-tree level
+	@return error code */
+	dberr_t insert(dtuple_t* tuple, ulint level);
+
+	/** Split a page
+	@param[in]	page_bulk	page to split
+	@param[in]	next_page_bulk	next page
+	@return	error code */
+	dberr_t pageSplit(PageBulk* page_bulk,
+			  PageBulk* next_page_bulk);
+
+	/** Commit(finish) a page. We set next/prev page no, compress a page of
+	compressed table and split the page if compression fails, insert a node
+	pointer to father page if needed, and commit mini-transaction.
+	@param[in]	page_bulk	page to commit
+	@param[in]	next_page_bulk	next page
+	@param[in]	insert_father	flag whether need to insert node ptr
+	@return	error code */
+	dberr_t pageCommit(PageBulk* page_bulk,
+			   PageBulk* next_page_bulk,
+			   bool insert_father);
+
+	/** Abort a page when an error occurs, via PageBulk::commit(false)
+	@param[in]	page_bulk	page bulk object
+	Note: we should call pageAbort for a PageBulk object, which is not in
+	m_page_bulks after pageCommit, and we will commit or abort PageBulk
+	objects in function "finish". */
+	void	pageAbort(PageBulk* page_bulk)
+	{
+		page_bulk->commit(false);
+	}
+
+	/** Log free check */
+	void logFreeCheck();
+
+private:
+	/** Memory heap for allocation; created in init() */
+	mem_heap_t*		m_heap;
+
+	/** B-tree index */
+	dict_index_t*		m_index;
+
+	/** Transaction id */
+	trx_id_t		m_trx_id;
+
+	/** Root page level (not set by the constructor) */
+	ulint			m_root_level;
+
+	/** Flush observer */
+	FlushObserver*		m_flush_observer;
+
+	/** Page cursor vector for all levels; allocated in init() */
+	page_bulk_vector*	m_page_bulks;
+};
+
+#endif
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index aa799e0fc00..e445331b60c 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -30,6 +30,7 @@ Created 10/16/1994 Heikki Tuuri
 #include "dict0dict.h"
 #include "page0cur.h"
 #include "btr0types.h"
+#include "gis0type.h"
 
 /** Mode flags for btr_cur operations; these can be ORed */
 enum {
@@ -52,6 +53,13 @@ enum {
 	BTR_KEEP_IBUF_BITMAP = 32
 };
 
+/** Latched blocks and savepoints returned by btr_cur_latch_leaves(). */
+struct btr_latch_leaves_t {
+	/** left block, target block and right block */
+	buf_block_t*	blocks[3];
+	ulint		savepoints[3];
+};
+
 #ifndef UNIV_HOTBACKUP
 #include "que0types.h"
 #include "row0types.h"
@@ -63,7 +71,7 @@ enum {
 #ifdef UNIV_DEBUG
 /*********************************************************//**
 Returns the page cursor component of a tree cursor.
-@return	pointer to page cursor component */
+@return pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_cur_get_page_cur(
@@ -71,7 +79,7 @@ btr_cur_get_page_cur(
 	const btr_cur_t*	cursor);/*!< in: tree cursor */
 /*********************************************************//**
 Returns the buffer block on which the tree cursor is positioned.
-@return	pointer to buffer block */
+@return pointer to buffer block */
 UNIV_INLINE
 buf_block_t*
 btr_cur_get_block(
@@ -79,7 +87,7 @@ btr_cur_get_block(
 	const btr_cur_t*	cursor);/*!< in: tree cursor */
 /*********************************************************//**
 Returns the record pointer of a tree cursor.
-@return	pointer to record */
+@return pointer to record */
 UNIV_INLINE
 rec_t*
 btr_cur_get_rec(
@@ -92,22 +100,15 @@ btr_cur_get_rec(
 #endif /* UNIV_DEBUG */
 /*********************************************************//**
 Returns the compressed page on which the tree cursor is positioned.
-@return	pointer to compressed page, or NULL if the page is not compressed */
+@return pointer to compressed page, or NULL if the page is not compressed */
 UNIV_INLINE
 page_zip_des_t*
 btr_cur_get_page_zip(
 /*=================*/
 	btr_cur_t*	cursor);/*!< in: tree cursor */
 /*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
-	btr_cur_t*	cursor);/*!< in: tree cursor */
-/*********************************************************//**
 Returns the page of a tree cursor.
-@return	pointer to page */
+@return pointer to page */
 UNIV_INLINE
 page_t*
 btr_cur_get_page(
@@ -115,8 +116,8 @@ btr_cur_get_page(
 	btr_cur_t*	cursor);/*!< in: tree cursor */
 /*********************************************************//**
 Returns the index of a cursor.
-@param cursor	b-tree cursor
-@return	index */
+@param cursor b-tree cursor
+@return index */
 #define btr_cur_get_index(cursor) ((cursor)->index)
 /*********************************************************//**
 Positions a tree cursor at a given record. */
@@ -128,6 +129,26 @@ btr_cur_position(
 	rec_t*		rec,	/*!< in: record in tree */
 	buf_block_t*	block,	/*!< in: buffer block of rec */
 	btr_cur_t*	cursor);/*!< in: cursor */
+
+/** Optimistically latches the leaf page or pages requested.
+@param[in]	block		guessed buffer block
+@param[in]	modify_clock	modify clock value
+@param[in,out]	latch_mode	BTR_SEARCH_LEAF, ...
+@param[in,out]	cursor		cursor
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in]	mtr		mini-transaction
+@return true if success */
+bool
+btr_cur_optimistic_latch_leaves(
+	buf_block_t*	block,
+	ib_uint64_t	modify_clock,
+	ulint*		latch_mode,
+	btr_cur_t*	cursor,
+	const char*	file,
+	ulint		line,
+	mtr_t*		mtr);
+
 /********************************************************************//**
 Searches an index tree and positions a tree cursor on a given level.
 NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
@@ -135,7 +156,6 @@ to node pointer page number fields on the upper levels of the tree!
 Note that if mode is PAGE_CUR_LE, which is used in inserts, then
 cursor->up_match and cursor->low_match both will have sensible values.
 If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
-UNIV_INTERN
 dberr_t
 btr_cur_search_to_nth_level(
 /*========================*/
@@ -144,7 +164,7 @@ btr_cur_search_to_nth_level(
 	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
 				tuple must be set so that it cannot get
 				compared to the node ptr page number field! */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be PAGE_CUR_LE,
 				not PAGE_CUR_GE, as the latter may end up on
@@ -164,15 +184,17 @@ btr_cur_search_to_nth_level(
 				to protect the record! */
 	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
 				s- or x-latched, but see also above! */
-	ulint		has_search_latch,/*!< in: latch mode the caller
-				currently has on btr_search_latch:
+	ulint		has_search_latch,
+				/*!< in: latch mode the caller
+				currently has on search system:
 				RW_S_LATCH, or 0 */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
+
 /*****************************************************************//**
-Opens a cursor at either end of an index. */
-UNIV_INTERN
+Opens a cursor at either end of an index.
+@return DB_SUCCESS or error code */
 dberr_t
 btr_cur_open_at_index_side_func(
 /*============================*/
@@ -187,12 +209,15 @@ btr_cur_open_at_index_side_func(
 	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 	MY_ATTRIBUTE((nonnull));
+
 #define btr_cur_open_at_index_side(f,i,l,c,lv,m)			\
 	btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
+
 /**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
+bool
 btr_cur_open_at_rnd_pos_func(
 /*=========================*/
 	dict_index_t*	index,		/*!< in: index */
@@ -209,8 +234,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does
 not succeed if there is too little space on the page. If there is just
 one record on the page, the insert will always succeed; this is to
 prevent trying to split a page with just one record.
-@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
 dberr_t
 btr_cur_optimistic_insert(
 /*======================*/
@@ -241,8 +265,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr
 holds an x-latch on the tree and on the cursor page. If the insert is
 made on the leaf level, to avoid deadlocks, mtr must also own x-latches
 to brothers of page, if those brothers exist.
-@return	DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
 dberr_t
 btr_cur_pessimistic_insert(
 /*=======================*/
@@ -273,13 +296,12 @@ an update-in-place.
 
 @retval false if out of space; IBUF_BITMAP_FREE will be reset
 outside mtr if the page was recompressed
-@retval	true if enough place;
+@retval true if enough place;
 
 IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
 a secondary index leaf page. This has to be done either within the
 same mini-transaction, or by invoking ibuf_reset_free_bits() before
 mtr_commit(mtr). */
-UNIV_INTERN
 bool
 btr_cur_update_alloc_zip_func(
 /*==========================*/
@@ -307,7 +329,6 @@ Updates a record when the update causes no size changes in its fields.
 @retval DB_SUCCESS on success
 @retval DB_ZIP_OVERFLOW if there is not enough space left
 on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
 dberr_t
 btr_cur_update_in_place(
 /*====================*/
@@ -328,7 +349,6 @@ btr_cur_update_in_place(
 	MY_ATTRIBUTE((warn_unused_result, nonnull));
 /***********************************************************//**
 Writes a redo log record of updating a record in-place. */
-UNIV_INTERN
 void
 btr_cur_update_in_place_log(
 /*========================*/
@@ -351,7 +371,6 @@ so that tree compression is recommended.
 @retval DB_UNDERFLOW if the page would become too empty
 @retval DB_ZIP_OVERFLOW if there is not enough space left
 on the compressed page */
-UNIV_INTERN
 dberr_t
 btr_cur_optimistic_update(
 /*======================*/
@@ -377,8 +396,7 @@ Performs an update of a record on a page of a tree. It is assumed
 that mtr holds an x-latch on the tree and on the cursor page. If the
 update is made on the leaf level, to avoid deadlocks, mtr must also
 own x-latches to brothers of page, if those brothers exist.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 btr_cur_pessimistic_update(
 /*=======================*/
@@ -396,9 +414,10 @@ btr_cur_pessimistic_update(
 				big_rec and the index tuple */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
-	const upd_t*	update,	/*!< in: update vector; this is allowed also
-				contain trx id and roll ptr fields, but
-				the values in update vector have no effect */
+	upd_t*		update,	/*!< in/out: update vector; this is allowed to
+				also contain trx id and roll ptr fields.
+				Non-updated columns that are moved offpage will
+				be appended to this. */
 	ulint		cmpl_info,/*!< in: compiler info on secondary index
 				updates */
 	que_thr_t*	thr,	/*!< in: query thread */
@@ -411,22 +430,22 @@ Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
 of the deleting transaction, and in the roll ptr field pointer to the
 undo log record created.
-@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
 dberr_t
 btr_cur_del_mark_set_clust_rec(
 /*===========================*/
+	ulint		flags,  /*!< in: undo logging and locking flags */
 	buf_block_t*	block,	/*!< in/out: buffer block of the record */
 	rec_t*		rec,	/*!< in/out: record */
 	dict_index_t*	index,	/*!< in: clustered index of the record */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
 	que_thr_t*	thr,	/*!< in: query thread */
+	const dtuple_t*	entry,	/*!< in: dtuple for the deleting record */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /***********************************************************//**
 Sets a secondary index record delete mark to TRUE or FALSE.
-@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
 dberr_t
 btr_cur_del_mark_set_sec_rec(
 /*=========================*/
@@ -442,8 +461,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid
 deadlocks, mtr must also own x-latches to brothers of page, if those
 brothers exist. NOTE: it is assumed that the caller has reserved enough
 free extents so that the compression will always succeed if done!
-@return	TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
@@ -458,8 +476,7 @@ btr_cur_compress_if_useful(
 Removes the record on which the tree cursor is positioned. It is assumed
 that the mtr has an x-latch on the page where the cursor is positioned,
 but no latch on the whole tree.
-@return	TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return TRUE if success, i.e., the page did not become too empty */
 ibool
 btr_cur_optimistic_delete_func(
 /*===========================*/
@@ -489,8 +506,7 @@ or if it is the only page on the level. It is assumed that mtr holds
 an x-latch on the tree and on the cursor page. To avoid deadlocks,
 mtr must also own x-latches to brothers of page, if those brothers
 exist.
-@return	TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
 ibool
 btr_cur_pessimistic_delete(
 /*=======================*/
@@ -508,14 +524,13 @@ btr_cur_pessimistic_delete(
 				stays valid: it points to successor of
 				deleted record on function exit */
 	ulint		flags,	/*!< in: BTR_CREATE_FLAG or 0 */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr)	/*!< in: mtr */
 	MY_ATTRIBUTE((nonnull));
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
 Parses a redo log record of updating a record in-place.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_update_in_place(
 /*==========================*/
@@ -527,8 +542,7 @@ btr_cur_parse_update_in_place(
 /****************************************************************//**
 Parses the redo log record for delete marking or unmarking of a clustered
 index record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_del_mark_set_clust_rec(
 /*=================================*/
@@ -540,8 +554,7 @@ btr_cur_parse_del_mark_set_clust_rec(
 /****************************************************************//**
 Parses the redo log record for delete marking or unmarking of a secondary
 index record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_del_mark_set_sec_rec(
 /*===============================*/
@@ -550,19 +563,22 @@ btr_cur_parse_del_mark_set_sec_rec(
 	page_t*		page,	/*!< in/out: page or NULL */
 	page_zip_des_t*	page_zip);/*!< in/out: compressed page, or NULL */
 #ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return	estimated number of rows */
-UNIV_INTERN
-ib_int64_t
+
+/** Estimates the number of rows in a given index range.
+@param[in]	index	index
+@param[in]	tuple1	range start, may also be empty tuple
+@param[in]	mode1	search mode for range start
+@param[in]	tuple2	range end, may also be empty tuple
+@param[in]	mode2	search mode for range end
+@return estimated number of rows */
+int64_t
 btr_estimate_n_rows_in_range(
-/*=========================*/
-	dict_index_t*	index,	/*!< in: index */
-	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
-	ulint		mode1,	/*!< in: search mode for range start */
-	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
-	ulint		mode2,	/*!< in: search mode for range end */
-	trx_t*		trx);	/*!< in: trx */
+	dict_index_t*	index,
+	const dtuple_t*	tuple1,
+	page_cur_mode_t	mode1,
+	const dtuple_t*	tuple2,
+	page_cur_mode_t	mode2);
+
 /*******************************************************************//**
 Estimates the number of different key values in a given index, for
 each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
@@ -571,9 +587,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
 index->stat_n_sample_sizes[].
 If innodb_stats_method is nulls_ignored, we also record the number of
 non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
+array index->stat_n_non_null_key_vals.
+@return true if the index is available and we get the estimated numbers,
+false if the index is unavailable. */
+bool
 btr_estimate_number_of_different_key_vals(
 /*======================================*/
 	dict_index_t*	index);	/*!< in: index */
@@ -582,7 +599,6 @@ btr_estimate_number_of_different_key_vals(
 @param[in]	rec	record
 @param[in]	offsets	array returned by rec_get_offsets()
 @return externally stored part, in units of a database page */
-
 ulint
 btr_rec_get_externally_stored_len(
 	const rec_t*	rec,
@@ -593,7 +609,6 @@ Marks non-updated off-page fields as disowned by this record. The ownership
 must be transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
 to free the field. */
-UNIV_INTERN
 void
 btr_cur_disown_inherited_fields(
 /*============================*/
@@ -613,7 +628,9 @@ enum blob_op {
 	/** Store off-page columns for an insert by update */
 	BTR_STORE_INSERT_UPDATE,
 	/** Store off-page columns for an update */
-	BTR_STORE_UPDATE
+	BTR_STORE_UPDATE,
+	/** Store off-page columns for a freshly inserted record by bulk */
+	BTR_STORE_INSERT_BULK
 };
 
 /*******************************************************************//**
@@ -631,32 +648,31 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to
 them in rec.  The extern flags in rec will have to be set beforehand.
 The fields are stored on pages allocated from leaf node
 file segment of the index tree.
-@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 dberr_t
 btr_store_big_rec_extern_fields(
 /*============================*/
-	dict_index_t*	index,		/*!< in: index of rec; the index tree
-					MUST be X-latched */
-	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
-	rec_t*		rec,		/*!< in/out: record */
-	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
-					the "external storage" flags in offsets
-					will not correspond to rec when
-					this function returns */
+	btr_pcur_t*	pcur,		/*!< in/out: a persistent cursor. if
+					btr_mtr is restarted, then this can
+					be repositioned. */
+	const upd_t*	upd,		/*!< in: update vector */
+	ulint*		offsets,	/*!< in/out: rec_get_offsets() on
+					pcur. the "external storage" flags
+					in offsets will correctly correspond
+					to rec when this function returns */
 	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
 					to be stored externally */
-	mtr_t*		btr_mtr,	/*!< in: mtr containing the
-					latches to the clustered index */
+	mtr_t*		btr_mtr,	/*!< in/out: mtr containing the
+					latches to the clustered index. can be
+					committed and restarted. */
 	enum blob_op	op)		/*! in: operation code */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*******************************************************************//**
 Frees the space in an externally stored field to the file space
 management if the field in data is owned the externally stored field,
 in a rollback we may have the additional condition that the field must
 not be inherited. */
-UNIV_INTERN
 void
 btr_free_externally_stored_field(
 /*=============================*/
@@ -677,69 +693,68 @@ btr_free_externally_stored_field(
 					to rec, or NULL if rec == NULL */
 	ulint		i,		/*!< in: field number of field_ref;
 					ignored if rec == NULL */
-	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
-	mtr_t*		local_mtr);	/*!< in: mtr containing the latch to
-					data an an X-latch to the index
-					tree */
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record.  The
-clustered index record must be protected by a lock or a page latch.
+	bool		rollback,	/*!< in: performing rollback? */
+	mtr_t*		local_mtr);	/*!< in: mtr containing the latch */
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record must be protected by a lock or a page latch.
+@param[out]	buf		the field, or a prefix of it
+@param[in]	len		length of buf, in bytes
+@param[in]	page_size	BLOB page size
+@param[in]	data		'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in]	local_len	length of data, in bytes
 @return the length of the copied field, or 0 if the column was being
 or has been deleted */
-UNIV_INTERN
 ulint
 btr_copy_externally_stored_field_prefix(
-/*====================================*/
-	byte*		buf,	/*!< out: the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		local_len,/*!< in: length of data, in bytes */
-	trx_t*		trx);	/*!< in: transaction handle */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.  The
-clustered index record must be protected by a lock or a page latch.
+	byte*			buf,
+	ulint			len,
+	const page_size_t&	page_size,
+	const byte*		data,
+	ulint			local_len);
+
+/** Copies an externally stored field of a record to mem heap.
+The clustered index record must be protected by a lock or a page latch.
+@param[out]	len		length of the whole field
+@param[in]	data		'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in]	page_size	BLOB page size
+@param[in]	local_len	length of data
+@param[in,out]	heap		mem heap
 @return the whole field copied to heap */
-UNIV_INTERN
 byte*
 btr_copy_externally_stored_field(
-/*=============================*/
-	ulint*		len,	/*!< out: length of the whole field */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		local_len,/*!< in: length of data */
-	mem_heap_t*	heap,	/*!< in: mem heap */
-	trx_t*		trx);	/*!< in: transaction handle */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap, or NULL if the field is incomplete */
-UNIV_INTERN
+	ulint*			len,
+	const byte*		data,
+	const page_size_t&	page_size,
+	ulint			local_len,
+	mem_heap_t*		heap);
+
+/** Copies an externally stored field of a record to mem heap.
+@param[in]	rec		record in a clustered index; must be
+protected by a lock or a page latch
+@param[in]	offsets		array returned by rec_get_offsets()
+@param[in]	page_size	BLOB page size
+@param[in]	no		field number
+@param[out]	len		length of the field
+@param[in,out]	heap		mem heap
+@return the field copied to heap, or NULL if the field is incomplete */
 byte*
 btr_rec_copy_externally_stored_field(
-/*=================================*/
-	const rec_t*	rec,	/*!< in: record in a clustered index;
-				must be protected by a lock or a page latch */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		no,	/*!< in: field number */
-	ulint*		len,	/*!< out: length of the field */
-	mem_heap_t*	heap,	/*!< in: mem heap */
-	trx_t*		trx);	/*!< in: transaction handle */
+	const rec_t*		rec,
+	const ulint*		offsets,
+	const page_size_t&	page_size,
+	ulint			no,
+	ulint*			len,
+	mem_heap_t*		heap);
+
 /*******************************************************************//**
 Flags the data tuple fields that are marked as extern storage in the
 update vector.  We use this function to remember which fields we must
 mark as extern storage in a record inserted for an update.
-@return	number of flagged external columns */
-UNIV_INTERN
+@return number of flagged external columns */
 ulint
 btr_push_update_extern_fields(
 /*==========================*/
@@ -750,38 +765,74 @@ btr_push_update_extern_fields(
 /***********************************************************//**
 Sets a secondary index record's delete mark to the given value. This
 function is only used by the insert buffer merge mechanism. */
-UNIV_INTERN
 void
 btr_cur_set_deleted_flag_for_ibuf(
 /*==============================*/
 	rec_t*		rec,		/*!< in/out: record */
 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
 					corresponding to rec, or NULL
-					when the tablespace is
-					uncompressed */
+					when the tablespace is uncompressed */
 	ibool		val,		/*!< in: value to set */
 	mtr_t*		mtr);		/*!< in/out: mini-transaction */
+
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
+	ulint		flag);	/*!< in: nonzero if delete marked */
+
+/** Latches the leaf page or pages requested.
+@param[in]	block		leaf page where the search converged
+@param[in]	page_id		page id of the leaf
+@param[in]	latch_mode	BTR_SEARCH_LEAF, ...
+@param[in]	cursor		cursor
+@param[in]	mtr		mini-transaction
+@return blocks and savepoints which were actually latched. */
+btr_latch_leaves_t
+btr_cur_latch_leaves(
+	buf_block_t*		block,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			latch_mode,
+	btr_cur_t*		cursor,
+	mtr_t*			mtr);
+
 /*######################################################################*/
 
 /** In the pessimistic delete, if the page data size drops below this
 limit, merging it to a neighbor is tried */
-#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
+#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
+	((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
 
 /** A slot in the path array. We store here info on a search path down the
 tree. Each slot contains data on a single level of the tree. */
-
-struct btr_path_t{
-	ulint	nth_rec;	/*!< index of the record
-				where the page cursor stopped on
-				this level (index in alphabetical
-				order); value ULINT_UNDEFINED
-				denotes array end */
-	ulint	n_recs;		/*!< number of records on the page */
-	ulint	page_no;	/*!< no of the page containing the record */
-	ulint	page_level;	/*!< level of the page, if later we fetch
-				the page under page_no and it is no different
-				level then we know that the tree has been
-				reorganized */
+struct btr_path_t {
+	/* Assume a page like:
+	records:             (inf, a, b, c, d, sup)
+	index of the record:    0, 1, 2, 3, 4, 5
+	*/
+
+	/** Index of the record where the page cursor stopped on this level
+	(index in alphabetical order). Value ULINT_UNDEFINED denotes array
+	end. In the above example, if the search stopped on record 'c', then
+	nth_rec will be 3. */
+	ulint	nth_rec;
+
+	/** Number of the records on the page, not counting inf and sup.
+	In the above example n_recs will be 4. */
+	ulint	n_recs;
+
+	/** Number of the page containing the record. */
+	ulint	page_no;
+
+	/** Level of the page. If later we fetch the page under page_no
+	and it is on a different level then we know that the tree has been
+	reorganized. */
+	ulint	page_level;
 };
 
 #define BTR_PATH_ARRAY_N_SLOTS	250	/*!< size of path array (in slots) */
@@ -858,7 +909,7 @@ struct btr_cur_t {
 					other search modes; see also the NOTE
 					in up_match! */
 	ulint		low_bytes;	/*!< number of matched bytes to the
-					right at the time cursor positioned;
+					left at the time cursor positioned;
 					only used internally in searches: not
 					defined after the search */
 	ulint		n_fields;	/*!< prefix length used in a hash
@@ -872,8 +923,11 @@ struct btr_cur_t {
 					rows in range, we store in this array
 					information of the path through
 					the tree */
+	rtr_info_t*	rtr_info;	/*!< rtree search info */
+	btr_cur_t():thr(NULL), rtr_info(NULL) {}
+					/* constructor: thr and rtr_info
+					start as NULL */
 };
 
 /** If pessimistic delete fails because of lack of file space, there
 is still a good change of success a little later.  Try this many
 times. */
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index 43ee3304c0e..45c0d59a8aa 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -39,7 +39,7 @@ if (btr_cur_limit_optimistic_insert_debug > 1\
 #ifdef UNIV_DEBUG
 /*********************************************************//**
 Returns the page cursor component of a tree cursor.
-@return	pointer to page cursor component */
+@return pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_cur_get_page_cur(
@@ -51,7 +51,7 @@ btr_cur_get_page_cur(
 
 /*********************************************************//**
 Returns the buffer block on which the tree cursor is positioned.
-@return	pointer to buffer block */
+@return pointer to buffer block */
 UNIV_INLINE
 buf_block_t*
 btr_cur_get_block(
@@ -63,7 +63,7 @@ btr_cur_get_block(
 
 /*********************************************************//**
 Returns the record pointer of a tree cursor.
-@return	pointer to record */
+@return pointer to record */
 UNIV_INLINE
 rec_t*
 btr_cur_get_rec(
@@ -76,7 +76,7 @@ btr_cur_get_rec(
 
 /*********************************************************//**
 Returns the compressed page on which the tree cursor is positioned.
-@return	pointer to compressed page, or NULL if the page is not compressed */
+@return pointer to compressed page, or NULL if the page is not compressed */
 UNIV_INLINE
 page_zip_des_t*
 btr_cur_get_page_zip(
@@ -87,19 +87,8 @@ btr_cur_get_page_zip(
 }
 
 /*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
-	btr_cur_t*	cursor)	/*!< in: tree cursor */
-{
-	page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*********************************************************//**
 Returns the page of a tree cursor.
-@return	pointer to page */
+@return pointer to page */
 UNIV_INLINE
 page_t*
 btr_cur_get_page(
@@ -130,7 +119,7 @@ btr_cur_position(
 /*********************************************************************//**
 Checks if compressing an index page where a btr cursor is placed makes
 sense.
-@return	TRUE if compression is recommended */
+@return TRUE if compression is recommended */
 UNIV_INLINE
 ibool
 btr_cur_compress_recommendation(
@@ -140,15 +129,17 @@ btr_cur_compress_recommendation(
 {
 	const page_t*	page;
 
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(
+		mtr, btr_cur_get_block(cursor),
+		MTR_MEMO_PAGE_X_FIX, cursor->index->table));
 
 	page = btr_cur_get_page(cursor);
 
 	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
 				      return(FALSE));
 
-	if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
+	if ((page_get_data_size(page)
+	     < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
 	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
 		&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
 
@@ -167,7 +158,7 @@ btr_cur_compress_recommendation(
 /*********************************************************************//**
 Checks if the record on which the cursor is placed can be deleted without
 making tree compression necessary (or, recommended).
-@return	TRUE if can be deleted without recommended compression */
+@return TRUE if can be deleted without recommended compression */
 UNIV_INLINE
 ibool
 btr_cur_can_delete_without_compress(
@@ -183,7 +174,8 @@ btr_cur_can_delete_without_compress(
 
 	page = btr_cur_get_page(cursor);
 
-	if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
+	if ((page_get_data_size(page) - rec_size
+	     < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
 	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
 		&& (btr_page_get_prev(page, mtr) == FIL_NULL))
 	    || (page_get_n_recs(page) < 2)) {
@@ -211,6 +203,7 @@ btr_blob_op_is_update(
 {
 	switch (op) {
 	case BTR_STORE_INSERT:
+	case BTR_STORE_INSERT_BULK:
 		return(FALSE);
 	case BTR_STORE_INSERT_UPDATE:
 	case BTR_STORE_UPDATE:
@@ -220,4 +213,23 @@ btr_blob_op_is_update(
 	ut_ad(0);
 	return(FALSE);
 }
+
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
+	ulint		flag)	/*!< in: nonzero if delete marked */
+{
+	if (page_rec_is_comp(rec)) {
+		rec_set_deleted_flag_new(rec, page_zip, flag);
+	} else {
+		ut_ad(!page_zip);
+		rec_set_deleted_flag_old(rec, flag);
+	}
+}
+
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index dafe14ce556..02f4faf24a5 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -34,22 +34,24 @@ Created 2/23/1996 Heikki Tuuri
 #include "btr0cur.h"
 #include "btr0btr.h"
 #include "btr0types.h"
+#include "gis0rtree.h"
 
 /* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON			1
-#define BTR_PCUR_BEFORE			2
-#define BTR_PCUR_AFTER			3
+enum btr_pcur_pos_t {
+	BTR_PCUR_ON		= 1,
+	BTR_PCUR_BEFORE		= 2,
+	BTR_PCUR_AFTER		= 3,
 /* Note that if the tree is not empty, btr_pcur_store_position does not
 use the following, but only uses the above three alternatives, where the
 position is stored relative to a specific record: this makes implementation
 of a scroll cursor easier */
-#define BTR_PCUR_BEFORE_FIRST_IN_TREE	4	/* in an empty tree */
-#define BTR_PCUR_AFTER_LAST_IN_TREE	5	/* in an empty tree */
+	BTR_PCUR_BEFORE_FIRST_IN_TREE	= 4,	/* in an empty tree */
+	BTR_PCUR_AFTER_LAST_IN_TREE	= 5	/* in an empty tree */
+};
 
 /**************************************************************//**
 Allocates memory for a persistent cursor object and initializes the cursor.
-@return	own: persistent cursor */
-UNIV_INTERN
+@return own: persistent cursor */
 btr_pcur_t*
 btr_pcur_create_for_mysql(void);
 /*============================*/
@@ -57,7 +59,6 @@ btr_pcur_create_for_mysql(void);
 /**************************************************************//**
 Resets a persistent cursor object, freeing ::old_rec_buf if it is
 allocated and resetting the other members to their initial values. */
-UNIV_INTERN
 void
 btr_pcur_reset(
 /*===========*/
@@ -65,14 +66,12 @@ btr_pcur_reset(
 
 /**************************************************************//**
 Frees the memory for a persistent cursor object. */
-UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
 	btr_pcur_t*	cursor);	/*!< in, own: persistent cursor */
 /**************************************************************//**
 Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
@@ -87,6 +86,14 @@ void
 btr_pcur_init(
 /*==========*/
 	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
+
+/** Free old_rec_buf.
+@param[in]	pcur	Persistent cursor holding old_rec to be freed. */
+UNIV_INLINE
+void
+btr_pcur_free(
+	btr_pcur_t*	pcur);
+
 /**************************************************************//**
 Initializes and opens a persistent cursor to an index tree. It should be
 closed with btr_pcur_close. */
@@ -97,7 +104,7 @@ btr_pcur_open_low(
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: level in the btree */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -119,7 +126,7 @@ btr_pcur_open_with_no_init_func(
 /*============================*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -131,8 +138,9 @@ btr_pcur_open_with_no_init_func(
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
-	ulint		has_search_latch,/*!< in: latch mode the caller
-				currently has on btr_search_latch:
+	ulint		has_search_latch,
+				/*!< in: latch mode the caller
+				currently has on search system:
 				RW_S_LATCH, or 0 */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
@@ -181,13 +189,12 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then
 in the first case sets the cursor after last in tree, and in the latter case
 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
-UNIV_INTERN
 void
 btr_pcur_open_on_user_rec_func(
 /*===========================*/
 	dict_index_t*	index,		/*!< in: index */
 	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
-	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	page_cur_mode_t	mode,		/*!< in: PAGE_CUR_L, ... */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
@@ -198,9 +205,11 @@ btr_pcur_open_on_user_rec_func(
 #define btr_pcur_open_on_user_rec(i,t,md,l,c,m)				\
 	btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
 /**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have put the cursor, false
+if the index is unavailable */
 UNIV_INLINE
-void
+bool
 btr_pcur_open_at_rnd_pos_func(
 /*==========================*/
 	dict_index_t*	index,		/*!< in: index */
@@ -235,7 +244,6 @@ cursor data structure, or just setting a flag if the cursor id before the
 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
 page where the cursor is positioned must not be empty if the index tree is
 not totally empty! */
-UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
@@ -256,7 +264,6 @@ restores to before first or after the last in the tree.
 @return TRUE if the cursor position was stored when it was on a user
 record and it can be restored on a user record whose ordering fields
 are identical to the ones of the original user record */
-UNIV_INTERN
 ibool
 btr_pcur_restore_position_func(
 /*===========================*/
@@ -269,7 +276,7 @@ btr_pcur_restore_position_func(
 	btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
 /*********************************************************//**
 Gets the rel_pos field for a cursor whose position has been stored.
-@return	BTR_PCUR_ON, ... */
+@return BTR_PCUR_ON, ... */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
@@ -289,7 +296,7 @@ btr_pcur_commit_specify_mtr(
 /*********************************************************//**
 Moves the persistent cursor to the next record in the tree. If no records are
 left, the cursor stays 'after last in tree'.
-@return	TRUE if the cursor was not after last in tree */
+@return TRUE if the cursor was not after last in tree */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
@@ -300,8 +307,7 @@ btr_pcur_move_to_next(
 /*********************************************************//**
 Moves the persistent cursor to the previous record in the tree. If no records
 are left, the cursor stays 'before first in tree'.
-@return	TRUE if the cursor was not before first in tree */
-UNIV_INTERN
+@return TRUE if the cursor was not before first in tree */
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
@@ -319,7 +325,7 @@ btr_pcur_move_to_last_on_page(
 /*********************************************************//**
 Moves the persistent cursor to the next user record in the tree. If no user
 records are left, the cursor ends up 'after last in tree'.
-@return	TRUE if the cursor moved forward, ending on a user record */
+@return TRUE if the cursor moved forward, ending on a user record */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
@@ -332,7 +338,6 @@ Moves the persistent cursor to the first record on the next page.
 Releases the latch on the current page, and bufferunfixes it.
 Note that there must not be modifications on the current page,
 as then the x-latch can be released only in mtr_commit. */
-UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
@@ -349,7 +354,6 @@ The alphabetical position of the cursor is guaranteed to be sensible
 on return, but it may happen that the cursor is not positioned on the
 last record of any page, because the structure of the tree may have
 changed while the cursor had no latches. */
-UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
@@ -359,7 +363,7 @@ btr_pcur_move_backward_from_page(
 #ifdef UNIV_DEBUG
 /*********************************************************//**
 Returns the btr cursor component of a persistent cursor.
-@return	pointer to btr cursor component */
+@return pointer to btr cursor component */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
@@ -367,7 +371,7 @@ btr_pcur_get_btr_cur(
 	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
 /*********************************************************//**
 Returns the page cursor component of a persistent cursor.
-@return	pointer to page cursor component */
+@return pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
@@ -375,7 +379,7 @@ btr_pcur_get_page_cur(
 	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
 /*********************************************************//**
 Returns the page of a persistent cursor.
-@return	pointer to the page */
+@return pointer to the page */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
@@ -383,7 +387,7 @@ btr_pcur_get_page(
 	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
 /*********************************************************//**
 Returns the buffer block of a persistent cursor.
-@return	pointer to the block */
+@return pointer to the block */
 UNIV_INLINE
 buf_block_t*
 btr_pcur_get_block(
@@ -391,7 +395,7 @@ btr_pcur_get_block(
 	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
 /*********************************************************//**
 Returns the record of a persistent cursor.
-@return	pointer to the record */
+@return pointer to the record */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
@@ -493,53 +497,53 @@ enum pcur_pos_t {
 selects, updates, and deletes. */
 
 struct btr_pcur_t{
-	btr_cur_t	btr_cur;	/*!< a B-tree cursor */
-	ulint		latch_mode;	/*!< see TODO note below!
-					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
-					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
-					depending on the latching state of
-					the page and tree where the cursor is
-					positioned; BTR_NO_LATCHES means that
-					the cursor is not currently positioned:
-					we say then that the cursor is
-					detached; it can be restored to
-					attached if the old position was
-					stored in old_rec */
-	ulint		old_stored;	/*!< BTR_PCUR_OLD_STORED
-					or BTR_PCUR_OLD_NOT_STORED */
-	rec_t*		old_rec;	/*!< if cursor position is stored,
-					contains an initial segment of the
-					latest record cursor was positioned
-					either on, before, or after */
-	ulint		old_n_fields;	/*!< number of fields in old_rec */
-	ulint		rel_pos;	/*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
-					BTR_PCUR_AFTER, depending on whether
-					cursor was on, before, or after the
-					old_rec record */
-	buf_block_t*	block_when_stored;/* buffer block when the position was
-					stored */
-	ib_uint64_t	modify_clock;	/*!< the modify clock value of the
-					buffer block when the cursor position
-					was stored */
-	enum pcur_pos_t	pos_state;	/*!< btr_pcur_store_position() and
-					btr_pcur_restore_position() state. */
-	ulint		search_mode;	/*!< PAGE_CUR_G, ... */
-	trx_t*		trx_if_known;	/*!< the transaction, if we know it;
-					otherwise this field is not defined;
-					can ONLY BE USED in error prints in
-					fatal assertion failures! */
+	/** a B-tree cursor */
+	btr_cur_t	btr_cur;
+	/** see TODO note below!
+	BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, BTR_MODIFY_TREE or BTR_NO_LATCHES,
+	depending on the latching state of the page and tree where the cursor
+	is positioned; BTR_NO_LATCHES means that the cursor is not currently
+	positioned:
+	we say then that the cursor is detached; it can be restored to
+	attached if the old position was stored in old_rec */
+	ulint		latch_mode;
+	/** true if old_rec is stored */
+	bool		old_stored;
+	/** if cursor position is stored, contains an initial segment of the
+	latest record cursor was positioned either on, before or after */
+	rec_t*		old_rec;
+	/** number of fields in old_rec */
+	ulint		old_n_fields;
+	/** BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on
+	whether cursor was on, before, or after the old_rec record */
+	enum btr_pcur_pos_t	rel_pos;
+	/** buffer block when the position was stored */
+	buf_block_t*	block_when_stored;
+	/** the modify clock value of the buffer block when the cursor position
+	was stored */
+	ib_uint64_t	modify_clock;
+	/** the withdraw clock value of the buffer pool when the cursor
+	position was stored */
+	ulint		withdraw_clock;
+	/** btr_pcur_store_position() and btr_pcur_restore_position() state. */
+	enum pcur_pos_t	pos_state;
+	/** PAGE_CUR_G, ... */
+	page_cur_mode_t	search_mode;
+	/** the transaction, if we know it; otherwise this field is not defined;
+	can ONLY BE USED in error prints in fatal assertion failures! */
+	trx_t*		trx_if_known;
 	/*-----------------------------*/
 	/* NOTE that the following fields may possess dynamically allocated
 	memory which should be freed if not needed anymore! */
 
-	byte*		old_rec_buf;	/*!< NULL, or a dynamically allocated
-					buffer for old_rec */
-	ulint		buf_size;	/*!< old_rec_buf size if old_rec_buf
-					is not NULL */
-};
+	/** NULL, or a dynamically allocated buffer for old_rec */
+	byte*		old_rec_buf;
+	/** old_rec_buf size if old_rec_buf is not NULL */
+	ulint		buf_size;
 
-#define BTR_PCUR_OLD_STORED	908467085
-#define BTR_PCUR_OLD_NOT_STORED	122766467
+	/** Return the index of this persistent cursor */
+	dict_index_t*	index() const { return(btr_cur.index); }
+};
 
 #ifndef UNIV_NONINL
 #include "btr0pcur.ic"
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index 1cd13824542..e7ae85dd730 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +27,7 @@ Created 2/23/1996 Heikki Tuuri
 
 /*********************************************************//**
 Gets the rel_pos field for a cursor whose position has been stored.
-@return	BTR_PCUR_ON, ... */
+@return BTR_PCUR_ON, ... */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
@@ -35,7 +36,7 @@ btr_pcur_get_rel_pos(
 {
 	ut_ad(cursor);
 	ut_ad(cursor->old_rec);
-	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+	ut_ad(cursor->old_stored);
 	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
 	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 
@@ -45,7 +46,7 @@ btr_pcur_get_rel_pos(
 #ifdef UNIV_DEBUG
 /*********************************************************//**
 Returns the btr cursor component of a persistent cursor.
-@return	pointer to btr cursor component */
+@return pointer to btr cursor component */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
@@ -58,7 +59,7 @@ btr_pcur_get_btr_cur(
 
 /*********************************************************//**
 Returns the page cursor component of a persistent cursor.
-@return	pointer to page cursor component */
+@return pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
@@ -70,7 +71,7 @@ btr_pcur_get_page_cur(
 
 /*********************************************************//**
 Returns the page of a persistent cursor.
-@return	pointer to the page */
+@return pointer to the page */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
@@ -84,7 +85,7 @@ btr_pcur_get_page(
 
 /*********************************************************//**
 Returns the buffer block of a persistent cursor.
-@return	pointer to the block */
+@return pointer to the block */
 UNIV_INLINE
 buf_block_t*
 btr_pcur_get_block(
@@ -98,7 +99,7 @@ btr_pcur_get_block(
 
 /*********************************************************//**
 Returns the record of a persistent cursor.
-@return	pointer to the record */
+@return pointer to the record */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
@@ -260,7 +261,7 @@ btr_pcur_move_to_next_on_page(
 
 	page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 }
 
 /*********************************************************//**
@@ -276,7 +277,7 @@ btr_pcur_move_to_prev_on_page(
 
 	page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 }
 
 /*********************************************************//**
@@ -294,13 +295,13 @@ btr_pcur_move_to_last_on_page(
 	page_cur_set_after_last(btr_pcur_get_block(cursor),
 				btr_pcur_get_page_cur(cursor));
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 }
 
 /*********************************************************//**
 Moves the persistent cursor to the next user record in the tree. If no user
 records are left, the cursor ends up 'after last in tree'.
-@return	TRUE if the cursor moved forward, ending on a user record */
+@return TRUE if the cursor moved forward, ending on a user record */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
@@ -311,7 +312,7 @@ btr_pcur_move_to_next_user_rec(
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 loop:
 	if (btr_pcur_is_after_last_on_page(cursor)) {
 
@@ -336,7 +337,7 @@ loop:
 /*********************************************************//**
 Moves the persistent cursor to the next record in the tree. If no records are
 left, the cursor stays 'after last in tree'.
-@return	TRUE if the cursor was not after last in tree */
+@return TRUE if the cursor was not after last in tree */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
@@ -348,7 +349,7 @@ btr_pcur_move_to_next(
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	if (btr_pcur_is_after_last_on_page(cursor)) {
 
@@ -396,9 +397,21 @@ btr_pcur_init(
 /*==========*/
 	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
 {
-	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	pcur->old_stored = false;
 	pcur->old_rec_buf = NULL;
 	pcur->old_rec = NULL;
+
+	pcur->btr_cur.rtr_info = NULL;
+}
+
+/** Free old_rec_buf. The pointer itself is not reset to NULL.
+@param[in]	pcur	Persistent cursor whose old_rec_buf is to be freed. */
+UNIV_INLINE
+void
+btr_pcur_free(
+	btr_pcur_t*	pcur)
+{
+	ut_free(pcur->old_rec_buf);
 }
 
 /**************************************************************//**
@@ -411,7 +424,7 @@ btr_pcur_open_low(
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: level in the btree */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -424,6 +437,7 @@ btr_pcur_open_low(
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	btr_cur_t*	btr_cursor;
+	dberr_t err = DB_SUCCESS;
 
 	/* Initialize the cursor */
 
@@ -436,8 +450,22 @@ btr_pcur_open_low(
 
 	btr_cursor = btr_pcur_get_btr_cur(cursor);
 
-	btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
-				    btr_cursor, 0, file, line, mtr);
+	ut_ad(!dict_index_is_spatial(index));
+
+	err = btr_cur_search_to_nth_level(
+		index, level, tuple, mode, latch_mode,
+		btr_cursor, 0, file, line, mtr);
+
+	if (err != DB_SUCCESS) {
+		ib::warn() << " Error code: " << err
+			   << " btr_pcur_open_low "
+			   << " level: " << level
+			   << " called from file: "
+			   << file << " line: " << line
+			   << " table: " << index->table->name
+			   << " index: " << index->name;
+	}
+
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
 	cursor->trx_if_known = NULL;
@@ -452,7 +480,7 @@ btr_pcur_open_with_no_init_func(
 /*============================*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
@@ -464,8 +492,9 @@ btr_pcur_open_with_no_init_func(
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
-	ulint		has_search_latch,/*!< in: latch mode the caller
-				currently has on btr_search_latch:
+	ulint		has_search_latch,
+				/*!< in: latch mode the caller
+				currently has on search system:
 				RW_S_LATCH, or 0 */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
@@ -474,19 +503,20 @@ btr_pcur_open_with_no_init_func(
 	btr_cur_t*	btr_cursor;
 	dberr_t		err = DB_SUCCESS;
 
-	cursor->latch_mode = latch_mode;
+	cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode);
 	cursor->search_mode = mode;
 
 	/* Search with the tree cursor */
 
 	btr_cursor = btr_pcur_get_btr_cur(cursor);
 
-	err = btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
-				    	  btr_cursor, has_search_latch,
-				    	  file, line, mtr);
+	err = btr_cur_search_to_nth_level(
+		index, 0, tuple, mode, latch_mode, btr_cursor,
+		has_search_latch, file, line, mtr);
+
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	cursor->trx_if_known = NULL;
 	return err;
@@ -518,11 +548,12 @@ btr_pcur_open_at_index_side(
 		btr_pcur_init(pcur);
 	}
 
-	err = btr_cur_open_at_index_side(from_left, index, latch_mode,
-				   	 btr_pcur_get_btr_cur(pcur), level, mtr);
+	err = btr_cur_open_at_index_side(
+		from_left, index, latch_mode,
+		btr_pcur_get_btr_cur(pcur), level, mtr);
 	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
 
-	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	pcur->old_stored = false;
 
 	pcur->trx_if_known = NULL;
 
@@ -530,9 +561,11 @@ btr_pcur_open_at_index_side(
 }
 
 /**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and the cursor has been positioned,
+false if the index is unavailable */
 UNIV_INLINE
-void
+bool
 btr_pcur_open_at_rnd_pos_func(
 /*==========================*/
 	dict_index_t*	index,		/*!< in: index */
@@ -549,13 +582,17 @@ btr_pcur_open_at_rnd_pos_func(
 
 	btr_pcur_init(cursor);
 
-	btr_cur_open_at_rnd_pos_func(index, latch_mode,
-				     btr_pcur_get_btr_cur(cursor),
-				     file, line, mtr);
+	bool	available;
+
+	available = btr_cur_open_at_rnd_pos_func(index, latch_mode,
+						 btr_pcur_get_btr_cur(cursor),
+						 file, line, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	cursor->trx_if_known = NULL;
+
+	return(available);
 }
 
 /**************************************************************//**
@@ -576,18 +613,20 @@ btr_pcur_close(
 /*===========*/
 	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
-	if (cursor->old_rec_buf != NULL) {
-
-		mem_free(cursor->old_rec_buf);
+	ut_free(cursor->old_rec_buf);
 
-		cursor->old_rec = NULL;
-		cursor->old_rec_buf = NULL;
+	if (cursor->btr_cur.rtr_info) {
+		rtr_clean_rtr_info(cursor->btr_cur.rtr_info, true);
+		cursor->btr_cur.rtr_info = NULL;
 	}
 
+	cursor->old_rec = NULL;
+	cursor->old_rec_buf = NULL;
 	cursor->btr_cur.page_cur.rec = NULL;
 	cursor->btr_cur.page_cur.block = NULL;
+
 	cursor->old_rec = NULL;
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	cursor->latch_mode = BTR_NO_LATCHES;
 	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
@@ -608,5 +647,5 @@ btr_pcur_move_before_first_on_page(
 	page_cur_set_before_first(btr_pcur_get_block(cursor),
 		btr_pcur_get_page_cur(cursor));
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 }
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index c95ca28057a..12659037904 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -34,59 +34,54 @@ Created 2/17/1996 Heikki Tuuri
 #include "mtr0mtr.h"
 #include "ha0ha.h"
 
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
+/** Creates and initializes the adaptive search system at a database start.
+@param[in]	hash_size	hash table size. */
 void
-btr_search_sys_create(
-/*==================*/
-	ulint	hash_size);	/*!< in: hash index hash table size */
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
+btr_search_sys_create(ulint hash_size);
+
+/** Resize hash index hash table.
+@param[in]	hash_size	hash index hash table size */
 void
-btr_search_sys_free(void);
-/*=====================*/
+btr_search_sys_resize(ulint hash_size);
 
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
+/** Frees the adaptive search system at a database shutdown. */
 void
-btr_search_disable(void);
-/*====================*/
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
+btr_search_sys_free();
+
+/** Disable the adaptive hash search system and empty the index.
+@param[in]	need_mutex	need to acquire dict_sys->mutex
 void
-btr_search_enable(void);
-/*====================*/
+btr_search_disable(
+	bool	need_mutex);
+/** Enable the adaptive hash search system. */
+void
+btr_search_enable();
 
 /********************************************************************//**
 Returns search info for an index.
-@return	search info; search mutex reserved */
+@return search info; search mutex reserved */
 UNIV_INLINE
 btr_search_t*
 btr_search_get_info(
 /*================*/
 	dict_index_t*	index)	/*!< in: index */
 	MY_ATTRIBUTE((nonnull));
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return	own: search info struct */
-UNIV_INTERN
+
+/** Creates and initializes a search info struct.
+@param[in]	heap		heap where created.
+@return own: search info struct */
 btr_search_t*
-btr_search_info_create(
-/*===================*/
-	mem_heap_t*	heap);	/*!< in: heap where created */
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return	ref_count value. */
-UNIV_INTERN
+btr_search_info_create(mem_heap_t* heap);
+
+/** Returns the value of ref_count. The value is protected by the search latch.
+@param[in]	info		search info
+@param[in]	index		index identifier
+@return ref_count value. */
 ulint
 btr_search_info_get_ref_count(
-/*==========================*/
-	btr_search_t*   info);	/*!< in: search info. */
+	btr_search_t*	info,
+	dict_index_t*	index);
+
 /*********************************************************************//**
 Updates the search info. */
 UNIV_INLINE
@@ -95,108 +90,180 @@ btr_search_info_update(
 /*===================*/
 	dict_index_t*	index,	/*!< in: index of the cursor */
 	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
+
+/** Tries to guess the right search position based on the hash search info
 of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
 and the function returns TRUE, then cursor->up_match and cursor->low_match
 both have sensible values.
-@return	TRUE if succeeded */
-UNIV_INTERN
+@param[in,out]	index		index
+@param[in,out]	info		index search info
+@param[in]	tuple		logical record
+@param[in]	mode		PAGE_CUR_L, ...
+@param[in]	latch_mode	BTR_SEARCH_LEAF, ...;
+				NOTE that only if has_search_latch is 0, we will
+				have a latch set on the cursor page, otherwise
+				we assume the caller uses his search latch
+				to protect the record!
+@param[out]	cursor		tree cursor
+@param[in]	has_search_latch
+				latch mode the caller currently has on
+				search system: RW_S/X_LATCH or 0
+@param[in]	mtr		mini transaction
+@return TRUE if succeeded */
 ibool
 btr_search_guess_on_hash(
-/*=====================*/
-	dict_index_t*	index,		/*!< in: index */
-	btr_search_t*	info,		/*!< in: index search info */
-	const dtuple_t*	tuple,		/*!< in: logical record */
-	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/*!< out: tree cursor */
-	ulint		has_search_latch,/*!< in: latch mode the caller
-					currently has on btr_search_latch:
-					RW_S_LATCH, RW_X_LATCH, or 0 */
-	mtr_t*		mtr);		/*!< in: mtr */
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
+	dict_index_t*	index,
+	btr_search_t*	info,
+	const dtuple_t*	tuple,
+	ulint		mode,
+	ulint		latch_mode,
+	btr_cur_t*	cursor,
+	ulint		has_search_latch,
+	mtr_t*		mtr);
+
+/** Moves or deletes hash entries for moved records. If new_page is already
+hashed, then the hash index for page, if any, is dropped. If new_page is not
+hashed, and page is hashed, then a new hash index is built to new_page with the
+same parameters as page (this often happens when a page is split).
+@param[in,out]	new_block	records are copied to this page.
+@param[in,out]	block		index page from which records are copied; the
+				copied records will be deleted from this page.
+@param[in,out]	index		record descriptor */
 void
 btr_search_move_or_delete_hash_entries(
-/*===================================*/
-	buf_block_t*	new_block,	/*!< in: records are copied
-					to this page */
-	buf_block_t*	block,		/*!< in: index page from which
-					records were copied, and the
-					copied records will be deleted
-					from this page */
-	dict_index_t*	index);		/*!< in: record descriptor */
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
+	buf_block_t*	new_block,
+	buf_block_t*	block,
+	dict_index_t*	index);
+
+/** Drop any adaptive hash index entries that point to an index page.
+@param[in,out]	block	block containing index page, s- or x-latched, or an
+			index page for which we know that
+			block->buf_fix_count == 0 or it is an index page which
+			has already been removed from the buf_pool->page_hash
+			i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
 void
-btr_search_drop_page_hash_index(
-/*============================*/
-	buf_block_t*	block);	/*!< in: block containing index page,
-				s- or x-latched, or an index page
-				for which we know that
-				block->buf_fix_count == 0 */
-/********************************************************************//**
-Drops a possible page hash index when a page is evicted from the buffer pool
-or freed in a file segment. */
-UNIV_INTERN
+btr_search_drop_page_hash_index(buf_block_t* block);
+
+/** Drop any adaptive hash index entries that may point to an index
+page that may be in the buffer pool, when a page is evicted from the
+buffer pool or freed in a file segment.
+@param[in]	page_id		page id
+@param[in]	page_size	page size */
 void
 btr_search_drop_page_hash_when_freed(
-/*=================================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no);	/*!< in: page number */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
+	const page_id_t&	page_id,
+	const page_size_t&	page_size);
+
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in]	cursor	cursor which was positioned to the place to insert
+			using btr_cur_search_..., and the new record has been
+			inserted next to the cursor. */
 void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
-	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor);
+
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in]	cursor		cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
 void
-btr_search_update_hash_on_insert(
-/*=============================*/
-	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
-				place to insert using btr_cur_search_...,
-				and the new record has been inserted next
-				to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
+btr_search_update_hash_on_insert(btr_cur_t* cursor);
+
+/** Updates the page hash index when a single record is deleted from a page.
+@param[in]	cursor	cursor which was positioned on the record to delete
+			using btr_cur_search_...; record is not yet deleted. */
 void
-btr_search_update_hash_on_delete(
-/*=============================*/
-	btr_cur_t*	cursor);/*!< in: cursor which was positioned on the
-				record to delete using btr_cur_search_...,
-				the record is not yet deleted */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates the search system.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void);
-/*======================*/
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+btr_search_update_hash_on_delete(btr_cur_t* cursor);
+
+/** Validates the search system.
+@return true if ok */
+bool
+btr_search_validate();
+
+/** X-Lock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_x_lock(const dict_index_t* index);
+
+/** X-Unlock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_x_unlock(const dict_index_t* index);
+
+/** Lock all search latches in exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_lock_all();
+
+/** Unlock all search latches from exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_unlock_all();
+
+/** S-Lock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_s_lock(const dict_index_t* index);
+
+/** S-Unlock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_s_unlock(const dict_index_t* index);
+
+/** Lock all search latches in shared mode. */
+UNIV_INLINE
+void
+btr_search_s_lock_all();
+
+#ifdef UNIV_DEBUG
+/** Check if thread owns all the search latches.
+@param[in]	mode	lock mode check
+@retval true if owns all of them
+@retval false if does not own some of them */
+UNIV_INLINE
+bool
+btr_search_own_all(ulint mode);
+
+/** Check if thread owns any of the search latches.
+@param[in]	mode	lock mode check
+@retval true if owns any of them
+@retval false if owns no search latch */
+UNIV_INLINE
+bool
+btr_search_own_any(ulint mode);
+#endif /* UNIV_DEBUG */
+
+/** Unlock all search latches from shared mode. */
+UNIV_INLINE
+void
+btr_search_s_unlock_all();
+
+/** Get the latch based on index attributes.
+A latch is selected from an array of latches using pair of index-id, space-id.
+@param[in]	index	index handler
+@return latch */
+UNIV_INLINE
+rw_lock_t*
+btr_get_search_latch(const dict_index_t* index);
+
+/** Get the hash-table based on index attributes.
+A table is selected from an array of tables using pair of index-id, space-id.
+@param[in]	index	index handler
+@return hash table */
+UNIV_INLINE
+hash_table_t*
+btr_get_search_table(const dict_index_t* index);
 
 /** The search info struct in an index */
 struct btr_search_t{
 	ulint	ref_count;	/*!< Number of blocks in this index tree
 				that have search index built
 				i.e. block->index points to this index.
-				Protected by btr_search_latch except
+				Protected by search latch except
 				when during initialization in
 				btr_search_info_create(). */
 
@@ -205,6 +272,8 @@ struct btr_search_t{
 	the machine word, i.e., they cannot be turned into bit-fields. */
 	buf_block_t* root_guess;/*!< the root page frame when it was last time
 				fetched, or NULL */
+	ulint	withdraw_clock;	/*!< the withdraw clock value of the buffer
+				pool when root_guess was stored */
 	ulint	hash_analysis;	/*!< when this exceeds
 				BTR_SEARCH_HASH_ANALYSIS, the hash
 				analysis starts; this is reset if no
@@ -248,11 +317,14 @@ struct btr_search_t{
 
 /** The hash index system */
 struct btr_search_sys_t{
-	hash_table_t*	hash_index;	/*!< the adaptive hash index,
+	hash_table_t**	hash_tables;	/*!< the adaptive hash tables,
 					mapping dtuple_fold values
 					to rec_t pointers on index pages */
 };
 
+/** Latches protecting access to the adaptive hash index. */
+extern rw_lock_t**		btr_search_latches;
+
 /** The adaptive hash index */
 extern btr_search_sys_t*	btr_search_sys;
 
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index 0bd869be136..5f7c39ba500 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,6 @@ Created 2/17/1996 Heikki Tuuri
 
 /*********************************************************************//**
 Updates the search info. */
-UNIV_INTERN
 void
 btr_search_info_update_slow(
 /*========================*/
@@ -38,7 +37,7 @@ btr_search_info_update_slow(
 
 /********************************************************************//**
 Returns search info for an index.
-@return	search info; search mutex reserved */
+@return search info; search mutex reserved */
 UNIV_INLINE
 btr_search_t*
 btr_search_get_info(
@@ -57,13 +56,14 @@ btr_search_info_update(
 	dict_index_t*	index,	/*!< in: index of the cursor */
 	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
 {
-	btr_search_t*	info;
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	if (dict_index_is_spatial(index) || !btr_search_enabled) {
+		return;
+	}
 
+	btr_search_t*	info;
 	info = btr_search_get_info(index);
 
 	info->hash_analysis++;
@@ -80,3 +80,144 @@ btr_search_info_update(
 
 	btr_search_info_update_slow(info, cursor);
 }
+
+/** X-Lock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_x_lock(const dict_index_t* index)
+{
+	rw_lock_x_lock(btr_get_search_latch(index));
+}
+
+/** X-Unlock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_x_unlock(const dict_index_t* index)
+{
+	rw_lock_x_unlock(btr_get_search_latch(index));
+}
+
+/** Lock all search latches in exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_lock_all()
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		rw_lock_x_lock(btr_search_latches[i]);
+	}
+}
+
+/** Unlock all search latches from exclusive mode. */
+UNIV_INLINE
+void
+btr_search_x_unlock_all()
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		rw_lock_x_unlock(btr_search_latches[i]);
+	}
+}
+
+/** S-Lock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_s_lock(const dict_index_t* index)
+{
+	rw_lock_s_lock(btr_get_search_latch(index));
+}
+
+/** S-Unlock the search latch (corresponding to given index)
+@param[in]	index	index handler */
+UNIV_INLINE
+void
+btr_search_s_unlock(const dict_index_t* index)
+{
+	rw_lock_s_unlock(btr_get_search_latch(index));
+}
+
+/** Lock all search latches in shared mode. */
+UNIV_INLINE
+void
+btr_search_s_lock_all()
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		rw_lock_s_lock(btr_search_latches[i]);
+	}
+}
+
+/** Unlock all search latches from shared mode. */
+UNIV_INLINE
+void
+btr_search_s_unlock_all()
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		rw_lock_s_unlock(btr_search_latches[i]);
+	}
+}
+
+#ifdef UNIV_DEBUG
+/** Check if thread owns all the search latches.
+@param[in]	mode	lock mode check
+@retval true if owns all of them
+@retval false if does not own some of them */
+UNIV_INLINE
+bool
+btr_search_own_all(ulint mode)
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		if (!rw_lock_own(btr_search_latches[i], mode)) {
+			return(false);
+		}
+	}
+	return(true);
+}
+
+/** Check if thread owns any of the search latches.
+@param[in]	mode	lock mode check
+@retval true if owns any of them
+@retval false if owns no search latch */
+UNIV_INLINE
+bool
+btr_search_own_any(ulint mode)
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		if (rw_lock_own(btr_search_latches[i], mode)) {
+			return(true);
+		}
+	}
+	return(false);
+}
+#endif /* UNIV_DEBUG */
+
+/** Get the adaptive hash search index latch for a b-tree.
+@param[in]	index	b-tree index
+@return latch */
+UNIV_INLINE
+rw_lock_t*
+btr_get_search_latch(const dict_index_t* index)
+{
+	ut_ad(index != NULL);
+
+	ulint	ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
+					   static_cast<ulint>(index->space));
+
+	return(btr_search_latches[ifold % btr_ahi_parts]);
+}
+
+/** Get the hash-table based on index attributes.
+A table is selected from an array of tables using pair of index-id, space-id.
+@param[in]	index	index handler
+@return hash table */
+UNIV_INLINE
+hash_table_t*
+btr_get_search_table(const dict_index_t* index)
+{
+	ut_ad(index != NULL);
+
+	ulint	ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
+					   static_cast<ulint>(index->space));
+
+	return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]);
+}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 04b69d8145c..19c21982011 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,6 +31,7 @@ Created 2/17/1996 Heikki Tuuri
 #include "rem0types.h"
 #include "page0types.h"
 #include "sync0rw.h"
+#include "page0size.h"
 
 /** Persistent cursor */
 struct btr_pcur_t;
@@ -39,165 +40,51 @@ struct btr_cur_t;
 /** B-tree search information for the adaptive hash index */
 struct btr_search_t;
 
-#ifndef UNIV_HOTBACKUP
-
-/** @brief The latch protecting the adaptive search system
-
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern rw_lock_t*	btr_search_latch_temp;
-
-#endif /* UNIV_HOTBACKUP */
-
-/** The latch protecting the adaptive search system */
-#define btr_search_latch	(*btr_search_latch_temp)
-
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch. */
+/** Flag: is the search system enabled?
+The search system is protected by an array of latches. */
 extern char	btr_search_enabled;
 
-#ifdef UNIV_BLOB_DEBUG
-# include "buf0types.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t;
-
-/** Insert to index->blobs a reference to an off-page column.
-@param index	the index tree
-@param b	the reference
-@param ctx	context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-
-/** Remove from index->blobs a reference to an off-page column.
-@param index	the index tree
-@param b	the reference
-@param ctx	context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in/out: index */
-	const ulint*	offsets,/*!< in: offsets */
-	const char*	ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in/out: index */
-	const ulint*	offsets,/*!< in: offsets */
-	const char*	ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
-	const page_t*	page,	/*!< in: rewritten page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
-	const page_t*	page,	/*!< in: b-tree page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
-	const page_t*	npage,	/*!< in: page that failed to compress */
-	const page_t*	page,	/*!< in: copy of original page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-	MY_ATTRIBUTE((nonnull));
-
-/** Operation that processes the BLOB references of an index record
-@param[in]	rec	record on index page
-@param[in/out]	index	the index tree of the record
-@param[in]	offsets	rec_get_offsets(rec,index)
-@param[in]	ctx	context (for logging)
-@return			number of BLOB references processed */
-typedef ulint (*btr_blob_dbg_op_f)
-(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
-
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
-	const page_t*		page,	/*!< in: B-tree leaf page */
-	const rec_t*		rec,	/*!< in: record to start from
-					(NULL to process the whole page) */
-	dict_index_t*		index,	/*!< in/out: index */
-	const char*		ctx,	/*!< in: context (for logging) */
-	const btr_blob_dbg_op_f	op)	/*!< in: operation on records */
-	MY_ATTRIBUTE((nonnull(1,3,4,5)));
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_rec(rec, index, offsets, ctx)		((void) 0)
-# define btr_blob_dbg_add(page, index, ctx)			((void) 0)
-# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx)	((void) 0)
-# define btr_blob_dbg_remove(page, index, ctx)			((void) 0)
-# define btr_blob_dbg_restore(npage, page, index, ctx)		((void) 0)
-# define btr_blob_dbg_op(page, rec, index, ctx, op)		((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
+/** Number of adaptive hash index partitions. */
+extern ulong	btr_ahi_parts;
 
 /** The size of a reference to data stored on a different page.
 The reference is stored at the end of the prefix of the field
 in the index record. */
-#define BTR_EXTERN_FIELD_REF_SIZE	20
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+#define BTR_EXTERN_FIELD_REF_SIZE	FIELD_REF_SIZE
+
+/** If the data don't exceed the size, the data are stored locally. */
+#define BTR_EXTERN_LOCAL_STORED_MAX_SIZE	\
+	(BTR_EXTERN_FIELD_REF_SIZE * 2)
+
+/** The information is used for creating a new index tree when
+applying TRUNCATE log record during recovery */
+struct btr_create_t {
+
+	explicit btr_create_t(const byte* const ptr)
+		:
+		format_flags(),
+		n_fields(),
+		field_len(),
+		fields(ptr),
+		trx_id_pos(ULINT_UNDEFINED)
+	{
+		/* Do nothing */
+	}
+
+	/** Page format */
+	ulint			format_flags;
+
+	/** Number of index fields */
+	ulint			n_fields;
+
+	/** The length of the encoded meta-data */
+	ulint			field_len;
+
+	/** Field meta-data, encoded. */
+	const byte* const	fields;
+
+	/** Position of trx-id column. */
+	ulint			trx_id_pos;
+};
 
 #endif
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index 7fc4408505d..c2c100e83e6 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -39,7 +39,7 @@ Allocate a block.  The thread calling this function must hold
 buf_pool->mutex and must not hold buf_pool->zip_mutex or any
 block->mutex.  The buf_pool->mutex may be released and reacquired.
 This function should only be used for allocating compressed page frames.
-@return	allocated block, never NULL */
+@return allocated block, never NULL */
 UNIV_INLINE
 byte*
 buf_buddy_alloc(
@@ -70,6 +70,24 @@ buf_buddy_free(
 					up to UNIV_PAGE_SIZE */
 	MY_ATTRIBUTE((nonnull));
 
+/** Reallocate a block.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	buf		block to be reallocated, must be pointed
+to by the buffer pool
+@param[in]	size		block size, up to UNIV_PAGE_SIZE
+@retval false	if failed because of no free blocks. */
+bool
+buf_buddy_realloc(
+	buf_pool_t*	buf_pool,
+	void*		buf,
+	ulint		size);
+
+/** Combine all pairs of free buddies.
+@param[in]	buf_pool	buffer pool instance */
+void
+buf_buddy_condense_free(
+	buf_pool_t*	buf_pool);
+
 #ifndef UNIV_NONINL
 # include "buf0buddy.ic"
 #endif
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index 4352ebe8945..2b6d76df009 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -30,15 +30,12 @@ Created December 2006 by Marko Makela
 
 #include "buf0buf.h"
 #include "buf0buddy.h"
-#include "ut0ut.h"
-#include "sync0sync.h"
 
 /**********************************************************************//**
 Allocate a block.  The thread calling this function must hold
 buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
 The buf_pool_mutex may be released and reacquired.
-@return	allocated block, never NULL */
-UNIV_INTERN
+@return allocated block, never NULL */
 void*
 buf_buddy_alloc_low(
 /*================*/
@@ -54,7 +51,6 @@ buf_buddy_alloc_low(
 
 /**********************************************************************//**
 Deallocate a block. */
-UNIV_INTERN
 void
 buf_buddy_free_low(
 /*===============*/
@@ -67,7 +63,7 @@ buf_buddy_free_low(
 
 /**********************************************************************//**
 Get the index of buf_pool->zip_free[] for a given block size.
-@return	index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
+@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
 UNIV_INLINE
 ulint
 buf_buddy_get_slot(
@@ -91,7 +87,7 @@ Allocate a block.  The thread calling this function must hold
 buf_pool->mutex and must not hold buf_pool->zip_mutex or any
 block->mutex.  The buf_pool->mutex may be released and reacquired.
 This function should only be used for allocating compressed page frames.
-@return	allocated block, never NULL */
+@return allocated block, never NULL */
 UNIV_INLINE
 byte*
 buf_buddy_alloc(
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d7de9d81ca2..617bb2b9b5d 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
 #include "fil0fil.h"
 #include "mtr0types.h"
 #include "buf0types.h"
+#ifndef UNIV_INNOCHECKSUM
 #include "hash0hash.h"
 #include "ut0byte.h"
 #include "page0types.h"
@@ -38,6 +39,11 @@ Created 11/5/1995 Heikki Tuuri
 #include "ut0rbt.h"
 #include "os0proc.h"
 #include "log0log.h"
+#include "srv0srv.h"
+#include <ostream>
+
+// Forward declaration
+struct fil_addr_t;
 
 /** @name Modes for buf_page_get_gen */
 /* @{ */
@@ -85,18 +91,28 @@ Created 11/5/1995 Heikki Tuuri
 
 extern	buf_pool_t*	buf_pool_ptr;	/*!< The buffer pools
 					of the database */
+
+extern	volatile bool	buf_pool_withdrawing; /*!< true when withdrawing buffer
+					pool pages might cause page relocation */
+
+extern	volatile ulint	buf_withdraw_clock; /*!< the clock is incremented
+					every time a pointer to a page may
+					become obsolete */
+
 #ifdef UNIV_DEBUG
-extern ibool		buf_debug_prints;/*!< If this is set TRUE, the program
-					prints info whenever read or flush
-					occurs */
+extern my_bool	buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
+					buffer pool is not allowed. */
 #endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_instances;
-extern ulint srv_buf_pool_curr_size;
 #else /* !UNIV_HOTBACKUP */
 extern buf_block_t*	back_block1;	/*!< first block, for --apply-log */
 extern buf_block_t*	back_block2;	/*!< second block, for page reorganize */
 #endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
 
+#ifndef UNIV_INNOCHECKSUM
 /** @brief States of a control block
 @see buf_page_t
 
@@ -204,6 +220,127 @@ struct buf_pools_list_size_t {
 	ulint	flush_list_bytes;	/*!< flush_list size in bytes */
 };
 
+/** Page identifier. */
+class page_id_t {
+public:
+
+	/** Constructor from (space, page_no).
+	@param[in]	space	tablespace id
+	@param[in]	page_no	page number */
+	page_id_t(ulint space, ulint page_no)
+		:
+		m_space(static_cast<ib_uint32_t>(space)),
+		m_page_no(static_cast<ib_uint32_t>(page_no)),
+		m_fold(ULINT_UNDEFINED)
+	{
+		ut_ad(space <= 0xFFFFFFFFU);
+		ut_ad(page_no <= 0xFFFFFFFFU);
+	}
+
+	/** Retrieve the tablespace id.
+	@return tablespace id */
+	inline ib_uint32_t space() const
+	{
+		return(m_space);
+	}
+
+	/** Retrieve the page number.
+	@return page number */
+	inline ib_uint32_t page_no() const
+	{
+		return(m_page_no);
+	}
+
+	/** Retrieve the fold value.
+	@return fold value */
+	inline ulint fold() const
+	{
+		/* Initialize m_fold if it has not been initialized yet. */
+		if (m_fold == ULINT_UNDEFINED) {
+			m_fold = (m_space << 20) + m_space + m_page_no;
+			ut_ad(m_fold != ULINT_UNDEFINED);
+		}
+
+		return(m_fold);
+	}
+
+	/** Copy the values from a given page_id_t object.
+	@param[in]	src	page id object whose values to fetch */
+	inline void copy_from(const page_id_t& src)
+	{
+		m_space = src.space();
+		m_page_no = src.page_no();
+		m_fold = src.fold();
+	}
+
+	/** Reset the values from a (space, page_no).
+	@param[in]	space	tablespace id
+	@param[in]	page_no	page number */
+	inline void reset(ulint space, ulint page_no)
+	{
+		m_space = static_cast<ib_uint32_t>(space);
+		m_page_no = static_cast<ib_uint32_t>(page_no);
+		m_fold = ULINT_UNDEFINED;
+
+		ut_ad(space <= 0xFFFFFFFFU);
+		ut_ad(page_no <= 0xFFFFFFFFU);
+	}
+
+	/** Reset the page number only.
+	@param[in]	page_no	page number */
+	inline void set_page_no(ulint page_no)
+	{
+		m_page_no = static_cast<ib_uint32_t>(page_no);
+		m_fold = ULINT_UNDEFINED;
+
+		ut_ad(page_no <= 0xFFFFFFFFU);
+	}
+
+	/** Check if a given page_id_t object is equal to the current one.
+	@param[in]	a	page_id_t object to compare
+	@return true if equal */
+	inline bool equals_to(const page_id_t& a) const
+	{
+		return(a.space() == m_space && a.page_no() == m_page_no);
+	}
+
+private:
+
+	/** Tablespace id. */
+	ib_uint32_t	m_space;
+
+	/** Page number. */
+	ib_uint32_t	m_page_no;
+
+	/** A fold value derived from m_space and m_page_no,
+	used in hashing. */
+	mutable ulint	m_fold;
+
+	/* Disable implicit assignment; use copy_from() instead. */
+	void operator=(const page_id_t&);
+
+	/** Declare the overloaded global operator<< as a friend of this
+	class. Refer to the global declaration for further details.  Print
+	the given page_id_t object.
+	@param[in,out]	out	the output stream
+	@param[in]	page_id	the page_id_t object to be printed
+	@return the output stream */
+        friend
+        std::ostream&
+        operator<<(
+                std::ostream&           out,
+                const page_id_t&        page_id);
+};
+
+/** Print the given page_id_t object.
+@param[in,out]	out	the output stream
+@param[in]	page_id	the page_id_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		out,
+	const page_id_t&	page_id);
+
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Acquire mutex on all buffer pool instances */
@@ -221,8 +358,7 @@ buf_pool_mutex_exit_all(void);
 
 /********************************************************************//**
 Creates the buffer pool.
-@return	DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
 dberr_t
 buf_pool_init(
 /*=========*/
@@ -231,42 +367,62 @@ buf_pool_init(
 /********************************************************************//**
 Frees the buffer pool at shutdown.  This must not be invoked before
 freeing all mutexes. */
-UNIV_INTERN
 void
 buf_pool_free(
 /*==========*/
 	ulint	n_instances);	/*!< in: numbere of instances to free */
 
+/** Determines if a block is intended to be withdrawn.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	block		pointer to control block
+@retval true	if will be withdrawn */
+bool
+buf_block_will_withdrawn(
+	buf_pool_t*		buf_pool,
+	const buf_block_t*	block);
+
+/** Determines if a frame is intended to be withdrawn.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	ptr		pointer to a frame
+@retval true	if will be withdrawn */
+bool
+buf_frame_will_withdrawn(
+	buf_pool_t*	buf_pool,
+	const byte*	ptr);
+
+/** Resize the buffer pool based on srv_buf_pool_size from
+srv_buf_pool_old_size. */
+void
+buf_pool_resize();
+
+/** This is the thread for resizing buffer pool. It waits for an event and
+when woken up it performs a resize and then sleeps again.
+@param[in]	arg	a dummy parameter required by os_thread_create.
+@return	this function does not return, calls os_thread_exit()
+*/
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_resize_thread)(
+/*==============================*/
+	void*	arg);				/*!< in: a dummy parameter
+						required by os_thread_create */
+
 /********************************************************************//**
 Clears the adaptive hash index on all pages in the buffer pool. */
-UNIV_INTERN
 void
 buf_pool_clear_hash_index(void);
 /*===========================*/
 
-/********************************************************************//**
-Relocate a buffer control block.  Relocates the block on the LRU list
-and in buf_pool->page_hash.  Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
-	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
-				buf_page_get_state(bpage) must be
-				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
-	buf_page_t*	dpage)	/*!< in/out: destination control block */
-	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Gets the current size of buffer buf_pool in bytes.
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
 buf_pool_get_curr_size(void);
 /*========================*/
 /*********************************************************************//**
 Gets the current size of buffer buf_pool in frames.
-@return	size in pages */
+@return size in pages */
 UNIV_INLINE
 ulint
 buf_pool_get_n_pages(void);
@@ -274,8 +430,7 @@ buf_pool_get_n_pages(void);
 /********************************************************************//**
 Gets the smallest oldest_modification lsn for any page in the pool. Returns
 zero if all modified pages have been flushed to disk.
-@return	oldest modification in pool, zero if none */
-UNIV_INTERN
+@return oldest modification in pool, zero if none */
 lsn_t
 buf_pool_get_oldest_modification(void);
 /*==================================*/
@@ -299,8 +454,7 @@ buf_page_free_descriptor(
 
 /********************************************************************//**
 Allocates a buffer block.
-@return	own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
+@return own: the allocated block, in state BUF_BLOCK_MEMORY */
 buf_block_t*
 buf_block_alloc(
 /*============*/
@@ -317,7 +471,7 @@ buf_block_free(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Copies contents of a buffer frame to a given buffer.
-@return	buf */
+@return buf */
 UNIV_INLINE
 byte*
 buf_frame_copy(
@@ -329,23 +483,21 @@ buf_frame_copy(
 NOTE! The following macros should be used instead of buf_page_get_gen,
 to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
 in LA! */
-#define buf_page_get(SP, ZS, OF, LA, MTR)	 buf_page_get_gen(\
-				SP, ZS, OF, LA, NULL,\
-				BUF_GET, __FILE__, __LINE__, MTR)
+#define buf_page_get(ID, SIZE, LA, MTR)					\
+	buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR, NULL)
 /**************************************************************//**
 Use these macros to bufferfix a page with no latching. Remember not to
 read the contents of the page unless you know it is safe. Do not modify
 the contents of the page! We have separated this case, because it is
 error-prone programming not to set a latch, and it should be used
 with care. */
-#define buf_page_get_with_no_latch(SP, ZS, OF, MTR)	   buf_page_get_gen(\
-				SP, ZS, OF, RW_NO_LATCH, NULL,\
-				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+#define buf_page_get_with_no_latch(ID, SIZE, MTR)	\
+	buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
+			 __FILE__, __LINE__, MTR, NULL)
 /********************************************************************//**
 This is the general function used to get optimistic access to a database
 page.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 ibool
 buf_page_optimistic_get(
 /*====================*/
@@ -358,8 +510,7 @@ buf_page_optimistic_get(
 /********************************************************************//**
 This is used to get access to a known database page, when no waiting can be
 done.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 ibool
 buf_page_get_known_nowait(
 /*======================*/
@@ -370,96 +521,94 @@ buf_page_get_known_nowait(
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mini-transaction */
 
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
+/** Given a tablespace id and page number tries to get that page. If the
 page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the lock_sys_t::mutex. */
-UNIV_INTERN
+Suitable for using when holding the lock_sys_t::mutex.
+@param[in]	page_id	page id
+@param[in]	file	file name
+@param[in]	line	line where called
+@param[in]	mtr	mini-transaction
+@return pointer to a page or NULL */
 buf_block_t*
 buf_page_try_get_func(
-/*==================*/
-	ulint		space_id,/*!< in: tablespace id */
-	ulint		page_no,/*!< in: page number */
-	ulint		rw_latch,       /*!< in: RW_S_LATCH, RW_X_LATCH */
-	bool		possibly_freed, /*!< in: don't mind if page is freed */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr);	/*!< in: mini-transaction */
-
-/** Tries to get a page. If the page is not in the buffer pool it is
-not loaded.  Suitable for using when holding the lock_sys_t::mutex.
-@param space_id	in: tablespace id
-@param page_no	in: page number
-@param mtr	in: mini-transaction
-@return		the page if in buffer pool, NULL if not */
-#define buf_page_try_get(space_id, page_no, mtr)	\
-	buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \
-			      __FILE__, __LINE__, mtr);
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
+	const page_id_t&	page_id,
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr);
+
+/** Tries to get a page.
+If the page is not in the buffer pool it is not loaded. Suitable for using
+when holding the lock_sys_t::mutex.
+@param[in]	page_id	page identifier
+@param[in]	mtr	mini-transaction
+@return the page if in buffer pool, NULL if not */
+#define buf_page_try_get(page_id, mtr)	\
+	buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);
+
+/** Get read access to a compressed page (usually of type
 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
 The page must be released with buf_page_release_zip().
 NOTE: the page is not protected by any latch.  Mutual exclusion has to
 be implemented at a higher level.  In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
-@return	pointer to the block, or NULL if not compressed */
-UNIV_INTERN
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return pointer to the block */
 buf_page_t*
 buf_page_get_zip(
-/*=============*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size */
-	ulint		offset);/*!< in: page number */
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return	pointer to the block or NULL */
-UNIV_INTERN
+	const page_id_t&	page_id,
+	const page_size_t&	page_size);
+
+/** This is the general function used to get access to a database page.
+@param[in]	page_id		page id
+@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in]	guess		guessed block or NULL
+@param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in]	mtr		mini-transaction
+@param[out]	err		DB_SUCCESS or error code
+@return pointer to the block or NULL */
 buf_block_t*
 buf_page_get_gen(
-/*=============*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint		offset,	/*!< in: page number */
-	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-	buf_block_t*	guess,	/*!< in: guessed block or NULL */
-	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH or
-				BUF_GET_IF_IN_POOL_OR_WATCH */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr,	/*!< in: mini-transaction */
-	dberr_t*	err = NULL); /*!< out: error code */
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			rw_latch,
+	buf_block_t*		guess,
+	ulint			mode,
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr,
+	dberr_t*		err);
+
+/** Initializes a page to the buffer buf_pool. The page is usually not read
 from a file even if it cannot be found in the buffer buf_pool. This is one
 of the functions which perform to a block a state transition NOT_USED =>
 FILE_PAGE (the other is buf_page_get_gen).
-@return	pointer to the block, page bufferfixed */
-UNIV_INTERN
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	mtr		mini-transaction
+@return pointer to the block, page bufferfixed */
 buf_block_t*
 buf_page_create(
-/*============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset,	/*!< in: offset of the page within space in units of
-			a page */
-	ulint	zip_size,/*!< in: compressed page size, or 0 */
-	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr);
+
 #else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
-UNIV_INTERN
+
+/** Inits a page to the buffer buf_pool, for use in mysqlbackup --restore.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	block		block to init */
 void
 buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space
-				in units of a page */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	buf_block_t*	block);	/*!< in: block to init */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	buf_block_t*		block);
+
 #endif /* !UNIV_HOTBACKUP */
 
 #ifndef UNIV_HOTBACKUP
@@ -471,12 +620,11 @@ buf_page_release_zip(
 /*=================*/
 	buf_page_t*	bpage);		/*!< in: buffer block */
 /********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
+Releases a latch, if specified. */
 UNIV_INLINE
 void
-buf_page_release(
-/*=============*/
+buf_page_release_latch(
+/*=====================*/
 	buf_block_t*	block,		/*!< in: buffer block */
 	ulint		rw_latch);	/*!< in: RW_S_LATCH, RW_X_LATCH,
 					RW_NO_LATCH */
@@ -484,68 +632,62 @@ buf_page_release(
 Moves a page to the start of the buffer pool LRU list. This high-level
 function can be used to prevent an important page from slipping out of
 the buffer pool. */
-UNIV_INTERN
 void
 buf_page_make_young(
 /*================*/
 	buf_page_t*	bpage);	/*!< in: buffer block of a file page */
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
 
+/** Returns TRUE if the page can be found in the buffer pool hash table.
 NOTE that it is possible that the page is not yet read from disk,
 though.
-
-@return	TRUE if found in the page hash table */
+@param[in]	page_id	page id
+@return TRUE if found in the page hash table */
 UNIV_INLINE
 ibool
 buf_page_peek(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: page number */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+	const page_id_t&	page_id);
+
+#ifdef UNIV_DEBUG
+
+/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
 This function should be called when we free a file page and want the
 debug version to check that it is not accessed any more unless
 reallocated.
-@return	control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
+@param[in]	page_id	page id
+@return control block if found in page hash table, otherwise NULL */
 buf_page_t*
 buf_page_set_file_page_was_freed(
-/*=============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: page number */
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+	const page_id_t&	page_id);
+
+/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
 This function should be called when we free a file page and want the
 debug version to check that it is not accessed any more unless
 reallocated.
-@return	control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
+@param[in]	page_id	page id
+@return control block if found in page hash table, otherwise NULL */
 buf_page_t*
 buf_page_reset_file_page_was_freed(
-/*===============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);	/*!< in: page number */
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+	const page_id_t&	page_id);
+
+#endif /* UNIV_DEBUG */
 /********************************************************************//**
 Reads the freed_page_clock of a buffer block.
-@return	freed_page_clock */
+@return freed_page_clock */
 UNIV_INLINE
 ulint
 buf_page_get_freed_page_clock(
 /*==========================*/
 	const buf_page_t*	bpage)	/*!< in: block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************************//**
 Reads the freed_page_clock of a buffer block.
-@return	freed_page_clock */
+@return freed_page_clock */
 UNIV_INLINE
 ulint
 buf_block_get_freed_page_clock(
 /*===========================*/
 	const buf_block_t*	block)	/*!< in: block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************************//**
 Tells if a block is still close enough to the MRU end of the LRU list
@@ -553,7 +695,7 @@ meaning that it is not in danger of getting evicted and also implying
 that it has been accessed recently.
 Note that this is for heuristics only and does not reserve buffer pool
 mutex.
-@return	TRUE if block is close to MRU end of LRU */
+@return TRUE if block is close to MRU end of LRU */
 UNIV_INLINE
 ibool
 buf_page_peek_if_young(
@@ -563,7 +705,7 @@ buf_page_peek_if_young(
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
-@return	TRUE if should be made younger */
+@return TRUE if should be made younger */
 UNIV_INLINE
 ibool
 buf_page_peek_if_too_old(
@@ -572,7 +714,7 @@ buf_page_peek_if_too_old(
 /********************************************************************//**
 Gets the youngest modification log sequence number for a frame.
 Returns zero if not file page or no modification occurred yet.
-@return	newest modification to page */
+@return newest modification to page */
 UNIV_INLINE
 lsn_t
 buf_page_get_newest_modification(
@@ -591,7 +733,7 @@ buf_block_modify_clock_inc(
 /********************************************************************//**
 Returns the value of the modify clock. The caller must have an s-lock
 or x-lock on the block.
-@return	value */
+@return value */
 UNIV_INLINE
 ib_uint64_t
 buf_block_get_modify_clock(
@@ -603,67 +745,96 @@ UNIV_INLINE
 void
 buf_block_buf_fix_inc_func(
 /*=======================*/
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line */
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
 	buf_block_t*	block)	/*!< in/out: block to bufferfix */
 	MY_ATTRIBUTE((nonnull));
 
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out]	bpage	block to bufferfix
+@return the count */
 UNIV_INLINE
-void
+ulint
 buf_block_fix(
-/*===========*/
-	buf_block_t*	block);	/*!< in/out: block to bufferfix */
+	buf_page_t*	bpage);
 
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out]	block	block to bufferfix
+@return the count */
 UNIV_INLINE
-void
+ulint
+buf_block_fix(
+	buf_block_t*	block);
+
+/** Decrements the bufferfix count.
+@param[in,out]	bpage	block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
 buf_block_unfix(
-/*===========*/
-	buf_block_t*	block);	/*!< in/out: block to bufferfix */
+	buf_page_t*	bpage);
+/** Decrements the bufferfix count.
+@param[in,out]	block	block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_unfix(
+	buf_block_t*	block);
 
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 /** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
+@param[in,out]	b	block to bufferfix
+@param[in]	f	file name where requested
+@param[in]	l	line number where requested */
 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-# else /* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
 /** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
+@param[in,out]	b	block to bufferfix
+@param[in]	f	file name where requested
+@param[in]	l	line number where requested */
 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
 #else /* !UNIV_HOTBACKUP */
 # define buf_block_modify_clock_inc(block) ((void) 0)
 #endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Checks if a page is corrupt.
-@return	TRUE if corrupted */
-UNIV_INTERN
-ibool
-buf_page_is_corrupted(
-/*==================*/
-	bool		check_lsn,	/*!< in: true if we need to check the
-					and complain about the LSN */
-	const byte*	read_buf,	/*!< in: a database page */
-	ulint		zip_size)	/*!< in: size of compressed page;
-					0 for uncompressed pages */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Checks if a page is all zeroes.
-@return	TRUE if the page is all zeroes */
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Checks if a page contains only zeroes.
+@param[in]	read_buf	database page
+@param[in]	page_size	page size
+@return true if page is filled with zeroes */
 bool
 buf_page_is_zeroes(
-/*===============*/
-	const byte*	read_buf,	/*!< in: a database page */
-	const ulint	zip_size);	/*!< in: size of compressed page;
-					0 for uncompressed pages */
+	const byte*		read_buf,
+	const page_size_t&	page_size);
+
+/** Checks if a page is corrupt.
+@param[in]	check_lsn	true if we need to check and complain about
+the LSN
+@param[in]	read_buf	database page
+@param[in]	page_size	page size
+@param[in]	skip_checksum	if true, skip checksum
+@param[in]	page_no		page number of given read_buf
+@param[in]	strict_check	true if strict-check option is enabled
+@param[in]	is_log_enabled	true if log option is enabled
+@param[in]	log_file	file pointer to the log file
+@return TRUE if corrupted */
+ibool
+buf_page_is_corrupted(
+	bool			check_lsn,
+	const byte*		read_buf,
+	const page_size_t&	page_size,
+	bool			skip_checksum
+#ifdef UNIV_INNOCHECKSUM
+	,uintmax_t		page_no,
+	bool			strict_check,
+	bool			is_log_enabled,
+	FILE*			log_file
+#endif /* UNIV_INNOCHECKSUM */
+) MY_ATTRIBUTE((warn_unused_result));
+#ifndef UNIV_INNOCHECKSUM
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Gets the space id, page offset, and byte offset within page of a
@@ -678,19 +849,18 @@ buf_ptr_get_fsp_addr(
 /**********************************************************************//**
 Gets the hash value of a block. This can be used in searches in the
 lock hash table.
-@return	lock hash value */
+@return lock hash value */
 UNIV_INLINE
 ulint
 buf_block_get_lock_hash_val(
 /*========================*/
 	const buf_block_t*	block)	/*!< in: block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Finds a block in the buffer pool that points to a
 given compressed page.
-@return	buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
+@return buffer block pointing to the compressed page, or NULL */
 buf_block_t*
 buf_pool_contains_zip(
 /*==================*/
@@ -711,8 +881,7 @@ buf_frame_align(
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /*********************************************************************//**
 Validates the buffer pool data structure.
-@return	TRUE */
-UNIV_INTERN
+@return TRUE */
 ibool
 buf_validate(void);
 /*==============*/
@@ -720,7 +889,6 @@ buf_validate(void);
 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /*********************************************************************//**
 Prints info of the buffer pool data structure. */
-UNIV_INTERN
 void
 buf_print(void);
 /*============*/
@@ -733,23 +901,20 @@ enum buf_page_print_flags {
 	BUF_PAGE_PRINT_NO_FULL = 2
 };
 
-/********************************************************************//**
-Prints a page to stderr. */
-UNIV_INTERN
+/** Prints a page to stderr.
+@param[in]	read_buf	a database page
+@param[in]	page_size	page size
+@param[in]	flags		0 or BUF_PAGE_PRINT_NO_CRASH or
+BUF_PAGE_PRINT_NO_FULL */
 void
 buf_page_print(
-/*===========*/
-	const byte*	read_buf,	/*!< in: a database page */
-	ulint		zip_size,	/*!< in: compressed page size, or
-					0 for uncompressed pages */
-	ulint		flags)		/*!< in: 0 or
-					BUF_PAGE_PRINT_NO_CRASH or
-					BUF_PAGE_PRINT_NO_FULL */
-	UNIV_COLD MY_ATTRIBUTE((nonnull));
+	const byte*		read_buf,
+	const page_size_t&	page_size,
+	ulint			flags);
+
 /********************************************************************//**
 Decompress a block.
-@return	TRUE if successful */
-UNIV_INTERN
+@return TRUE if successful */
 ibool
 buf_zip_decompress(
 /*===============*/
@@ -759,22 +924,19 @@ buf_zip_decompress(
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Returns the number of latched pages in the buffer pool.
-@return	number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
 ulint
 buf_get_latched_pages_number(void);
 /*==============================*/
 #endif /* UNIV_DEBUG */
 /*********************************************************************//**
 Returns the number of pending buf pool read ios.
-@return	number of pending read I/O operations */
-UNIV_INTERN
+@return number of pending read I/O operations */
 ulint
 buf_get_n_pending_read_ios(void);
 /*============================*/
 /*********************************************************************//**
 Prints info of the buffer i/o. */
-UNIV_INTERN
 void
 buf_print_io(
 /*=========*/
@@ -783,7 +945,6 @@ buf_print_io(
 Collect buffer pool stats information for a buffer pool. Also
 record aggregated stats if there are more than one buffer pool
 in the server */
-UNIV_INTERN
 void
 buf_stats_get_pool_info(
 /*====================*/
@@ -794,36 +955,31 @@ buf_stats_get_pool_info(
 /*********************************************************************//**
 Returns the ratio in percents of modified pages in the buffer pool /
 database pages in the buffer pool.
-@return	modified page percentage ratio */
-UNIV_INTERN
+@return modified page percentage ratio */
 double
 buf_get_modified_ratio_pct(void);
 /*============================*/
 /**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
 void
 buf_refresh_io_stats(
 /*=================*/
 	buf_pool_t*	buf_pool);	/*!< buffer pool instance */
 /**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
 void
 buf_refresh_io_stats_all(void);
 /*=================*/
 /*********************************************************************//**
 Asserts that all file pages in the buffer are in a replaceable state.
-@return	TRUE */
-UNIV_INTERN
+@return TRUE */
 ibool
 buf_all_freed(void);
 /*===============*/
 /*********************************************************************//**
 Checks that there currently are no pending i/o-operations for the buffer
 pool.
-@return	number of pending i/o operations */
-UNIV_INTERN
+@return number of pending i/o operations */
 ulint
 buf_pool_check_no_pending_io(void);
 /*==============================*/
@@ -831,7 +987,6 @@ buf_pool_check_no_pending_io(void);
 Invalidates the file pages in the buffer pool when an archive recovery is
 completed. All the file pages buffered must be in a replaceable state when
 this function is called: not latched and not modified. */
-UNIV_INTERN
 void
 buf_pool_invalidate(void);
 /*=====================*/
@@ -841,7 +996,7 @@ buf_pool_invalidate(void);
 --------------------------- LOWER LEVEL ROUTINES -------------------------
 =========================================================================*/
 
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 /*********************************************************************//**
 Adds latch level info for the rw-lock protecting the buffer frame. This
 should be called in the debug version after a successful latching of a
@@ -852,13 +1007,13 @@ buf_block_dbg_add_level(
 /*====================*/
 	buf_block_t*	block,	/*!< in: buffer page
 				where we have acquired latch */
-	ulint		level);	/*!< in: latching order level */
-#else /* UNIV_SYNC_DEBUG */
+	latch_level_t	level);	/*!< in: latching order level */
+#else /* UNIV_DEBUG */
 # define buf_block_dbg_add_level(block, level) /* nothing */
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 /*********************************************************************//**
 Gets the state of a block.
-@return	state */
+@return state */
 UNIV_INLINE
 enum buf_page_state
 buf_page_get_state(
@@ -876,13 +1031,13 @@ buf_get_state_name(
 					block */
 /*********************************************************************//**
 Gets the state of a block.
-@return	state */
+@return state */
 UNIV_INLINE
 enum buf_page_state
 buf_block_get_state(
 /*================*/
 	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Sets the state of a block. */
 UNIV_INLINE
@@ -901,43 +1056,43 @@ buf_block_set_state(
 	enum buf_page_state	state);	/*!< in: state */
 /*********************************************************************//**
 Determines if a block is mapped to a tablespace.
-@return	TRUE if mapped */
+@return TRUE if mapped */
 UNIV_INLINE
 ibool
 buf_page_in_file(
 /*=============*/
 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 Determines if a block should be on unzip_LRU list.
-@return	TRUE if block belongs to unzip_LRU */
+@return TRUE if block belongs to unzip_LRU */
 UNIV_INLINE
 ibool
 buf_page_belongs_to_unzip_LRU(
 /*==========================*/
 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*********************************************************************//**
 Gets the mutex of a block.
-@return	pointer to mutex protecting bpage */
+@return pointer to mutex protecting bpage */
 UNIV_INLINE
-ib_mutex_t*
+BPageMutex*
 buf_page_get_mutex(
 /*===============*/
 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*********************************************************************//**
 Get the flush type of a page.
-@return	flush type */
+@return flush type */
 UNIV_INLINE
 buf_flush_t
 buf_page_get_flush_type(
 /*====================*/
 	const buf_page_t*	bpage)	/*!< in: buffer page */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Set the flush type of a page. */
 UNIV_INLINE
@@ -946,33 +1101,34 @@ buf_page_set_flush_type(
 /*====================*/
 	buf_page_t*	bpage,		/*!< in: buffer page */
 	buf_flush_t	flush_type);	/*!< in: flush type */
-/*********************************************************************//**
-Map a block to a file page. */
+
+/** Map a block to a file page.
+@param[in,out]	block	pointer to control block
+@param[in]	page_id	page id */
 UNIV_INLINE
 void
 buf_block_set_file_page(
-/*====================*/
-	buf_block_t*		block,	/*!< in/out: pointer to control block */
-	ulint			space,	/*!< in: tablespace id */
-	ulint			page_no);/*!< in: page number */
+	buf_block_t*		block,
+	const page_id_t&	page_id);
+
 /*********************************************************************//**
 Gets the io_fix state of a block.
-@return	io_fix state */
+@return io_fix state */
 UNIV_INLINE
 enum buf_io_fix
 buf_page_get_io_fix(
 /*================*/
 	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Gets the io_fix state of a block.
-@return	io_fix state */
+@return io_fix state */
 UNIV_INLINE
 enum buf_io_fix
 buf_block_get_io_fix(
 /*================*/
 	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Sets the io_fix state of a block. */
 UNIV_INLINE
@@ -1018,17 +1174,17 @@ ibool
 buf_page_can_relocate(
 /*==================*/
 	const buf_page_t*	bpage)	/*!< control block being relocated */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*********************************************************************//**
 Determine if a block has been flagged old.
-@return	TRUE if old */
+@return TRUE if old */
 UNIV_INLINE
 ibool
 buf_page_is_old(
 /*============*/
 	const buf_page_t*	bpage)	/*!< in: control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Flag a block old. */
 UNIV_INLINE
@@ -1039,13 +1195,13 @@ buf_page_set_old(
 	ibool		old);	/*!< in: old */
 /*********************************************************************//**
 Determine the time of first access of a block in the buffer pool.
-@return	ut_time_ms() at the time of first access, 0 if not accessed */
+@return ut_time_ms() at the time of first access, 0 if not accessed */
 UNIV_INLINE
 unsigned
 buf_page_is_accessed(
 /*=================*/
 	const buf_page_t*	bpage)	/*!< in: control block */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Flag a block accessed. */
 UNIV_INLINE
@@ -1057,130 +1213,64 @@ buf_page_set_accessed(
 /*********************************************************************//**
 Gets the buf_block_t handle of a buffered file block if an uncompressed
 page frame exists, or NULL. Note: even though bpage is not declared a
-const we don't update its value. It is safe to make this pure.
-@return	control block, or NULL */
+const we don't update its value.
+@return control block, or NULL */
 UNIV_INLINE
 buf_block_t*
 buf_page_get_block(
 /*===============*/
 	buf_page_t*	bpage)	/*!< in: control block, or NULL */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Gets a pointer to the memory frame of a block.
-@return	pointer to the frame */
+@return pointer to the frame */
 UNIV_INLINE
 buf_frame_t*
 buf_block_get_frame(
 /*================*/
 	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 #else /* UNIV_DEBUG */
 # define buf_block_get_frame(block) (block)->frame
 #endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets the space id of a block.
-@return	space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the space id of a block.
-@return	space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return	page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return	page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return	compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return	compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-	MY_ATTRIBUTE((pure));
+
 /*********************************************************************//**
 Gets the compressed page descriptor corresponding to an uncompressed page
 if applicable. */
 #define buf_block_get_page_zip(block) \
 	((block)->page.zip.data ? &(block)->page.zip : NULL)
 #ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return	pointer to block, never NULL */
-UNIV_INTERN
+
+/** Get a buffer block from an adaptive hash index pointer.
+This function does not return if the block is not identified.
+@param[in]	ptr	pointer to within a page frame
+@return pointer to block, never NULL */
 buf_block_t*
-buf_block_align(
-/*============*/
-	const byte*	ptr);	/*!< in: pointer to a frame */
+buf_block_from_ahi(const byte* ptr);
+
 /********************************************************************//**
 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
 the buf_block_t itself or a member of it
-@return	TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
+@return TRUE if ptr belongs to a buf_block_t struct */
 ibool
 buf_pointer_is_block_field(
 /*=======================*/
 	const void*		ptr);	/*!< in: pointer not
 					dereferenced */
 /** Find out if a pointer corresponds to a buf_block_t::mutex.
-@param m	in: mutex candidate
-@return		TRUE if m is a buf_block_t::mutex */
+@param[in]	m	mutex candidate
+@return TRUE if m is a buf_block_t::mutex */
 #define buf_pool_is_block_mutex(m)			\
 	buf_pointer_is_block_field((const void*)(m))
 /** Find out if a pointer corresponds to a buf_block_t::lock.
-@param l	in: rw-lock candidate
-@return		TRUE if l is a buf_block_t::lock */
+@param[in]	l	rw-lock candidate
+@return TRUE if l is a buf_block_t::lock */
 #define buf_pool_is_block_lock(l)			\
 	buf_pointer_is_block_field((const void*)(l))
 
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return	compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
-	const byte*	ptr);	/*!< in: pointer to the page */
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
+/** Inits a page for read to the buffer buf_pool. If the page is
 (1) already in buf_pool, or
 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
 (3) if the space is deleted or being deleted,
@@ -1188,25 +1278,23 @@ then this function does nothing.
 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
 on the buffer frame. The io-handler must take care that the flag is cleared
 and the lock released later.
-@return	pointer to the block or NULL */
-UNIV_INTERN
+@param[out]	err			DB_SUCCESS or DB_TABLESPACE_DELETED
+@param[in]	mode			BUF_READ_IBUF_PAGES_ONLY, ...
+@param[in]	page_id			page id
+@param[in]	unzip			TRUE=request uncompressed page
+@return pointer to the block or NULL */
 buf_page_t*
 buf_page_init_for_read(
-/*===================*/
-	dberr_t*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
-	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size, or 0 */
-	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
-	ib_int64_t	tablespace_version,/*!< in: prevents reading from a wrong
-				version of the tablespace in case we have done
-				DISCARD + IMPORT */
-	ulint		offset);/*!< in: page number */
+	dberr_t*		err,
+	ulint			mode,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ibool			unzip);
+
 /********************************************************************//**
 Completes an asynchronous read or write request of a file page to or from
 the buffer pool.
 @return true if successful */
-UNIV_INTERN
 bool
 buf_page_io_complete(
 /*=================*/
@@ -1214,25 +1302,14 @@ buf_page_io_complete(
 	bool		evict = false);/*!< in: whether or not to evict
 				the page from LRU list. */
 /********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return	the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-	MY_ATTRIBUTE((const));
-/********************************************************************//**
 Calculates the index of a buffer pool to the buf_pool[] array.
-@return	the position of the buffer pool in buf_pool[] */
+@return the position of the buffer pool in buf_pool[] */
 UNIV_INLINE
 ulint
 buf_pool_index(
 /*===========*/
 	const buf_pool_t*	buf_pool)	/*!< in: buffer pool */
-	MY_ATTRIBUTE((nonnull, const));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************************//**
 Returns the buffer pool instance given a page instance
 @return buf_pool */
@@ -1249,15 +1326,15 @@ buf_pool_t*
 buf_pool_from_block(
 /*================*/
 	const buf_block_t*	block); /*!< in: block */
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page
+
+/** Returns the buffer pool instance given a page id.
+@param[in]	page_id	page id
 @return buffer pool */
 UNIV_INLINE
 buf_pool_t*
 buf_pool_get(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: offset of the page within space */
+	const page_id_t&	page_id);
+
 /******************************************************************//**
 Returns the buffer pool instance given its array index
 @return buffer pool */
@@ -1267,71 +1344,64 @@ buf_pool_from_array(
 /*================*/
 	ulint	index);		/*!< in: array index to get
 				buffer pool instance from */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return	block, NULL if not found */
+
+/** Returns the control block of a file page, NULL if not found.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@return block, NULL if not found */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_low(
-/*==================*/
-	buf_pool_t*	buf_pool,/*!< buffer pool instance */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space */
-	ulint		fold);	/*!< in: buf_page_address_fold(space, offset) */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id);
+
+/** Returns the control block of a file page, NULL if not found.
 If the block is found and lock is not NULL then the appropriate
 page_hash lock is acquired in the specified lock mode. Otherwise,
 mode value is ignored. It is up to the caller to release the
 lock. If the block is found and the lock is NULL then the page_hash
 lock is released by this function.
-@return	block, NULL if not found, or watch sentinel (if watch is true) */
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@param[in,out]	lock		lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@param[in]	watch		if true, return watch sentinel also.
+@return pointer to the bpage, or NULL if not found (then lock is also NULL),
+or a watch sentinel if watch is true. */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_locked(
-/*=====================*/
-					/*!< out: pointer to the bpage,
-					or NULL; if NULL, hash_lock
-					is also NULL. */
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	rw_lock_t**	lock,		/*!< in/out: lock of the page
-					hash acquired if bpage is
-					found. NULL otherwise. If NULL
-					is passed then the hash_lock
-					is released by this function */
-	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
-	bool		watch = false);	/*!< in: if true, return watch
-					sentinel also. */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id,
+	rw_lock_t**		lock,
+	ulint			lock_mode,
+	bool			watch = false);
+
+/** Returns the control block of a file page, NULL if not found.
 If the block is found and lock is not NULL then the appropriate
 page_hash lock is acquired in the specified lock mode. Otherwise,
 mode value is ignored. It is up to the caller to release the
 lock. If the block is found and the lock is NULL then the page_hash
 lock is released by this function.
-@return	block, NULL if not found */
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@param[in,out]	lock		lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@return pointer to the block or NULL; if NULL, lock is also NULL. */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get_locked(
-/*=====================*/
-					/*!< out: pointer to the bpage,
-					or NULL; if NULL, hash_lock
-					is also NULL. */
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	rw_lock_t**	lock,		/*!< in/out: lock of the page
-					hash acquired if bpage is
-					found. NULL otherwise. If NULL
-					is passed then the hash_lock
-					is released by this function */
-	ulint		lock_mode);	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id,
+	rw_lock_t**		lock,
+	ulint			lock_mode);
+
 /* There are four different ways we can try to get a bpage or block
 from the page hash:
 1) Caller already holds the appropriate page hash lock: in the case call
@@ -1339,75 +1409,70 @@ buf_page_hash_get_low() function.
 2) Caller wants to hold page hash lock in x-mode
 3) Caller wants to hold page hash lock in s-mode
 4) Caller doesn't want to hold page hash lock */
-#define buf_page_hash_get_s_locked(b, s, o, l)			\
-	buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_page_hash_get_x_locked(b, s, o, l)			\
-	buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_page_hash_get(b, s, o)				\
-	buf_page_hash_get_locked(b, s, o, NULL, 0)
-#define buf_page_get_also_watch(b, s, o)			\
-	buf_page_hash_get_locked(b, s, o, NULL, 0, true)
-
-#define buf_block_hash_get_s_locked(b, s, o, l)			\
-	buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_block_hash_get_x_locked(b, s, o, l)			\
-	buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_block_hash_get(b, s, o)				\
-	buf_block_hash_get_locked(b, s, o, NULL, 0)
+#define buf_page_hash_get_s_locked(b, page_id, l)		\
+	buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
+#define buf_page_hash_get_x_locked(b, page_id, l)		\
+	buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
+#define buf_page_hash_get(b, page_id)				\
+	buf_page_hash_get_locked(b, page_id, NULL, 0)
+#define buf_page_get_also_watch(b, page_id)			\
+	buf_page_hash_get_locked(b, page_id, NULL, 0, true)
+
+#define buf_block_hash_get_s_locked(b, page_id, l)		\
+	buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
+#define buf_block_hash_get_x_locked(b, page_id, l)		\
+	buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
+#define buf_block_hash_get(b, page_id)				\
+	buf_block_hash_get_locked(b, page_id, NULL, 0)
 
 /*********************************************************************//**
 Gets the current length of the free list of buffer blocks.
-@return	length of the free list */
-UNIV_INTERN
+@return length of the free list */
 ulint
 buf_get_free_list_len(void);
 /*=======================*/
 
 /********************************************************************//**
 Determine if a block is a sentinel for a buffer pool watch.
-@return	TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
+@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
 ibool
 buf_pool_watch_is_sentinel(
 /*=======================*/
-	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
+	const buf_pool_t*	buf_pool,	/*!< buffer pool instance */
 	const buf_page_t*	bpage)		/*!< in: block */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have the buffer pool
+
+/** Add watch for the given page to be read in. Caller must have
+appropriate hash_lock for the bpage. This function may release the
+hash_lock and reacquire it.
+@param[in]	page_id		page id
+@param[in,out]	hash_lock	hash_lock currently latched
 @return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
 buf_page_t*
 buf_pool_watch_set(
-/*===============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset,	/*!< in: page number */
-	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
-	MY_ATTRIBUTE((warn_unused_result));
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before. */
-UNIV_INTERN
+	const page_id_t&	page_id,
+	rw_lock_t**		hash_lock)
+MY_ATTRIBUTE((warn_unused_result));
+
+/** Stop watching if the page has been read in.
+buf_pool_watch_set(page_id, hash_lock) must have returned NULL before.
+@param[in]	page_id	page id */
 void
 buf_pool_watch_unset(
-/*=================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: page number */
-/****************************************************************//**
-Check if the page has been read in.
+	const page_id_t&	page_id);
+
+/** Check if the page has been read in.
 This may only be called after buf_pool_watch_set(space,offset)
 has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return	FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
+@param[in]	page_id	page id
+@return FALSE if the given page was not read in, TRUE if it was */
 ibool
 buf_pool_watch_occurred(
-/*====================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-	MY_ATTRIBUTE((warn_unused_result));
+	const page_id_t&	page_id)
+MY_ATTRIBUTE((warn_unused_result));
+
 /********************************************************************//**
 Get total buffer pool statistics. */
-UNIV_INTERN
 void
 buf_get_total_list_len(
 /*===================*/
@@ -1416,7 +1481,6 @@ buf_get_total_list_len(
 	ulint*		flush_list_len);/*!< out: length of all flush lists */
 /********************************************************************//**
 Get total list size in bytes from all buffer pools. */
-UNIV_INTERN
 void
 buf_get_total_list_size_in_bytes(
 /*=============================*/
@@ -1424,7 +1488,6 @@ buf_get_total_list_size_in_bytes(
 							in all buffer pools */
 /********************************************************************//**
 Get total buffer pool statistics. */
-UNIV_INTERN
 void
 buf_get_total_stat(
 /*===============*/
@@ -1440,15 +1503,33 @@ buf_get_nth_chunk_block(
 	ulint		n,		/*!< in: nth chunk in the buffer pool */
 	ulint*		chunk_size);	/*!< in: chunk size */
 
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
+/** Verify the possibility that a stored page is not in buffer pool.
+@param[in]	withdraw_clock	withdraw clock value when the page was stored
+@retval true	if the page might be relocated */
+UNIV_INLINE
+bool
+buf_pool_is_obsolete(
+	ulint	withdraw_clock);
+
+/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
+if needed.
+@param[in]	size	size in bytes
+@return	aligned size */
+UNIV_INLINE
+ulint
+buf_pool_size_align(
+	ulint	size);
+
+/** Calculate the checksum of a page from compressed table and update the
+page.
+@param[in,out]	page	page to update
+@param[in]	size	compressed page size
+@param[in]	lsn	LSN to stamp on the page */
 void
 buf_flush_update_zip_checksum(
-/*==========================*/
-	buf_frame_t*	page,		/*!< in/out: Page to update */
-	ulint		zip_size,	/*!< in: Compressed page size */
-	lsn_t		lsn);		/*!< in: Lsn to stamp on the page */
+	buf_frame_t*	page,
+	ulint		size,
+	lsn_t		lsn);
 
 #endif /* !UNIV_HOTBACKUP */
 
@@ -1528,7 +1609,8 @@ for compressed and uncompressed frames */
 /** Number of bits used for buffer page states. */
 #define BUF_PAGE_STATE_BITS	3
 
-struct buf_page_t{
+class buf_page_t {
+public:
 	/** @name General fields
 	None of these bit-fields must be modified without holding
 	buf_page_get_mutex() [buf_block_t::mutex or
@@ -1537,36 +1619,21 @@ struct buf_page_t{
 	by buf_pool->mutex. */
 	/* @{ */
 
-	ib_uint32_t	space;		/*!< tablespace id; also protected
-					by buf_pool->mutex. */
-	ib_uint32_t	offset;		/*!< page number; also protected
-					by buf_pool->mutex. */
-	/** count of how manyfold this block is currently bufferfixed */
-#ifdef PAGE_ATOMIC_REF_COUNT
-	ib_uint32_t	buf_fix_count;
+	/** Page id. Protected by buf_pool mutex. */
+	page_id_t	id;
 
-	/** type of pending I/O operation; also protected by
-	buf_pool->mutex for writes only @see enum buf_io_fix */
-	byte		io_fix;
+	/** Page size. Protected by buf_pool mutex. */
+	page_size_t	size;
 
-	byte		state;
-#else
-	unsigned	buf_fix_count:19;
+	/** Count of how manyfold this block is currently bufferfixed. */
+	ib_uint32_t	buf_fix_count;
 
 	/** type of pending I/O operation; also protected by
-	buf_pool->mutex for writes only @see enum buf_io_fix */
-	unsigned	io_fix:2;
+	buf_pool->mutex for writes only */
+	buf_io_fix	io_fix;
 
-	/*!< state of the control block; also protected by buf_pool->mutex.
-	State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY
-	need not be protected by buf_page_get_mutex(). @see enum buf_page_state.
-	State changes that are relevant to page_hash are additionally protected
-	by the appropriate page_hash mutex i.e.: if a page is in page_hash or
-	is being added to/removed from page_hash then the corresponding changes
-	must also be protected by page_hash mutex. */
-	unsigned	state:BUF_PAGE_STATE_BITS;
-
-#endif /* PAGE_ATOMIC_REF_COUNT */
+	/** Block state. @see buf_page_in_file */
+	buf_page_state	state;
 
 #ifndef UNIV_HOTBACKUP
 	unsigned	flush_type:2;	/*!< if this block is currently being
@@ -1592,6 +1659,7 @@ struct buf_page_t{
 					if written again we check is TRIM
 					operation needed. */
 
+	ulint           space;          /*!< space id */
 	unsigned        key_version;	/*!< key version for this block */
 	bool            page_encrypted; /*!< page is page encrypted */
 	bool            page_compressed;/*!< page is page compressed */
@@ -1631,7 +1699,7 @@ struct buf_page_t{
 					in one of the following lists in
 					buf_pool:
 
-					- BUF_BLOCK_NOT_USED:	free
+					- BUF_BLOCK_NOT_USED:	free, withdraw
 					- BUF_BLOCK_FILE_PAGE:	flush_list
 					- BUF_BLOCK_ZIP_DIRTY:	flush_list
 					- BUF_BLOCK_ZIP_PAGE:	zip_clean
@@ -1667,6 +1735,9 @@ struct buf_page_t{
 					should hold: in_free_list
 					== (state == BUF_BLOCK_NOT_USED) */
 #endif /* UNIV_DEBUG */
+
+	FlushObserver*	flush_observer;	/*!< flush observer */
+
 	lsn_t		newest_modification;
 					/*!< log sequence number of
 					the youngest modification to
@@ -1714,13 +1785,13 @@ struct buf_page_t{
 					0 if the block was never accessed
 					in the buffer pool. Protected by
 					block mutex */
-# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+# ifdef UNIV_DEBUG
 	ibool		file_page_was_freed;
 					/*!< this is set to TRUE when
 					fsp frees a page in buffer pool;
 					protected by buf_pool->zip_mutex
 					or buf_block_t::mutex. */
-# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+# endif /* UNIV_DEBUG */
 #endif /* !UNIV_HOTBACKUP */
 };
 
@@ -1740,6 +1811,8 @@ struct buf_block_t{
 					aligned to an address divisible by
 					UNIV_PAGE_SIZE */
 #ifndef UNIV_HOTBACKUP
+	BPageLock	lock;		/*!< read-write lock of the buffer
+					frame */
 	UT_LIST_NODE_T(buf_block_t) unzip_LRU;
 					/*!< node of the decompressed LRU list;
 					a block is in the unzip_LRU list
@@ -1749,15 +1822,8 @@ struct buf_block_t{
 	ibool		in_unzip_LRU_list;/*!< TRUE if the page is in the
 					decompressed LRU list;
 					used in debugging */
+	ibool		in_withdraw_list;
 #endif /* UNIV_DEBUG */
-	ib_mutex_t	mutex;		/*!< mutex protecting this block:
-					state (also protected by the buffer
-					pool mutex), io_fix, buf_fix_count,
-					and accessed; we introduce this new
-					mutex in InnoDB-5.1 to relieve
-					contention on the buffer pool mutex */
-	rw_lock_t	lock;		/*!< read-write lock of the buffer
-					frame */
 	unsigned	lock_hash_val:32;/*!< hashed value of the page address
 					in the record lock hash table;
 					protected by buf_block_t::lock
@@ -1765,15 +1831,6 @@ struct buf_block_t{
 				        in buf_page_get_gen(),
 					buf_page_init_for_read()
 					and buf_page_create()) */
-	ibool		check_index_page_at_flush;
-					/*!< TRUE if we know that this is
-					an index page, and want the database
-					to check its consistency before flush;
-					note that there may be pages in the
-					buffer pool which are index pages,
-					but this flag is not set because
-					we do not keep track of all pages;
-					NOT protected by any mutex */
 	/* @} */
 	/** @name Optimistic search field */
 	/* @{ */
@@ -1796,11 +1853,12 @@ struct buf_block_t{
 
 	ulint		n_hash_helps;	/*!< counter which controls building
 					of a new hash index for the page */
-	ulint		n_fields;	/*!< recommended prefix length for hash
+	volatile ulint	n_bytes;	/*!< recommended prefix length for hash
+					search: number of bytes in
+					an incomplete last field */
+	volatile ulint	n_fields;	/*!< recommended prefix length for hash
 					search: number of full fields */
-	ulint		n_bytes;	/*!< recommended prefix: number of bytes
-					in an incomplete field */
-	ibool		left_side;	/*!< TRUE or FALSE, depending on
+	volatile bool	left_side;	/*!< true or false, depending on
 					whether the leftmost record of several
 					records with the same prefix should be
 					indexed in the hash index */
@@ -1808,7 +1866,7 @@ struct buf_block_t{
 
 	/** @name Hash search fields
 	These 5 fields may only be modified when we have
-	an x-latch on btr_search_latch AND
+	an x-latch on search system AND
 	- we are holding an s-latch or x-latch on buf_block_t::lock or
 	- we know that buf_block_t::buf_fix_count == 0.
 
@@ -1816,7 +1874,7 @@ struct buf_block_t{
 	in the buffer pool in buf0buf.cc.
 
 	Another exception is that assigning block->index = NULL
-	is allowed whenever holding an x-latch on btr_search_latch. */
+	is allowed whenever holding an x-latch on search system. */
 
 	/* @{ */
 
@@ -1839,8 +1897,11 @@ struct buf_block_t{
 					complete, though: there may
 					have been hash collisions,
 					record deletions, etc. */
+	bool		skip_flush_check;
+					/*!< Skip check in buf_dblwr_check_block
+					during bulk load, protected by lock.*/
 	/* @} */
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 	/** @name Debug fields */
 	/* @{ */
 	rw_lock_t	debug_latch;	/*!< in the debug version, each thread
@@ -1849,16 +1910,23 @@ struct buf_block_t{
 					debug utilities in sync0rw */
 	/* @} */
 # endif
+	BPageMutex	mutex;		/*!< mutex protecting this block:
+					state (also protected by the buffer
+					pool mutex), io_fix, buf_fix_count,
+					and accessed; we introduce this new
+					mutex in InnoDB-5.1 to relieve
+					contention on the buffer pool mutex */
 #endif /* !UNIV_HOTBACKUP */
 };
 
 /** Check if a buf_block_t object is in a valid state
-@param block	buffer block
-@return		TRUE if valid */
+@param block buffer block
+@return TRUE if valid */
 #define buf_block_state_valid(block)				\
 (buf_block_get_state(block) >= BUF_BLOCK_NOT_USED		\
  && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
 
+
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Compute the hash fold value for blocks in buf_pool->zip_hash. */
@@ -1890,7 +1958,7 @@ public:
 	virtual ~HazardPointer() {}
 
 	/** Get current value */
-	buf_page_t* get()
+	buf_page_t* get() const
 	{
 		ut_ad(mutex_own(m_mutex));
 		return(m_hp);
@@ -2074,15 +2142,14 @@ struct buf_pool_t{
 
 	/** @name General fields */
 	/* @{ */
-	ib_mutex_t	mutex;		/*!< Buffer pool mutex of this
+	BufPoolMutex	mutex;		/*!< Buffer pool mutex of this
 					instance */
-	ib_mutex_t	zip_mutex;	/*!< Zip mutex of this buffer
+	BufPoolZipMutex	zip_mutex;	/*!< Zip mutex of this buffer
 					pool instance, protects compressed
 					only pages (of type buf_page_t, not
 					buf_block_t */
 	ulint		instance_no;	/*!< Array index of this buffer
 					pool instance */
-	ulint		old_pool_size;  /*!< Old pool size in bytes */
 	ulint		curr_pool_size;	/*!< Current pool size in bytes */
 	ulint		LRU_old_ratio;  /*!< Reserve this much of the buffer
 					pool for "old" blocks */
@@ -2093,9 +2160,19 @@ struct buf_pool_t{
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ulint		mutex_exit_forbidden; /*!< Forbid release mutex */
 #endif
-	ulint		n_chunks;	/*!< number of buffer pool chunks */
+	ut_allocator<unsigned char>	allocator;	/*!< Allocator used for
+					allocating memory for the "chunks"
+					member. */
+	volatile ulint	n_chunks;	/*!< number of buffer pool chunks */
+	volatile ulint	n_chunks_new;	/*!< new number of buffer pool chunks */
 	buf_chunk_t*	chunks;		/*!< buffer pool chunks */
+	buf_chunk_t*	chunks_old;	/*!< old buffer pool chunks to be freed
+					after resizing buffer pool */
 	ulint		curr_size;	/*!< current pool size in pages */
+	ulint		old_size;	/*!< previous pool size in pages */
+	ulint		read_ahead_area;/*!< size in pages of the area which
+					the read-ahead algorithms read if
+					invoked */
 	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
 					buf_block_t file pages,
 					buf_page_in_file() == TRUE,
@@ -2107,6 +2184,8 @@ struct buf_pool_t{
 					page_hash mutex. Lookups can happen
 					while holding the buf_pool->mutex or
 					the relevant page_hash mutex. */
+	hash_table_t*	page_hash_old;	/*!< old pointer to page_hash to be
+					freed after resizing buffer pool */
 	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
 					whose frames are allocated to the
 					zip buddy system,
@@ -2130,7 +2209,7 @@ struct buf_pool_t{
 
 	/* @{ */
 
-	ib_mutex_t	flush_list_mutex;/*!< mutex protecting the
+	FlushListMutex	flush_list_mutex;/*!< mutex protecting the
 					flush list access. This mutex
 					protects flush_list, flush_rbt
 					and bpage::list pointers when
@@ -2197,6 +2276,15 @@ struct buf_pool_t{
 					/*!< base node of the free
 					block list */
 
+	UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
+					/*!< base node of the withdraw
+					block list. It is only used while
+					shrinking the buffer pool, so that
+					blocks to be removed are not reused */
+
+	ulint		withdraw_target;/*!< target length of withdraw
+					block list, when withdrawing */
+
 	/** "hazard pointer" used during scan of LRU while doing
 	LRU list batch.  Protected by buf_pool::mutex */
 	LRUHp		lru_hp;
@@ -2211,6 +2299,7 @@ struct buf_pool_t{
 
 	UT_LIST_BASE_NODE_T(buf_page_t) LRU;
 					/*!< base node of the LRU list */
+
 	buf_page_t*	LRU_old;	/*!< pointer to the about
 					LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
 					oldest blocks in the LRU list;
@@ -2258,6 +2347,15 @@ struct buf_pool_t{
 	/* @} */
 };
 
+/** Print the given buf_pool_t object.
+@param[in,out]	out		the output stream
+@param[in]	buf_pool	the buf_pool_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+        std::ostream&		out,
+        const buf_pool_t&	buf_pool);
+
 /** @name Accessors for buf_pool->mutex.
 Use these instead of accessing buf_pool->mutex directly. */
 /* @{ */
@@ -2265,77 +2363,79 @@ Use these instead of accessing buf_pool->mutex directly. */
 /** Test if a buffer pool mutex is owned. */
 #define buf_pool_mutex_own(b) mutex_own(&b->mutex)
 /** Acquire a buffer pool mutex. */
-#define buf_pool_mutex_enter(b) do {			\
-	ut_ad(!mutex_own(&b->zip_mutex));		\
-	mutex_enter(&b->mutex);				\
+#define buf_pool_mutex_enter(b) do {		\
+	ut_ad(!(b)->zip_mutex.is_owned());	\
+	mutex_enter(&(b)->mutex);		\
 } while (0)
 
 /** Test if flush list mutex is owned. */
-#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex)
+#define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex)
 
 /** Acquire the flush list mutex. */
-#define buf_flush_list_mutex_enter(b) do {		\
-	mutex_enter(&b->flush_list_mutex);		\
+#define buf_flush_list_mutex_enter(b) do {	\
+	mutex_enter(&(b)->flush_list_mutex);	\
 } while (0)
 /** Release the flush list mutex. */
-# define buf_flush_list_mutex_exit(b) do {		\
-	mutex_exit(&b->flush_list_mutex);		\
+# define buf_flush_list_mutex_exit(b) do {	\
+	mutex_exit(&(b)->flush_list_mutex);	\
 } while (0)
 
+
 /** Test if block->mutex is owned. */
-#define buf_block_mutex_own(b)	mutex_own(&(b)->mutex)
+#define buf_page_mutex_own(b)	(b)->mutex.is_owned()
 
 /** Acquire the block->mutex. */
-#define buf_block_mutex_enter(b) do {			\
+#define buf_page_mutex_enter(b) do {			\
 	mutex_enter(&(b)->mutex);			\
 } while (0)
 
 /** Release the trx->mutex. */
-#define buf_block_mutex_exit(b) do {			\
-	mutex_exit(&(b)->mutex);				\
+#define buf_page_mutex_exit(b) do {			\
+	(b)->mutex.exit();				\
 } while (0)
 
 
 /** Get appropriate page_hash_lock. */
-# define buf_page_hash_lock_get(b, f)			\
-	hash_get_lock(b->page_hash, f)
+# define buf_page_hash_lock_get(buf_pool, page_id)	\
+	hash_get_lock((buf_pool)->page_hash, (page_id).fold())
 
-#ifdef UNIV_SYNC_DEBUG
+/** If not appropriate page_hash_lock, relock until appropriate. */
+# define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
+	hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
+
+# define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id)\
+	hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
+
+#ifdef UNIV_DEBUG
 /** Test if page_hash lock is held in s-mode. */
-# define buf_page_hash_lock_held_s(b, p)		\
-	rw_lock_own(buf_page_hash_lock_get(b,		\
-		  buf_page_address_fold(p->space,	\
-					p->offset)),	\
-					RW_LOCK_SHARED)
+# define buf_page_hash_lock_held_s(buf_pool, bpage)	\
+	rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S)
 
 /** Test if page_hash lock is held in x-mode. */
-# define buf_page_hash_lock_held_x(b, p)		\
-	rw_lock_own(buf_page_hash_lock_get(b,		\
-		  buf_page_address_fold(p->space,	\
-					p->offset)),	\
-					RW_LOCK_EX)
+# define buf_page_hash_lock_held_x(buf_pool, bpage)	\
+	rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X)
 
 /** Test if page_hash lock is held in x or s-mode. */
-# define buf_page_hash_lock_held_s_or_x(b, p)		\
-	(buf_page_hash_lock_held_s(b, p)		\
-	 || buf_page_hash_lock_held_x(b, p))
+# define buf_page_hash_lock_held_s_or_x(buf_pool, bpage)\
+	(buf_page_hash_lock_held_s((buf_pool), (bpage))	\
+	 || buf_page_hash_lock_held_x((buf_pool), (bpage)))
 
-# define buf_block_hash_lock_held_s(b, p)		\
-	buf_page_hash_lock_held_s(b, &(p->page))
+# define buf_block_hash_lock_held_s(buf_pool, block)	\
+	buf_page_hash_lock_held_s((buf_pool), &(block)->page)
 
-# define buf_block_hash_lock_held_x(b, p)		\
-	buf_page_hash_lock_held_x(b, &(p->page))
+# define buf_block_hash_lock_held_x(buf_pool, block)	\
+	buf_page_hash_lock_held_x((buf_pool), &(block)->page)
 
-# define buf_block_hash_lock_held_s_or_x(b, p)		\
-	buf_page_hash_lock_held_s_or_x(b, &(p->page))
-#else /* UNIV_SYNC_DEBUG */
+# define buf_block_hash_lock_held_s_or_x(buf_pool, block)	\
+	buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page)
+#else /* UNIV_DEBUG */
 # define buf_page_hash_lock_held_s(b, p)	(TRUE)
 # define buf_page_hash_lock_held_x(b, p)	(TRUE)
 # define buf_page_hash_lock_held_s_or_x(b, p)	(TRUE)
 # define buf_block_hash_lock_held_s(b, p)	(TRUE)
 # define buf_block_hash_lock_held_x(b, p)	(TRUE)
 # define buf_block_hash_lock_held_s_or_x(b, p)	(TRUE)
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /** Forbid the release of the buffer pool mutex. */
@@ -2416,6 +2516,12 @@ struct	CheckInLRUList {
 	{
 		ut_a(elem->in_LRU_list);
 	}
+
+	static void validate(const buf_pool_t* buf_pool)
+	{
+		CheckInLRUList	check;
+		ut_list_validate(buf_pool->LRU, check);
+	}
 };
 
 /** Functor to validate the LRU list. */
@@ -2424,6 +2530,12 @@ struct	CheckInFreeList {
 	{
 		ut_a(elem->in_free_list);
 	}
+
+	static void validate(const buf_pool_t* buf_pool)
+	{
+		CheckInFreeList	check;
+		ut_list_validate(buf_pool->free, check);
+	}
 };
 
 struct	CheckUnzipLRUAndLRUList {
@@ -2432,11 +2544,18 @@ struct	CheckUnzipLRUAndLRUList {
                 ut_a(elem->page.in_LRU_list);
                 ut_a(elem->in_unzip_LRU_list);
 	}
+
+	static void validate(const buf_pool_t* buf_pool)
+	{
+		CheckUnzipLRUAndLRUList	check;
+		ut_list_validate(buf_pool->unzip_LRU, check);
+	}
 };
 #endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
 
 #ifndef UNIV_NONINL
 #include "buf0buf.ic"
 #endif
+#endif /* !UNIV_INNOCHECKSUM */
 
 #endif
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index e77c5a84202..9ff2a1bfab5 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1,8 +1,8 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2015, MariaDB Corporation.
+Copyright (c) 2014, 2016, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -36,14 +36,25 @@ Created 11/5/1995 Heikki Tuuri
 #include "buf0flu.h"
 #include "buf0lru.h"
 #include "buf0rea.h"
+#include "sync0debug.h"
+#include "fsp0types.h"
+#include "ut0new.h"
 
 /** A chunk of buffers. The buffer pool is allocated in chunks. */
 struct buf_chunk_t{
-	ulint		mem_size;	/*!< allocated size of the chunk */
 	ulint		size;		/*!< size of frames[] and blocks[] */
-	void*		mem;		/*!< pointer to the memory area which
+	unsigned char*	mem;		/*!< pointer to the memory area which
 					was allocated for the frames */
+	ut_new_pfx_t	mem_pfx;	/*!< Auxiliary structure, describing
+					"mem". It is filled by the allocator's
+					alloc method and later passed to the
+					deallocate method. */
 	buf_block_t*	blocks;		/*!< array of buffer control blocks */
+
+	/** Get the size of 'mem' in bytes. */
+	size_t	mem_size() const {
+		return(mem_pfx.m_size);
+	}
 };
 
 /*********************************************************************//**
@@ -59,7 +70,7 @@ buf_pool_get_curr_size(void)
 
 /********************************************************************//**
 Calculates the index of a buffer pool to the buf_pool[] array.
-@return	the position of the buffer pool in buf_pool[] */
+@return the position of the buffer pool in buf_pool[] */
 UNIV_INLINE
 ulint
 buf_pool_index(
@@ -112,7 +123,7 @@ buf_pool_get_n_pages(void)
 
 /********************************************************************//**
 Reads the freed_page_clock of a buffer block.
-@return	freed_page_clock */
+@return freed_page_clock */
 UNIV_INLINE
 ulint
 buf_page_get_freed_page_clock(
@@ -125,7 +136,7 @@ buf_page_get_freed_page_clock(
 
 /********************************************************************//**
 Reads the freed_page_clock of a buffer block.
-@return	freed_page_clock */
+@return freed_page_clock */
 UNIV_INLINE
 ulint
 buf_block_get_freed_page_clock(
@@ -141,7 +152,7 @@ meaning that it is not in danger of getting evicted and also implying
 that it has been accessed recently.
 Note that this is for heuristics only and does not reserve buffer pool
 mutex.
-@return	TRUE if block is close to MRU end of LRU */
+@return TRUE if block is close to MRU end of LRU */
 UNIV_INLINE
 ibool
 buf_page_peek_if_young(
@@ -162,7 +173,7 @@ buf_page_peek_if_young(
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
-@return	TRUE if should be made younger */
+@return TRUE if should be made younger */
 UNIV_INLINE
 ibool
 buf_page_peek_if_too_old(
@@ -179,6 +190,12 @@ buf_page_peek_if_too_old(
 	} else if (buf_LRU_old_threshold_ms && bpage->old) {
 		unsigned	access_time = buf_page_is_accessed(bpage);
 
+		/* It is possible that the below comparison returns an
+		unexpected result. 2^32 milliseconds pass in about 50 days,
+		so if the difference between ut_time_ms() and access_time
+		is e.g. 50 days + 15 ms, then the below will behave as if
+		it is 15 ms. This is known and fixing it would require
+		increasing buf_page_t::access_time from 32 to 64 bits. */
 		if (access_time > 0
 		    && ((ib_uint32_t) (ut_time_ms() - access_time))
 		    >= buf_LRU_old_threshold_ms) {
@@ -195,14 +212,14 @@ buf_page_peek_if_too_old(
 
 /*********************************************************************//**
 Gets the state of a block.
-@return	state */
+@return state */
 UNIV_INLINE
 enum buf_page_state
 buf_page_get_state(
 /*===============*/
 	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
 {
-	enum buf_page_state	state = (enum buf_page_state) bpage->state;
+	enum buf_page_state	state	= bpage->state;
 
 #ifdef UNIV_DEBUG
 	switch (state) {
@@ -224,7 +241,7 @@ buf_page_get_state(
 }
 /*********************************************************************//**
 Gets the state of a block.
-@return	state */
+@return state */
 UNIV_INLINE
 enum buf_page_state
 buf_block_get_state(
@@ -303,7 +320,8 @@ buf_page_set_state(
 		break;
 	case BUF_BLOCK_FILE_PAGE:
 		if (!(state == BUF_BLOCK_NOT_USED
-	              || state == BUF_BLOCK_REMOVE_HASH)) {
+	              || state == BUF_BLOCK_REMOVE_HASH
+		      || state == BUF_BLOCK_FILE_PAGE)) {
 			const char *old_state_name = buf_get_state_name((buf_block_t*)bpage);
 			bpage->state = state;
 
@@ -314,10 +332,11 @@ buf_page_set_state(
 				old_state_name,
 				state,
 				buf_get_state_name((buf_block_t*)bpage));
+			ut_a(state == BUF_BLOCK_NOT_USED
+				|| state == BUF_BLOCK_REMOVE_HASH
+				|| state == BUF_BLOCK_FILE_PAGE);
 		}
 
-		ut_a(state == BUF_BLOCK_NOT_USED
-		     || state == BUF_BLOCK_REMOVE_HASH);
 		break;
 	case BUF_BLOCK_REMOVE_HASH:
 		ut_a(state == BUF_BLOCK_MEMORY);
@@ -341,7 +360,7 @@ buf_block_set_state(
 
 /*********************************************************************//**
 Determines if a block is mapped to a tablespace.
-@return	TRUE if mapped */
+@return TRUE if mapped */
 UNIV_INLINE
 ibool
 buf_page_in_file(
@@ -369,7 +388,7 @@ buf_page_in_file(
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 Determines if a block should be on unzip_LRU list.
-@return	TRUE if block belongs to unzip_LRU */
+@return TRUE if block belongs to unzip_LRU */
 UNIV_INLINE
 ibool
 buf_page_belongs_to_unzip_LRU(
@@ -384,23 +403,22 @@ buf_page_belongs_to_unzip_LRU(
 
 /*********************************************************************//**
 Gets the mutex of a block.
-@return	pointer to mutex protecting bpage */
+@return pointer to mutex protecting bpage */
 UNIV_INLINE
-ib_mutex_t*
+BPageMutex*
 buf_page_get_mutex(
 /*===============*/
 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
 {
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_POOL_WATCH:
 		ut_error;
 		return(NULL);
 	case BUF_BLOCK_ZIP_PAGE:
-	case BUF_BLOCK_ZIP_DIRTY: {
-		buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-
+	case BUF_BLOCK_ZIP_DIRTY:
 		return(&buf_pool->zip_mutex);
-		}
 	default:
 		return(&((buf_block_t*) bpage)->mutex);
 	}
@@ -408,7 +426,7 @@ buf_page_get_mutex(
 
 /*********************************************************************//**
 Get the flush type of a page.
-@return	flush type */
+@return flush type */
 UNIV_INLINE
 buf_flush_t
 buf_page_get_flush_type(
@@ -443,24 +461,22 @@ buf_page_set_flush_type(
 	ut_ad(buf_page_get_flush_type(bpage) == flush_type);
 }
 
-/*********************************************************************//**
-Map a block to a file page. */
+/** Map a block to a file page.
+@param[in,out]	block	pointer to control block
+@param[in]	page_id	page id */
 UNIV_INLINE
 void
 buf_block_set_file_page(
-/*====================*/
-	buf_block_t*		block,	/*!< in/out: pointer to control block */
-	ulint			space,	/*!< in: tablespace id */
-	ulint			page_no)/*!< in: page number */
+	buf_block_t*		block,
+	const page_id_t&	page_id)
 {
 	buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
-	block->page.space = static_cast<ib_uint32_t>(space);
-	block->page.offset = static_cast<ib_uint32_t>(page_no);
+	block->page.id.copy_from(page_id);
 }
 
 /*********************************************************************//**
 Gets the io_fix state of a block.
-@return	io_fix state */
+@return io_fix state */
 UNIV_INLINE
 enum buf_io_fix
 buf_page_get_io_fix(
@@ -469,7 +485,8 @@ buf_page_get_io_fix(
 {
 	ut_ad(bpage != NULL);
 
-	enum buf_io_fix	io_fix = (enum buf_io_fix) bpage->io_fix;
+	enum buf_io_fix	io_fix	= bpage->io_fix;
+
 #ifdef UNIV_DEBUG
 	switch (io_fix) {
 	case BUF_IO_NONE:
@@ -485,7 +502,7 @@ buf_page_get_io_fix(
 
 /*********************************************************************//**
 Gets the io_fix state of a block.
-@return	io_fix state */
+@return io_fix state */
 UNIV_INLINE
 enum buf_io_fix
 buf_block_get_io_fix(
@@ -507,7 +524,7 @@ buf_page_set_io_fix(
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
 	bpage->io_fix = io_fix;
@@ -544,7 +561,7 @@ buf_page_set_sticky(
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 
@@ -562,7 +579,7 @@ buf_page_unset_sticky(
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
 
@@ -581,7 +598,7 @@ buf_page_can_relocate(
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(buf_page_in_file(bpage));
 	ut_ad(bpage->in_LRU_list);
@@ -592,7 +609,7 @@ buf_page_can_relocate(
 
 /*********************************************************************//**
 Determine if a block has been flagged old.
-@return	TRUE if old */
+@return TRUE if old */
 UNIV_INLINE
 ibool
 buf_page_is_old(
@@ -602,7 +619,7 @@ buf_page_is_old(
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 	ut_ad(buf_page_in_file(bpage));
 
 	return(bpage->old);
@@ -646,7 +663,7 @@ buf_page_set_old(
 
 /*********************************************************************//**
 Determine the time of first access of a block in the buffer pool.
-@return	ut_time_ms() at the time of first access, 0 if not accessed */
+@return ut_time_ms() at the time of first access, 0 if not accessed */
 UNIV_INLINE
 unsigned
 buf_page_is_accessed(
@@ -683,7 +700,7 @@ buf_page_set_accessed(
 /*********************************************************************//**
 Gets the buf_block_t handle of a buffered file block if an uncompressed
 page frame exists, or NULL.
-@return	control block, or NULL */
+@return control block, or NULL */
 UNIV_INLINE
 buf_block_t*
 buf_page_get_block(
@@ -705,7 +722,7 @@ buf_page_get_block(
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Gets a pointer to the memory frame of a block.
-@return	pointer to the frame */
+@return pointer to the frame */
 UNIV_INLINE
 buf_frame_t*
 buf_block_get_frame(
@@ -742,50 +759,6 @@ ok:
 }
 #endif /* UNIV_DEBUG */
 
-/*********************************************************************//**
-Gets the space id of a block.
-@return	space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-{
-	ut_ad(bpage);
-	ut_a(buf_page_in_file(bpage));
-
-	return(bpage->space);
-}
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return	space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-{
-	ut_ad(block);
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	return(block->page.space);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return	page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-{
-	ut_ad(bpage);
-	ut_a(buf_page_in_file(bpage));
-
-	return(bpage->offset);
-}
 /***********************************************************************
 FIXME_FTS Gets the frame the pointer is pointing to. */
 UNIV_INLINE
@@ -804,64 +777,6 @@ buf_frame_align(
         return(frame);
 }
 
-/*********************************************************************//**
-Gets the page number of a block.
-@return	page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-{
-	ut_ad(block);
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	return(block->page.offset);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return	compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
-	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
-{
-	return(bpage->zip.ssize
-	       ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return	compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
-	const buf_block_t*	block)	/*!< in: pointer to the control block */
-{
-	return(block->page.zip.ssize
-	       ? (UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return	compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
-	const byte*	ptr)	/*!< in: pointer to the page */
-{
-	return(buf_block_get_page_zip(buf_block_align(ptr)));
-}
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
 /**********************************************************************//**
 Gets the space id, page offset, and byte offset within page of a
 pointer pointing to a buffer frame containing a file page. */
@@ -885,7 +800,7 @@ buf_ptr_get_fsp_addr(
 /**********************************************************************//**
 Gets the hash value of the page the pointer is pointing to. This can be used
 in searches in the lock hash table.
-@return	lock hash value */
+@return lock hash value */
 UNIV_INLINE
 ulint
 buf_block_get_lock_hash_val(
@@ -894,10 +809,9 @@ buf_block_get_lock_hash_val(
 {
 	ut_ad(block);
 	ut_ad(buf_page_in_file(&block->page));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
-	      || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_X)
+	      || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_S));
+
 	return(block->lock_hash_val);
 }
 
@@ -912,8 +826,8 @@ buf_page_alloc_descriptor(void)
 {
 	buf_page_t*	bpage;
 
-	bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
-	ut_d(memset(bpage, 0, sizeof *bpage));
+	bpage = (buf_page_t*) ut_zalloc_nokey(sizeof *bpage);
+	ut_ad(bpage);
 	UNIV_MEM_ALLOC(bpage, sizeof *bpage);
 
 	return(bpage);
@@ -942,13 +856,13 @@ buf_block_free(
 
 	buf_pool_mutex_enter(buf_pool);
 
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
 	buf_LRU_block_free_non_file_page(block);
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	buf_pool_mutex_exit(buf_pool);
 }
@@ -956,7 +870,7 @@ buf_block_free(
 
 /*********************************************************************//**
 Copies contents of a buffer frame to a given buffer.
-@return	buf */
+@return buf */
 UNIV_INLINE
 byte*
 buf_frame_copy(
@@ -973,23 +887,9 @@ buf_frame_copy(
 
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return	the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-{
-	return((space << 20) + space + offset);
-}
-
-/********************************************************************//**
 Gets the youngest modification log sequence number for a frame.
 Returns zero if not file page or no modification occurred yet.
-@return	newest modification to page */
+@return newest modification to page */
 UNIV_INLINE
 lsn_t
 buf_page_get_newest_modification(
@@ -998,7 +898,7 @@ buf_page_get_newest_modification(
 					page frame */
 {
 	lsn_t		lsn;
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+	BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 
 	mutex_enter(block_mutex);
 
@@ -1023,13 +923,17 @@ buf_block_modify_clock_inc(
 /*=======================*/
 	buf_block_t*	block)	/*!< in: block */
 {
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage((buf_page_t*) block);
 
-	ut_ad((buf_pool_mutex_own(buf_pool)
-	       && (block->page.buf_fix_count == 0))
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
+	/* No latch is acquired for the shared temporary tablespace. */
+	if (!fsp_is_system_temporary(block->page.id.space())) {
+		ut_ad((buf_pool_mutex_own(buf_pool)
+		       && (block->page.buf_fix_count == 0))
+		      || rw_lock_own_flagged(&block->lock,
+					     RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+	}
+#endif /* UNIV_DEBUG */
 
 	block->modify_clock++;
 }
@@ -1037,38 +941,45 @@ buf_block_modify_clock_inc(
 /********************************************************************//**
 Returns the value of the modify clock. The caller must have an s-lock
 or x-lock on the block.
-@return	value */
+@return value */
 UNIV_INLINE
 ib_uint64_t
 buf_block_get_modify_clock(
 /*=======================*/
 	buf_block_t*	block)	/*!< in: block */
 {
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	/* No latch is acquired for the shared temporary tablespace. */
+	if (!fsp_is_system_temporary(block->page.id.space())) {
+		ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
+		      || rw_lock_own(&(block->lock), RW_LOCK_X)
+		      || rw_lock_own(&(block->lock), RW_LOCK_SX));
+	}
+#endif /* UNIV_DEBUG */
 
 	return(block->modify_clock);
 }
 
-/*******************************************************************//**
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param[in,out]	bpage	block to bufferfix
+@return the buffer-fix count after the increment */
 UNIV_INLINE
-void
+ulint
 buf_block_fix(
-/*===========*/
-	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+	buf_page_t*	bpage)
 {
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
+	return(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
+}
 
-	mutex_enter(block_mutex);
-	++block->page.buf_fix_count;
-	mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+/** Increments the bufferfix count.
+@param[in,out]	block	block to bufferfix
+@return the buffer-fix count after the increment */
+UNIV_INLINE
+ulint
+buf_block_fix(
+	buf_block_t*	block)
+{
+	return(buf_block_fix(&block->page));
 }
 
 /*******************************************************************//**
@@ -1077,47 +988,48 @@ UNIV_INLINE
 void
 buf_block_buf_fix_inc_func(
 /*=======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line */
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 	buf_block_t*	block)	/*!< in/out: block to bufferfix */
 {
-#ifdef UNIV_SYNC_DEBUG
-	ibool	ret;
-
-	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
-	ut_a(ret);
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
-	ut_ad(mutex_own(&block->mutex));
+#ifdef UNIV_DEBUG
+	/* No debug latch is acquired if the block belongs to the system
+	temporary tablespace. A debug latch is not of much help if access
+	to the block is single threaded. */
+	if (!fsp_is_system_temporary(block->page.id.space())) {
+		ibool   ret;
+		ret = rw_lock_s_lock_nowait(&block->debug_latch, file, line);
+		ut_a(ret);
+	}
+#endif /* UNIV_DEBUG */
 
-	++block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+	buf_block_fix(block);
 }
 
-/*******************************************************************//**
-Decrements the bufferfix count. */
+/** Decrements the bufferfix count.
+@param[in,out]	bpage	block to bufferunfix
+@return the remaining buffer-fix count */
 UNIV_INLINE
-void
+ulint
 buf_block_unfix(
-/*============*/
-	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
+	buf_page_t*	bpage)
 {
-	ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
+	ulint	count = my_atomic_add32((int32*) &bpage->buf_fix_count, -1) - 1;
+	ut_ad(count + 1 != 0);
+	return(count);
+}
 
-	mutex_enter(block_mutex);
-	--block->page.buf_fix_count;
-	mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+/** Decrements the bufferfix count.
+@param[in,out]	block	block to bufferunfix
+@return the remaining buffer-fix count */
+UNIV_INLINE
+ulint
+buf_block_unfix(
+	buf_block_t*	block)
+{
+	return(buf_block_unfix(&block->page));
 }
 
 /*******************************************************************//**
@@ -1128,39 +1040,34 @@ buf_block_buf_fix_dec(
 /*==================*/
 	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
 {
-	ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
-	mutex_enter(&block->mutex);
-	--block->page.buf_fix_count;
-	mutex_exit(&block->mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+	buf_block_unfix(block);
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&block->debug_latch);
-#endif
+#ifdef UNIV_DEBUG
+	/* No debug latch is acquired if the block belongs to the system
+	temporary tablespace. A debug latch is not of much help if access
+	to the block is single threaded. */
+	if (!fsp_is_system_temporary(block->page.id.space())) {
+		rw_lock_s_unlock(&block->debug_latch);
+	}
+#endif /* UNIV_DEBUG */
 }
 
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page
+/** Returns the buffer pool instance selected by a page id.
+@param[in]	page_id	page id
 @return buffer pool */
 UNIV_INLINE
 buf_pool_t*
 buf_pool_get(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
+	const page_id_t&	page_id)
 {
-	ulint	fold;
-	ulint	index;
-	ulint	ignored_offset;
+	/* Drop the low 6 bits: 2log of BUF_READ_AHEAD_AREA (64) */
+	const ulint	area_no = page_id.page_no() >> 6;
 
-	ignored_offset = offset >> 6; /* 2log of BUF_READ_AHEAD_AREA (64)*/
-	fold = buf_page_address_fold(space, ignored_offset);
-	index = fold % srv_buf_pool_instances;
-	return(&buf_pool_ptr[index]);
+	/* Fold the (space, area) pair to pick one of the instances. */
+	page_id_t	area_id(page_id.space(), area_no);
+
+	const ulint	n = area_id.fold() % srv_buf_pool_instances;
+	return(&buf_pool_ptr[n]);
 }
 
 /******************************************************************//**
@@ -1178,103 +1085,98 @@ buf_pool_from_array(
 	return(&buf_pool_ptr[index]);
 }
 
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return	block, NULL if not found */
+/** Returns the control block of a file page, NULL if not found.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@return block, NULL if not found */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_low(
-/*==================*/
-	buf_pool_t*	buf_pool,/*!< buffer pool instance */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space */
-	ulint		fold)	/*!< in: buf_page_address_fold(space, offset) */
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id)
 {
 	buf_page_t*	bpage;
 
-#ifdef UNIV_SYNC_DEBUG
-	ulint		hash_fold;
+#ifdef UNIV_DEBUG
 	rw_lock_t*	hash_lock;
 
-	hash_fold = buf_page_address_fold(space, offset);
-	ut_ad(hash_fold == fold);
-
-	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
-	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
-	      || rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+	hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());
+	ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)
+	      || rw_lock_own(hash_lock, RW_LOCK_S));
+#endif /* UNIV_DEBUG */
 
 	/* Look for the page in the hash table */
 
-	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
+	HASH_SEARCH(hash, buf_pool->page_hash, page_id.fold(), buf_page_t*,
+		    bpage,
 		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
 			  && buf_page_in_file(bpage)),
-		    bpage->space == space && bpage->offset == offset);
+		    page_id.equals_to(bpage->id));
 	if (bpage) {
 		ut_a(buf_page_in_file(bpage));
 		ut_ad(bpage->in_page_hash);
 		ut_ad(!bpage->in_zip_hash);
+		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 	}
 
 	return(bpage);
 }
 
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+/** Returns the control block of a file page, NULL if not found.
 If the block is found and lock is not NULL then the appropriate
 page_hash lock is acquired in the specified lock mode. Otherwise,
 mode value is ignored. It is up to the caller to release the
 lock. If the block is found and the lock is NULL then the page_hash
 lock is released by this function.
-@return	block, NULL if not found, or watch sentinel (if watch is true) */
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@param[in,out]	lock		lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@param[in]	watch		if true, return watch sentinel also.
+@return pointer to the bpage, or a watch sentinel if watch is true, or NULL;
+if NULL is returned, *lock is also NULL. */
 UNIV_INLINE
 buf_page_t*
 buf_page_hash_get_locked(
-/*=====================*/
-					/*!< out: pointer to the bpage,
-					or NULL; if NULL, hash_lock
-					is also NULL. */
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	rw_lock_t**	lock,		/*!< in/out: lock of the page
-					hash acquired if bpage is
-					found. NULL otherwise. If NULL
-					is passed then the hash_lock
-					is released by this function */
-	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
-	bool		watch)		/*!< in: if true, return watch
-					sentinel also. */
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id,
+	rw_lock_t**		lock,
+	ulint			lock_mode,
+	bool			watch)
 {
 	buf_page_t*	bpage = NULL;
-	ulint		fold;
 	rw_lock_t*	hash_lock;
-	ulint		mode = RW_LOCK_SHARED;
+	ulint		mode = RW_LOCK_S;
 
 	if (lock != NULL) {
 		*lock = NULL;
-		ut_ad(lock_mode == RW_LOCK_EX
-		      || lock_mode == RW_LOCK_SHARED);
+		ut_ad(lock_mode == RW_LOCK_X
+		      || lock_mode == RW_LOCK_S);
 		mode = lock_mode;
 	}
 
-	fold = buf_page_address_fold(space, offset);
-	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
+	hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
-	      && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)
+	      && !rw_lock_own(hash_lock, RW_LOCK_S));
 
-	if (mode == RW_LOCK_SHARED) {
+	if (mode == RW_LOCK_S) {
 		rw_lock_s_lock(hash_lock);
+
+		/* If we do not own buf_pool_mutex, page_hash can be changed. */
+		hash_lock = hash_lock_s_confirm(
+			hash_lock, buf_pool->page_hash, page_id.fold());
 	} else {
 		rw_lock_x_lock(hash_lock);
+		/* If we do not own buf_pool_mutex, page_hash can be changed. */
+		hash_lock = hash_lock_x_confirm(
+			hash_lock, buf_pool->page_hash, page_id.fold());
 	}
 
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 
 	if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
 		if (!watch) {
@@ -1284,8 +1186,7 @@ buf_page_hash_get_locked(
 	}
 
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(offset == bpage->offset);
-	ut_ad(space == bpage->space);
+	ut_ad(page_id.equals_to(bpage->id));
 
 	if (lock == NULL) {
 		/* The caller wants us to release the page_hash lock */
@@ -1297,7 +1198,7 @@ buf_page_hash_get_locked(
 	}
 
 unlock_and_exit:
-	if (mode == RW_LOCK_SHARED) {
+	if (mode == RW_LOCK_S) {
 		rw_lock_s_unlock(hash_lock);
 	} else {
 		rw_lock_x_unlock(hash_lock);
@@ -1306,52 +1207,46 @@ exit:
 	return(bpage);
 }
 
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
+/** Returns the control block of a file page, NULL if not found.
 If the block is found and lock is not NULL then the appropriate
 page_hash lock is acquired in the specified lock mode. Otherwise,
 mode value is ignored. It is up to the caller to release the
 lock. If the block is found and the lock is NULL then the page_hash
 lock is released by this function.
-@return	block, NULL if not found */
+@param[in]	buf_pool	buffer pool instance
+@param[in]	page_id		page id
+@param[in,out]	lock		lock of the page hash acquired if bpage is
+found, NULL otherwise. If NULL is passed then the hash_lock is released by
+this function.
+@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
+lock == NULL
+@return pointer to the block or NULL; if NULL, lock is also NULL. */
 UNIV_INLINE
 buf_block_t*
 buf_block_hash_get_locked(
-/*=====================*/
-					/*!< out: pointer to the bpage,
-					or NULL; if NULL, hash_lock
-					is also NULL. */
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	rw_lock_t**	lock,		/*!< in/out: lock of the page
-					hash acquired if bpage is
-					found. NULL otherwise. If NULL
-					is passed then the hash_lock
-					is released by this function */
-	ulint		lock_mode)	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id,
+	rw_lock_t**		lock,
+	ulint			lock_mode)
 {
 	buf_page_t*	bpage = buf_page_hash_get_locked(buf_pool,
-							 space,
-							 offset,
+							 page_id,
 							 lock,
 							 lock_mode);
 	buf_block_t*	block = buf_page_get_block(bpage);
 
-	if (block) {
+	if (block != NULL) {
+
 		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_SYNC_DEBUG
 		ut_ad(!lock || rw_lock_own(*lock, lock_mode));
-#endif /* UNIV_SYNC_DEBUG */
+
 		return(block);
 	} else if (bpage) {
 		/* It is not a block. Just a bpage */
 		ut_ad(buf_page_in_file(bpage));
 
 		if (lock) {
-			if (lock_mode == RW_LOCK_SHARED) {
+			if (lock_mode == RW_LOCK_S) {
 				rw_lock_s_unlock(*lock);
 			} else {
 				rw_lock_x_unlock(*lock);
@@ -1366,23 +1261,19 @@ buf_block_hash_get_locked(
 	return(NULL);
 }
 
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
-
+/** Returns TRUE if the page can be found in the buffer pool hash table.
 NOTE that it is possible that the page is not yet read from disk,
 though.
-
-@return	TRUE if found in the page hash table */
+@param[in]	page_id	page id
+@return TRUE if found in the page hash table */
 UNIV_INLINE
 ibool
 buf_page_peek(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
-	buf_pool_t*		buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
-	return(buf_page_hash_get(buf_pool, space, offset) != NULL);
+	return(buf_page_hash_get(buf_pool, page_id) != NULL);
 }
 
 /********************************************************************//**
@@ -1393,19 +1284,27 @@ buf_page_release_zip(
 /*=================*/
 	buf_page_t*	bpage)		/*!< in: buffer block */
 {
-	buf_block_t*	block;
-
-	block = (buf_block_t*) bpage;
+	ut_ad(bpage);
+	ut_a(bpage->buf_fix_count > 0);
 
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_FILE_PAGE:
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&block->debug_latch);
-#endif /* UNUV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	{
+		/* No debug latch is acquired if the block belongs to
+		the system temporary tablespace. A debug latch is not of
+		much help if access to the block is single threaded. */
+		buf_block_t*	block = reinterpret_cast<buf_block_t*>(bpage);
+		if (!fsp_is_system_temporary(block->page.id.space())) {
+			rw_lock_s_unlock(&block->debug_latch);
+		}
+	}
 		/* Fall through */
+#endif /* UNIV_DEBUG */
+
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
-		buf_block_unfix(block);
+		buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage));
 		return;
 
 	case BUF_BLOCK_POOL_WATCH:
@@ -1420,31 +1319,34 @@ buf_page_release_zip(
 }
 
 /********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
+Releases the block latch of the given type (S, SX or X), if specified. */
 UNIV_INLINE
 void
-buf_page_release(
-/*=============*/
+buf_page_release_latch(
+/*===================*/
 	buf_block_t*	block,		/*!< in: buffer block */
 	ulint		rw_latch)	/*!< in: RW_S_LATCH, RW_X_LATCH,
 					RW_NO_LATCH */
 {
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#ifdef UNIV_DEBUG
+	/* No debug latch is acquired if the block belongs to the system
+	temporary tablespace. A debug latch is not of much help if access
+	to the block is single threaded. */
+	if (!fsp_is_system_temporary(block->page.id.space())) {
+		rw_lock_s_unlock(&block->debug_latch);
+	}
+#endif /* UNIV_DEBUG */
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&(block->debug_latch));
-#endif
 	if (rw_latch == RW_S_LATCH) {
-		rw_lock_s_unlock(&(block->lock));
+		rw_lock_s_unlock(&block->lock);
+	} else if (rw_latch == RW_SX_LATCH) {
+		rw_lock_sx_unlock(&block->lock);
 	} else if (rw_latch == RW_X_LATCH) {
-		rw_lock_x_unlock(&(block->lock));
+		rw_lock_x_unlock(&block->lock);
 	}
-
-	buf_block_unfix(block);
 }
 
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 /*********************************************************************//**
 Adds latch level info for the rw-lock protecting the buffer frame. This
 should be called in the debug version after a successful latching of a
@@ -1455,12 +1357,12 @@ buf_block_dbg_add_level(
 /*====================*/
 	buf_block_t*	block,	/*!< in: buffer page
 				where we have acquired latch */
-	ulint		level)	/*!< in: latching order level */
+	latch_level_t	level)	/*!< in: latching order level */
 {
-	sync_thread_add_level(&block->lock, level, FALSE);
+	sync_check_lock(&block->lock, level);
 }
 
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 /********************************************************************//**
 Acquire mutex on all buffer pool instances. */
 UNIV_INLINE
@@ -1468,12 +1370,9 @@ void
 buf_pool_mutex_enter_all(void)
 /*==========================*/
 {
-	ulint   i;
-
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
 
-		buf_pool = buf_pool_from_array(i);
 		buf_pool_mutex_enter(buf_pool);
 	}
 }
@@ -1531,4 +1430,35 @@ buf_page_get_frame(
 	}
 }
 
+/** Check whether a page stored earlier might no longer be in the buffer pool.
+@param[in]	withdraw_clock	buf_withdraw_clock value saved with the page
+@retval true	if buf_pool_withdrawing is set or the clock has since changed */
+UNIV_INLINE
+bool
+buf_pool_is_obsolete(
+	ulint	withdraw_clock)
+{
+	return(buf_pool_withdrawing
+	       || buf_withdraw_clock != withdraw_clock);
+}
+
+/** Round a requested buffer pool size up to a whole multiple of
+srv_buf_pool_instances * srv_buf_pool_chunk_unit, after raising it to
+at least srv_buf_pool_min_size.
+@param[in]	size	requested size in bytes
+@return the aligned size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_size_align(
+	ulint	size)
+{
+	const ib_uint64_t	unit
+		= ((ib_uint64_t)srv_buf_pool_instances)
+		* srv_buf_pool_chunk_unit;
+
+	size = ut_max(size, srv_buf_pool_min_size);
+
+	return(size % unit == 0 ? size : (ulint)((size / unit + 1) * unit));
+}
+
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
index 6818345f965..9405251dc74 100644
--- a/storage/innobase/include/buf0checksum.h
+++ b/storage/innobase/include/buf0checksum.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,26 +30,25 @@ Created Aug 11, 2011 Vasil Dimov
 
 #include "buf0types.h"
 
-/** Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return	checksum */
-UNIV_INTERN
-ib_uint32_t
+/** Calculates the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
+variants. Note that we must be careful to calculate the same value on 32-bit
+and 64-bit architectures.
+@param[in]	page			buffer page (UNIV_PAGE_SIZE bytes)
+@param[in]	use_legacy_big_endian	if true then use big endian
+byte order when converting byte strings to integers
+@return checksum */
+uint32_t
 buf_calc_page_crc32(
-/*================*/
-	const byte*	page);	/*!< in: buffer page */
+	const byte*	page,
+	bool		use_legacy_big_endian = false);
 
 /********************************************************************//**
 Calculates a page checksum which is stored to the page when it is written
 to a file. Note that we must be careful to calculate the same value on
 32-bit and 64-bit architectures.
-@return	checksum */
-UNIV_INTERN
+@return checksum */
 ulint
 buf_calc_page_new_checksum(
 /*=======================*/
@@ -62,22 +61,22 @@ checksum.
 NOTE: we must first store the new formula checksum to
 FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
 because this takes that field as an input!
-@return	checksum */
-UNIV_INTERN
+@return checksum */
 ulint
 buf_calc_page_old_checksum(
 /*=======================*/
 	const byte*	page);	/*!< in: buffer page */
 
+
 /********************************************************************//**
 Return a printable string describing the checksum algorithm.
-@return	algorithm name */
-UNIV_INTERN
+@return algorithm name */
 const char*
 buf_checksum_algorithm_name(
 /*========================*/
 	srv_checksum_algorithm_t	algo);	/*!< in: algorithm */
 
 extern ulong	srv_checksum_algorithm;
+extern bool	legacy_big_endian_checksum;
 
 #endif /* buf0checksum_h */
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index a62a6400d97..eb13c3b35e5 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +29,7 @@ Created 2011/12/19 Inaam Rana
 #include "univ.i"
 #include "ut0byte.h"
 #include "log0log.h"
+#include "buf0types.h"
 #include "log0recv.h"
 
 #ifndef UNIV_HOTBACKUP
@@ -40,9 +41,10 @@ extern ibool		buf_dblwr_being_created;
 
 /****************************************************************//**
 Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
+doublewrite buffer is placed on the trx system header page.
+@return true if successful, false if not. */
+MY_ATTRIBUTE((warn_unused_result))
+bool
 buf_dblwr_create(void);
 /*==================*/
 
@@ -51,29 +53,24 @@ At a database startup initializes the doublewrite buffer memory structure if
 we already have a doublewrite buffer created in the data files. If we are
 upgrading to an InnoDB version which supports multiple tablespaces, then this
 function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
+recovery, this function loads the pages from double write buffer into memory.
+@return DB_SUCCESS or error code */
+dberr_t
 buf_dblwr_init_or_load_pages(
-/*=========================*/
 	os_file_t	file,
-	char*		path,
-	bool		load_corrupt_pages);
+	const char*	path);
 
-/****************************************************************//**
-Process the double write buffer pages. */
+/** Process and remove the double write buffer pages for all tablespaces. */
 void
 buf_dblwr_process(void);
-/*===================*/
 
 /****************************************************************//**
 frees doublewrite buffer. */
-UNIV_INTERN
 void
 buf_dblwr_free(void);
 /*================*/
 /********************************************************************//**
 Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
 void
 buf_dblwr_update(
 /*=============*/
@@ -83,7 +80,6 @@ buf_dblwr_update(
 Determines if a page number is located inside the doublewrite buffer.
 @return TRUE if the location is inside the two blocks of the
 doublewrite buffer */
-UNIV_INTERN
 ibool
 buf_dblwr_page_inside(
 /*==================*/
@@ -92,18 +88,23 @@ buf_dblwr_page_inside(
 Posts a buffer page for writing. If the doublewrite memory buffer is
 full, calls buf_dblwr_flush_buffered_writes and waits for for free
 space to appear. */
-UNIV_INTERN
 void
 buf_dblwr_add_to_batch(
 /*====================*/
 	buf_page_t*	bpage);	/*!< in: buffer block to write */
+
+/********************************************************************//**
+Flush a batch of writes to the datafiles that have already been
+written to the dblwr buffer on disk. */
+void
+buf_dblwr_sync_datafiles();
+
 /********************************************************************//**
 Flushes possible buffered writes from the doublewrite memory buffer to disk,
 and also wakes up the aio thread if simulated aio is used. It is very
 important to call this function after a batch of writes has been posted,
 and also when we may have to wait for a page latch! Otherwise a deadlock
 of threads can occur. */
-UNIV_INTERN
 void
 buf_dblwr_flush_buffered_writes(void);
 /*=================================*/
@@ -115,7 +116,6 @@ flushes in the doublewrite buffer are in use we wait here for one to
 become free. We are guaranteed that a slot will become free because any
 thread that is using a slot must also release the slot before leaving
 this function. */
-UNIV_INTERN
 void
 buf_dblwr_write_single_page(
 /*========================*/
diff --git a/storage/innobase/include/buf0dump.h b/storage/innobase/include/buf0dump.h
index c704a8e97e0..3dbddfa6bf5 100644
--- a/storage/innobase/include/buf0dump.h
+++ b/storage/innobase/include/buf0dump.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
 a dump. This function is called by MySQL code via buffer_pool_dump_now()
 and it should return immediately because the whole MySQL is frozen during
 its execution. */
-UNIV_INTERN
 void
 buf_dump_start();
 /*============*/
@@ -43,7 +42,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
 a load. This function is called by MySQL code via buffer_pool_load_now()
 and it should return immediately because the whole MySQL is frozen during
 its execution. */
-UNIV_INTERN
 void
 buf_load_start();
 /*============*/
@@ -52,7 +50,6 @@ buf_load_start();
 Aborts a currently running buffer pool load. This function is called by
 MySQL code via buffer_pool_load_abort() and it should return immediately
 because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
 void
 buf_load_abort();
 /*============*/
@@ -62,7 +59,7 @@ This is the main thread for buffer pool dump/load. It waits for an
 event and when waked up either performs a dump or load and sleeps
 again.
 @return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
 os_thread_ret_t
 DECLARE_THREAD(buf_dump_thread)(
 /*============================*/
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index f1ca1039ccb..40083798d48 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -31,15 +31,23 @@ Created 11/5/1995 Heikki Tuuri
 #include "ut0byte.h"
 #include "log0log.h"
 #ifndef UNIV_HOTBACKUP
-#include "mtr0types.h"
 #include "buf0types.h"
 
 /** Flag indicating if the page_cleaner is in active state. */
-extern ibool buf_page_cleaner_is_active;
+extern bool buf_page_cleaner_is_active;
+
+#ifdef UNIV_DEBUG
+
+/** Value of MySQL global variable used to disable page cleaner. */
+extern my_bool		innodb_page_cleaner_disabled_debug;
+
+#endif /* UNIV_DEBUG */
 
 /** Event to synchronise with the flushing. */
 extern os_event_t	buf_flush_event;
 
+class ut_stage_alter_t;
+
 /** Handled page counters for a single flush */
 struct flush_counters_t {
 	ulint	flushed;	/*!< number of dirty pages flushed */
@@ -50,7 +58,6 @@ struct flush_counters_t {
 
 /********************************************************************//**
 Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
 void
 buf_flush_remove(
 /*=============*/
@@ -59,7 +66,6 @@ buf_flush_remove(
 Relocates a buffer control block on the flush_list.
 Note that it is assumed that the contents of bpage has already been
 copied to dpage. */
-UNIV_INTERN
 void
 buf_flush_relocate_on_flush_list(
 /*=============================*/
@@ -67,22 +73,25 @@ buf_flush_relocate_on_flush_list(
 	buf_page_t*	dpage);	/*!< in/out: destination block */
 /********************************************************************//**
 Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
 void
 buf_flush_write_complete(
 /*=====================*/
 	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
 #endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
+/** Initialize a page for writing to the tablespace.
+@param[in]	block		buffer block; NULL if bypassing the buffer pool
+@param[in,out]	page		page frame
+@param[in,out]	page_zip_	compressed page, or NULL if uncompressed
+@param[in]	newest_lsn	newest modification LSN to the page
+@param[in]	skip_checksum	whether to disable the page checksum */
 void
 buf_flush_init_for_writing(
-/*=======================*/
-	byte*	page,		/*!< in/out: page */
-	void*	page_zip_,	/*!< in/out: compressed page, or NULL */
-	lsn_t	newest_lsn);	/*!< in: newest modification lsn
-				to the page */
+	const buf_block_t*	block,
+	byte*			page,
+	void*			page_zip_,
+	lsn_t			newest_lsn,
+	bool			skip_checksum);
+
 #ifndef UNIV_HOTBACKUP
 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 /********************************************************************//**
@@ -91,36 +100,54 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this
 function, and they will be released by this function after flushing.
 This is loosely based on buf_flush_batch() and buf_flush_page().
 @return TRUE if the page was flushed and the mutexes released */
-UNIV_INTERN
 ibool
 buf_flush_page_try(
 /*===============*/
 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
 	buf_block_t*	block)		/*!< in/out: buffer control block */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
+/** Do flushing batch of a given type.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in,out]	buf_pool	buffer pool instance
+@param[in]	type		flush type
+@param[in]	min_n		wished minimum number of blocks flushed
+(it is not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out]	n		the number of pages which were processed is
+passed back to caller. Ignored if NULL
+@retval true	if a batch was queued successfully.
+@retval false	if another batch of same type was already running. */
+bool
+buf_flush_do_batch(
+	buf_pool_t*		buf_pool,
+	buf_flush_t		type,
+	ulint			min_n,
+	lsn_t			lsn_limit,
+	flush_counters_t*	n);
+
+
+/** This utility flushes dirty blocks from the end of the flush list of all
+buffer pool instances.
 NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	in the case BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out]	n_processed	the number of pages which were processed is
+passed back to caller. Ignored if NULL.
 @return true if a batch was queued successfully for each buffer pool
 instance. false if another batch of same type was already running in
 at least one of the buffer pool instance */
-UNIV_INTERN
 bool
-buf_flush_list(
-/*===========*/
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
-					blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	ulint*		n_processed);	/*!< out: the number of pages
-					which were processed is passed
-					back to caller. Ignored if NULL */
+buf_flush_lists(
+	ulint			min_n,
+	lsn_t			lsn_limit,
+	ulint*			n_processed);
+
 /******************************************************************//**
 This function picks up a single page from the tail of the LRU
 list, flushes it (if it is dirty), removes it from page_hash and LRU
@@ -128,26 +155,31 @@ list and puts it on the free list. It is called from user threads when
 they are unable to find a replaceable page at the tail of the LRU
 list i.e.: when the background LRU flushing in the page_cleaner thread
 is not fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
+@return true if success. */
+bool
 buf_flush_single_page_from_LRU(
 /*===========================*/
 	buf_pool_t*	buf_pool);	/*!< in/out: buffer pool instance */
 /******************************************************************//**
 Waits until a flush batch of the given type ends */
-UNIV_INTERN
 void
 buf_flush_wait_batch_end(
 /*=====================*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	buf_flush_t	type);		/*!< in: BUF_FLUSH_LRU
 					or BUF_FLUSH_LIST */
+/**
+Waits until a flush batch of the given lsn ends
+@param[in]	new_oldest	target oldest_modified_lsn to wait for */
+
+void
+buf_flush_wait_flushed(
+	lsn_t		new_oldest);
+
 /******************************************************************//**
 Waits until a flush batch of the given type ends. This is called by
 a thread that only wants to wait for a flush to end but doesn't do
 any flushing itself. */
-UNIV_INTERN
 void
 buf_flush_wait_batch_end_wait_only(
 /*===============================*/
@@ -162,8 +194,13 @@ UNIV_INLINE
 void
 buf_flush_note_modification(
 /*========================*/
-	buf_block_t*	block,	/*!< in: block which is modified */
-	mtr_t*		mtr);	/*!< in: mtr */
+	buf_block_t*	block,		/*!< in: block which is modified */
+	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
+					set of mtr's */
+	lsn_t		end_lsn,	/*!< in: end lsn of the last mtr in the
+					set of mtr's */
+	FlushObserver*	observer);	/*!< in: flush observer */
+
 /********************************************************************//**
 This function should be called when recovery has modified a buffer page. */
 UNIV_INLINE
@@ -178,23 +215,52 @@ buf_flush_recv_note_modification(
 /********************************************************************//**
 Returns TRUE if the file page block is immediately suitable for replacement,
 i.e., transition FILE_PAGE => NOT_USED allowed.
-@return	TRUE if can replace immediately */
-UNIV_INTERN
+@return TRUE if can replace immediately */
 ibool
 buf_flush_ready_for_replace(
 /*========================*/
 	buf_page_t*	bpage);	/*!< in: buffer control block, must be
 				buf_page_in_file(bpage) and in the LRU list */
+
+#ifdef UNIV_DEBUG
+/** Disables page cleaner threads (coordinator and workers).
+It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
+@param[in]	thd		thread handle
+@param[in]	var		pointer to system variable
+@param[out]	var_ptr		where the formal string goes
+@param[in]	save		immediate result from check function */
+void
+buf_flush_page_cleaner_disabled_debug_update(
+	THD*				thd,
+	struct st_mysql_sys_var*	var,
+	void*				var_ptr,
+	const void*			save);
+#endif /* UNIV_DEBUG */
+
 /******************************************************************//**
 page_cleaner thread tasked with flushing dirty pages from the buffer
-pools. As of now we'll have only one instance of this thread.
+pools. As of now we'll have only one coordinator thread.
 @return a dummy parameter */
-extern "C" UNIV_INTERN
+extern "C"
 os_thread_ret_t
-DECLARE_THREAD(buf_flush_page_cleaner_thread)(
+DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
+/*===============================================*/
+	void*	arg);		/*!< in: a dummy parameter required by
+				os_thread_create */
+/******************************************************************//**
+Worker thread of page_cleaner.
+@return a dummy parameter */
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_flush_page_cleaner_worker)(
 /*==========================================*/
 	void*	arg);		/*!< in: a dummy parameter required by
 				os_thread_create */
+/******************************************************************//**
+Initialize page_cleaner. */
+void
+buf_flush_page_cleaner_init(void);
+/*=============================*/
 /*********************************************************************//**
 Clears up tail of the LRU lists:
 * Put replaceable pages at the tail of LRU to the free list
@@ -202,13 +268,11 @@ Clears up tail of the LRU lists:
 The depth to which we scan each buffer pool is controlled by dynamic
 config parameter innodb_LRU_scan_depth.
 @return total pages flushed */
-UNIV_INTERN
 ulint
-buf_flush_LRU_tail(void);
-/*====================*/
+buf_flush_LRU_lists(void);
+/*=====================*/
 /*********************************************************************//**
 Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
 void
 buf_flush_wait_LRU_batch_end(void);
 /*==============================*/
@@ -216,8 +280,7 @@ buf_flush_wait_LRU_batch_end(void);
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /******************************************************************//**
 Validates the flush list.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 buf_flush_validate(
 /*===============*/
@@ -228,14 +291,12 @@ buf_flush_validate(
 Initialize the red-black tree to speed up insertions into the flush_list
 during recovery process. Should be called at the start of recovery
 process before any page has been read/written. */
-UNIV_INTERN
 void
 buf_flush_init_flush_rbt(void);
 /*==========================*/
 
 /********************************************************************//**
 Frees up the red-black tree. */
-UNIV_INTERN
 void
 buf_flush_free_flush_rbt(void);
 /*==========================*/
@@ -246,10 +307,9 @@ NOTE: in simulated aio we must call
 os_aio_simulated_wake_handler_threads after we have posted a batch of
 writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
 held upon entering this function, and they will be released by this
-function if it returns true.
-@return TRUE if the page was flushed */
-UNIV_INTERN
-bool
+function.
+@return TRUE if page was flushed */
+ibool
 buf_flush_page(
 /*===========*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
@@ -258,8 +318,7 @@ buf_flush_page(
 	bool		sync);		/*!< in: true if sync IO request */
 /********************************************************************//**
 Returns true if the block is modified and ready for flushing.
-@return	true if can flush immediately */
-UNIV_INTERN
+@return true if can flush immediately */
 bool
 buf_flush_ready_for_flush(
 /*======================*/
@@ -268,26 +327,116 @@ buf_flush_ready_for_flush(
 	buf_flush_t	flush_type)/*!< in: type of flush */
 	MY_ATTRIBUTE((warn_unused_result));
 
-#ifdef UNIV_DEBUG
 /******************************************************************//**
 Check if there are any dirty pages that belong to a space id in the flush
 list in a particular buffer pool.
-@return	number of dirty pages present in a single buffer pool */
-UNIV_INTERN
+@return number of dirty pages present in a single buffer pool */
 ulint
 buf_pool_get_dirty_pages_count(
 /*===========================*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool */
-	ulint		id);		/*!< in: space id to check */
+	ulint		id,		/*!< in: space id to check */
+	FlushObserver*	observer);	/*!< in: flush observer to check */
 /******************************************************************//**
 Check if there are any dirty pages that belong to a space id in the flush list.
-@return	count of dirty pages present in all the buffer pools */
-UNIV_INTERN
+@return count of dirty pages present in all the buffer pools */
 ulint
 buf_flush_get_dirty_pages_count(
 /*============================*/
-	ulint		id);		/*!< in: space id to check */
-#endif /* UNIV_DEBUG */
+	ulint		id,		/*!< in: space id to check */
+	FlushObserver*	observer);	/*!< in: flush observer to check */
+
+/*******************************************************************//**
+Synchronously flush dirty blocks from the end of the flush list of all buffer
+pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages! */
+void
+buf_flush_sync_all_buf_pools(void);
+/*==============================*/
+
+/** Request IO burst and wake page_cleaner up.
+@param[in]	lsn_limit	upper limit of LSN to be flushed */
+void
+buf_flush_request_force(
+	lsn_t	lsn_limit);
+
+/** We use FlushObserver to track flushing of non-redo logged pages in bulk
+create index (BtrBulk.cc). Since we disable redo logging during an index
+build, we need to make sure that all dirty pages modified by the index build
+are flushed to disk before any redo logged operations go to the index. */
+
+class FlushObserver {
+public:
+	/** Constructor
+	@param[in]	space_id	table space id
+	@param[in]	trx		trx instance
+	@param[in]	stage		performance schema accounting object,
+	used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
+	for accounting. */
+	FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
+
+	/** Destructor */
+	~FlushObserver();
+
+	/** Check pages have been flushed and removed from the flush list
+	in a buffer pool instance.
+	@param[in]	instance_no	buffer pool instance no
+	@return true if the pages were removed from the flush list */
+	bool is_complete(ulint	instance_no)
+	{
+		return(m_flushed->at(instance_no) == m_removed->at(instance_no)
+		       || m_interrupted);
+	}
+
+	/** Interrupt observer not to wait. */
+	void interrupted()
+	{
+		m_interrupted = true;
+	}
+
+	/** Check whether trx is interrupted
+	@return true if trx is interrupted */
+	bool check_interrupted();
+
+	/** Flush dirty pages. */
+	void flush();
+
+	/** Notify observer of flushing a page
+	@param[in]	buf_pool	buffer pool instance
+	@param[in]	bpage		buffer page to flush */
+	void notify_flush(
+		buf_pool_t*	buf_pool,
+		buf_page_t*	bpage);
+
+	/** Notify observer of removing a page from flush list
+	@param[in]	buf_pool	buffer pool instance
+	@param[in]	bpage		buffer page flushed */
+	void notify_remove(
+		buf_pool_t*	buf_pool,
+		buf_page_t*	bpage);
+private:
+	/** Table space id */
+	ulint			m_space_id;
+
+	/** Trx instance */
+	trx_t*			m_trx;
+
+	/** Performance schema accounting object, used by ALTER TABLE.
+	If not NULL, then stage->begin_phase_flush() will be called initially,
+	specifying the number of pages to be attempted to be flushed and
+	subsequently, stage->inc() will be called for each page we attempt to
+	flush. */
+	ut_stage_alter_t*	m_stage;
+
+	/** Flush requests sent, indexed by buffer pool instance no */
+	std::vector<ulint>*	m_flushed;
+
+	/** Flush requests finished, indexed by buffer pool instance no */
+	std::vector<ulint>*	m_removed;
+
+	/** True if the operation was interrupted. */
+	bool			m_interrupted;
+};
 
 #endif /* !UNIV_HOTBACKUP */
 
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index a763cd115fe..ecb98e32619 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,21 +27,21 @@ Created 11/5/1995 Heikki Tuuri
 #include "buf0buf.h"
 #include "mtr0mtr.h"
 #include "srv0srv.h"
+#include "fsp0types.h"
 
 /********************************************************************//**
 Inserts a modified block into the flush list. */
-UNIV_INTERN
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
 	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
 	buf_block_t*	block,		/*!< in/out: block which is modified */
 	lsn_t		lsn);		/*!< in: oldest modification */
+
 /********************************************************************//**
 Inserts a modified block into the flush list in the right sorted position.
 This function is used by recovery, because there the modifications do not
 necessarily come in the order of lsn's. */
-UNIV_INTERN
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
@@ -57,40 +57,49 @@ UNIV_INLINE
 void
 buf_flush_note_modification(
 /*========================*/
-	buf_block_t*	block,	/*!< in: block which is modified */
-	mtr_t*		mtr)	/*!< in: mtr */
+	buf_block_t*	block,		/*!< in: block which is modified */
+	lsn_t		start_lsn,	/*!< in: start lsn of the mtr that
+					modified this block */
+	lsn_t		end_lsn,	/*!< in: end lsn of the mtr that
+					modified this block */
+	FlushObserver*	observer)	/*!< in: flush observer */
 {
-	buf_pool_t*	buf_pool = buf_pool_from_block(block);
-
-	ut_ad(!srv_read_only_mode);
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	{
+		/* Allow write to proceed to shared temporary tablespace
+		in read-only mode. */
+		ut_ad(!srv_read_only_mode
+		      || fsp_is_system_temporary(block->page.id.space()));
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		ut_ad(block->page.buf_fix_count > 0);
+
+		buf_pool_t*	buf_pool = buf_pool_from_block(block);
+
+		ut_ad(!buf_pool_mutex_own(buf_pool));
+		ut_ad(!buf_flush_list_mutex_own(buf_pool));
+	}
+#endif /* UNIV_DEBUG */
 
-	ut_ad(!buf_pool_mutex_own(buf_pool));
-	ut_ad(!buf_flush_list_mutex_own(buf_pool));
-	ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
+	mutex_enter(&block->mutex);
 
-	ut_ad(mtr->start_lsn != 0);
-	ut_ad(mtr->modifications);
+	ut_ad(block->page.newest_modification <= end_lsn);
+	block->page.newest_modification = end_lsn;
 
-	mutex_enter(&block->mutex);
-	ut_ad(block->page.newest_modification <= mtr->end_lsn);
+	/* Don't allow to set flush observer from non-null to null,
+	or from one observer to another. */
+	ut_ad(block->page.flush_observer == NULL
+	      || block->page.flush_observer == observer);
+	block->page.flush_observer = observer;
 
-	block->page.newest_modification = mtr->end_lsn;
+	if (block->page.oldest_modification == 0) {
+		buf_pool_t*	buf_pool = buf_pool_from_block(block);
 
-	if (!block->page.oldest_modification) {
-		ut_a(mtr->made_dirty);
-		ut_ad(log_flush_order_mutex_own());
-		buf_flush_insert_into_flush_list(
-			buf_pool, block, mtr->start_lsn);
+		buf_flush_insert_into_flush_list(buf_pool, block, start_lsn);
 	} else {
-		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
+		ut_ad(block->page.oldest_modification <= start_lsn);
 	}
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	srv_stats.buf_pool_write_requests.inc();
 }
@@ -107,33 +116,36 @@ buf_flush_recv_note_modification(
 	lsn_t		end_lsn)	/*!< in: end lsn of the last mtr in the
 					set of mtr's */
 {
-	buf_pool_t*	buf_pool = buf_pool_from_block(block);
+#ifdef UNIV_DEBUG
+	{
+		ut_ad(!srv_read_only_mode);
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		ut_ad(block->page.buf_fix_count > 0);
 
-	ut_ad(!srv_read_only_mode);
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+		buf_pool_t*	buf_pool = buf_pool_from_block(block);
 
-	ut_ad(!buf_pool_mutex_own(buf_pool));
-	ut_ad(!buf_flush_list_mutex_own(buf_pool));
-	ut_ad(log_flush_order_mutex_own());
+		ut_ad(!buf_pool_mutex_own(buf_pool));
+		ut_ad(!buf_flush_list_mutex_own(buf_pool));
 
-	ut_ad(start_lsn != 0);
-	ut_ad(block->page.newest_modification <= end_lsn);
+		ut_ad(start_lsn != 0);
+		ut_ad(block->page.newest_modification <= end_lsn);
+	}
+#endif /* UNIV_DEBUG */
+
+	buf_page_mutex_enter(block);
 
-	mutex_enter(&block->mutex);
 	block->page.newest_modification = end_lsn;
 
 	if (!block->page.oldest_modification) {
+		buf_pool_t*	buf_pool = buf_pool_from_block(block);
+
 		buf_flush_insert_sorted_into_flush_list(
 			buf_pool, block, start_lsn);
 	} else {
 		ut_ad(block->page.oldest_modification <= start_lsn);
 	}
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 }
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index a7a65df33aa..0cbd77878ec 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -38,8 +38,7 @@ struct trx_t;
 Returns TRUE if less than 25 % of the buffer pool is available. This can be
 used in heuristics to prevent huge transactions eating up the whole buffer
 pool for their locks.
-@return	TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
+@return TRUE if less than 25 % of buffer pool left */
 ibool
 buf_LRU_buf_pool_running_out(void);
 /*==============================*/
@@ -56,7 +55,6 @@ Flushes all dirty pages or removes all pages belonging
 to a given tablespace. A PROBLEM: if readahead is being started, what
 guarantees that it will not try to read in pages after this operation
 has completed? */
-UNIV_INTERN
 void
 buf_LRU_flush_or_remove_pages(
 /*==========================*/
@@ -68,7 +66,6 @@ buf_LRU_flush_or_remove_pages(
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /********************************************************************//**
 Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
 void
 buf_LRU_insert_zip_clean(
 /*=====================*/
@@ -86,7 +83,6 @@ accessible via bpage.
 The caller must hold buf_pool->mutex and must not hold any
 buf_page_get_mutex() when calling this function.
 @return true if freed, false otherwise. */
-UNIV_INTERN
 bool
 buf_LRU_free_page(
 /*==============*/
@@ -96,21 +92,19 @@ buf_LRU_free_page(
 	MY_ATTRIBUTE((nonnull));
 /******************************************************************//**
 Try to free a replaceable block.
-@return	TRUE if found and freed */
-UNIV_INTERN
-ibool
+@return true if found and freed */
+bool
 buf_LRU_scan_and_free_block(
 /*========================*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ibool		scan_all)	/*!< in: scan whole LRU list
-					if TRUE, otherwise scan only
+	bool		scan_all)	/*!< in: scan whole LRU list
+					if true, otherwise scan only
 					'old' blocks. */
 	MY_ATTRIBUTE((nonnull,warn_unused_result));
 /******************************************************************//**
 Returns a free block from the buf_pool.  The block is taken off the
 free list.  If it is empty, returns NULL.
-@return	a free control block, or NULL if the buf_block->free list is empty */
-UNIV_INTERN
+@return a free control block, or NULL if the buf_block->free list is empty */
 buf_block_t*
 buf_LRU_get_free_only(
 /*==================*/
@@ -138,8 +132,7 @@ we put it to free list to be used.
     * scan LRU list even if buf_pool->try_LRU_scan is not set
 * iteration > 1:
   * same as iteration 1 but sleep 10ms
-@return	the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
+@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
 buf_block_t*
 buf_LRU_get_free_block(
 /*===================*/
@@ -148,25 +141,21 @@ buf_LRU_get_free_block(
 /******************************************************************//**
 Determines if the unzip_LRU list should be used for evicting a victim
 instead of the general LRU list.
-@return	TRUE if should use unzip_LRU */
-UNIV_INTERN
+@return TRUE if should use unzip_LRU */
 ibool
 buf_LRU_evict_from_unzip_LRU(
 /*=========================*/
 	buf_pool_t*	buf_pool);
 /******************************************************************//**
 Puts a block back to the free list. */
-UNIV_INTERN
 void
 buf_LRU_block_free_non_file_page(
 /*=============================*/
 	buf_block_t*	block);	/*!< in: block, must not contain a file page */
 /******************************************************************//**
-Adds a block to the LRU list. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INTERN
+Adds a block to the LRU list. Please make sure that the page_size is
+already set when invoking the function, so that we can get correct
+page_size from the buffer page when adding a block into LRU */
 void
 buf_LRU_add_block(
 /*==============*/
@@ -177,7 +166,6 @@ buf_LRU_add_block(
 				the start regardless of this parameter */
 /******************************************************************//**
 Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
 void
 buf_unzip_LRU_add_block(
 /*====================*/
@@ -186,23 +174,20 @@ buf_unzip_LRU_add_block(
 				of the list, else put to the start */
 /******************************************************************//**
 Moves a block to the start of the LRU list. */
-UNIV_INTERN
 void
 buf_LRU_make_block_young(
 /*=====================*/
 	buf_page_t*	bpage);	/*!< in: control block */
 /******************************************************************//**
 Moves a block to the end of the LRU list. */
-UNIV_INTERN
 void
 buf_LRU_make_block_old(
 /*===================*/
 	buf_page_t*	bpage);	/*!< in: control block */
 /**********************************************************************//**
 Updates buf_pool->LRU_old_ratio.
-@return	updated old_pct */
-UNIV_INTERN
-ulint
+@return updated old_pct */
+uint
 buf_LRU_old_ratio_update(
 /*=====================*/
 	uint	old_pct,/*!< in: Reserve this percentage of
@@ -213,14 +198,12 @@ buf_LRU_old_ratio_update(
 /********************************************************************//**
 Update the historical stats that we are collecting for LRU eviction
 policy at the end of each interval. */
-UNIV_INTERN
 void
 buf_LRU_stat_update(void);
 /*=====================*/
 
 /******************************************************************//**
 Remove one page from LRU list and put it to free list */
-UNIV_INTERN
 void
 buf_LRU_free_one_page(
 /*==================*/
@@ -231,7 +214,6 @@ buf_LRU_free_one_page(
 
 /******************************************************************//**
 Adjust LRU hazard pointers if needed. */
-
 void
 buf_LRU_adjust_hp(
 /*==============*/
@@ -241,8 +223,7 @@ buf_LRU_adjust_hp(
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**********************************************************************//**
 Validates the LRU list.
-@return	TRUE */
-UNIV_INTERN
+@return TRUE */
 ibool
 buf_LRU_validate(void);
 /*==================*/
@@ -250,7 +231,6 @@ buf_LRU_validate(void);
 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**********************************************************************//**
 Prints the LRU list. */
-UNIV_INTERN
 void
 buf_LRU_print(void);
 /*===============*/
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index 10714031710..9c97a5147c1 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2015, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -28,36 +28,38 @@ Created 11/5/1995 Heikki Tuuri
 #define buf0rea_h
 
 #include "univ.i"
+#include "buf0buf.h"
 #include "buf0types.h"
 
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
+/** High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
 @return TRUE if page has been read in, FALSE in case of failure */
-UNIV_INTERN
 ibool
 buf_read_page(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint	offset, /*!< in: page number */
-	buf_page_t** bpage);/*!< out: page */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	buf_page_t**		bpage);
+
 /********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	sync		true if synchronous aio is desired
 @return TRUE if page has been read in, FALSE in case of failure */
-UNIV_INTERN
 ibool
-buf_read_page_async(
-/*================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: page number */
-/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
+buf_read_page_background(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	bool			sync);
+
+/** Applies a random read-ahead in buf_pool if there are at least a threshold
 value of accessed pages from the random read-ahead area. Does not read any
 page, not even the one at the position (space, offset), if the read-ahead
 mechanism is not activated. NOTE 1: the calling thread may own latches on
@@ -66,23 +68,20 @@ end up waiting for these latches! NOTE 2: the calling thread must want
 access to the page given: this rule is set to prevent unintended read-aheads
 performed by ibuf routines, a situation which could result in a deadlock if
 the OS does not support asynchronous i/o.
+@param[in]	page_id		page id of a page which the current thread
+wants to access
+@param[in]	page_size	page size
+@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
 @return number of page read requests issued; NOTE that if we read ibuf
 pages, it may happen that the page at the given page number does not
-get read even if we return a positive value!
-@return	number of page read requests issued */
-UNIV_INTERN
+get read even if we return a positive value! */
 ulint
 buf_read_ahead_random(
-/*==================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes,
-				or 0 */
-	ulint	offset,		/*!< in: page number of a page which
-				the current thread wants to access */
-	ibool	inside_ibuf);	/*!< in: TRUE if we are inside ibuf
-				routine */
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ibool			inside_ibuf);
+
+/** Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
 that the algorithm looks at the 'natural' adjacent successor and
@@ -104,20 +103,20 @@ latches!
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io.
-@return	number of page read requests issued */
-UNIV_INTERN
+@param[in]	page_id		page id; see NOTE 3 above
+@param[in]	page_size	page size
+@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
+@return number of page read requests issued */
 ulint
 buf_read_ahead_linear(
-/*==================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes, or 0 */
-	ulint	offset,		/*!< in: page number; see NOTE 3 above */
-	ibool	inside_ibuf);	/*!< in: TRUE if we are inside ibuf routine */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ibool			inside_ibuf);
+
 /********************************************************************//**
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
-UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
@@ -127,53 +126,37 @@ buf_read_ibuf_merge_pages(
 					to get read in, before this
 					function returns */
 	const ulint*	space_ids,	/*!< in: array of space ids */
-	const ib_int64_t* space_versions,/*!< in: the spaces must have
-					this version number
-					(timestamp), otherwise we
-					discard the read; we use this
-					to cancel reads if DISCARD +
-					IMPORT may have changed the
-					tablespace size */
 	const ulint*	page_nos,	/*!< in: array of page numbers
 					to read, with the highest page
 					number the last in the
 					array */
 	ulint		n_stored);	/*!< in: number of elements
 					in the arrays */
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in. */
-UNIV_INTERN
+
+/** Issues read requests for pages which recovery wants to read in.
+@param[in]	sync		true if the caller wants this function to wait
+for the highest address page to get read in, before this function returns
+@param[in]	space_id	tablespace id
+@param[in]	page_nos	array of page numbers to read, with the
+highest page number the last in the array
+@param[in]	n_stored	number of page numbers in the array */
+
 void
 buf_read_recv_pages(
-/*================*/
-	ibool		sync,		/*!< in: TRUE if the caller
-					wants this function to wait
-					for the highest address page
-					to get read in, before this
-					function returns */
-	ulint		space,		/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in
-					bytes, or 0 */
-	const ulint*	page_nos,	/*!< in: array of page numbers
-					to read, with the highest page
-					number the last in the
-					array */
-	ulint		n_stored);	/*!< in: number of page numbers
-					in the array */
+	bool		sync,
+	ulint		space_id,
+	const ulint*	page_nos,
+	ulint		n_stored);
 
 /** The size in pages of the area which the read-ahead algorithms read if
 invoked */
-#define	BUF_READ_AHEAD_AREA(b)					\
-	ut_min(64, ut_2_power_up((b)->curr_size / 32))
+#define	BUF_READ_AHEAD_AREA(b)		((b)->read_ahead_area)
 
 /** @name Modes used in read-ahead @{ */
 /** read only pages belonging to the insert buffer tree */
 #define BUF_READ_IBUF_PAGES_ONLY	131
 /** read any page */
 #define BUF_READ_ANY_PAGE		132
-/** read any page, but ignore (return an error) if a page does not exist
-instead of crashing like BUF_READ_ANY_PAGE does */
-#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024
 /* @} */
 
 #endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 11bbc9b5c8a..102b831ec61 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,11 @@ Created 11/17/1995 Heikki Tuuri
 #ifndef buf0types_h
 #define buf0types_h
 
-#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS)
-#define PAGE_ATOMIC_REF_COUNT
-#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */
+#include "os0event.h"
+#include "ut0ut.h"
 
 /** Buffer page (uncompressed or compressed) */
-struct buf_page_t;
+class buf_page_t;
 /** Buffer block for which an uncompressed page exists */
 struct buf_block_t;
 /** Buffer pool chunk comprising buf_block_t */
@@ -44,6 +43,8 @@ struct buf_pool_stat_t;
 struct buf_buddy_stat_t;
 /** Doublewrite memory struct */
 struct buf_dblwr_t;
+/** Flush observer for bulk create index */
+class FlushObserver;
 
 /** A buffer frame. @see page_t */
 typedef	byte	buf_frame_t;
@@ -96,6 +97,24 @@ enum srv_checksum_algorithm_t {
 						when reading */
 };
 
+inline
+bool
+is_checksum_strict(srv_checksum_algorithm_t algo)
+{
+	return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32
+	       || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB
+	       || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE);
+}
+
+inline
+bool
+is_checksum_strict(ulint algo)
+{
+	return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32
+	       || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB
+	       || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE);
+}
+
 /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
 /* @{ */
 /** Zip shift value for the smallest page size */
@@ -117,4 +136,16 @@ this must be equal to UNIV_PAGE_SIZE */
 #define BUF_BUDDY_HIGH	(BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
 /* @} */
 
+#ifndef UNIV_INNOCHECKSUM
+
+#include "ut0mutex.h"
+#include "sync0rw.h"
+
+typedef ib_bpmutex_t BPageMutex;
+typedef ib_mutex_t BufPoolMutex;
+typedef ib_mutex_t FlushListMutex;
+typedef BPageMutex BufPoolZipMutex;
+typedef rw_lock_t BPageLock;
+#endif /* !UNIV_INNOCHECKSUM */
+
 #endif /* buf0types.h */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index 1d954bfc07c..5537d70548a 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -33,14 +33,17 @@ Created 5/30/1994 Heikki Tuuri
 #include "mem0mem.h"
 #include "dict0types.h"
 
+#include <ostream>
+
 /** Storage for overflow data in a big record, that is, a clustered
 index record which needs external storage of data fields */
 struct big_rec_t;
+struct upd_t;
 
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Gets pointer to the type struct of SQL data field.
-@return	pointer to the type struct */
+@return pointer to the type struct */
 UNIV_INLINE
 dtype_t*
 dfield_get_type(
@@ -49,7 +52,7 @@ dfield_get_type(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Gets pointer to the data in a field.
-@return	pointer to data */
+@return pointer to data */
 UNIV_INLINE
 void*
 dfield_get_data(
@@ -67,11 +70,11 @@ void
 dfield_set_type(
 /*============*/
 	dfield_t*	field,	/*!< in: SQL data field */
-	const dtype_t*	type)	/*!< in: pointer to data type struct */
-	MY_ATTRIBUTE((nonnull));
+	const dtype_t*	type);	/*!< in: pointer to data type struct */
+
 /*********************************************************************//**
 Gets length of field data.
-@return	length of data; UNIV_SQL_NULL if SQL null data */
+@return length of data; UNIV_SQL_NULL if SQL null data */
 UNIV_INLINE
 ulint
 dfield_get_len(
@@ -89,7 +92,7 @@ dfield_set_len(
 	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Determines if a field is SQL NULL
-@return	nonzero if SQL null data */
+@return nonzero if SQL null data */
 UNIV_INLINE
 ulint
 dfield_is_null(
@@ -98,7 +101,7 @@ dfield_is_null(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Determines if a field is externally stored
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 dfield_is_ext(
@@ -113,6 +116,23 @@ dfield_set_ext(
 /*===========*/
 	dfield_t*	field)	/*!< in/out: field */
 	MY_ATTRIBUTE((nonnull));
+
+/** Gets spatial status for "external storage"
+@param[in]	field		field */
+UNIV_INLINE
+spatial_status_t
+dfield_get_spatial_status(
+	const dfield_t*	field);
+
+/** Sets spatial status for "external storage"
+@param[in,out]	field		field
+@param[in]	spatial_status	spatial status */
+UNIV_INLINE
+void
+dfield_set_spatial_status(
+	dfield_t*		field,
+	spatial_status_t	spatial_status);
+
 /*********************************************************************//**
 Sets pointer to the data and length in a field. */
 UNIV_INLINE
@@ -124,6 +144,15 @@ dfield_set_data(
 	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
 	MY_ATTRIBUTE((nonnull(1)));
 /*********************************************************************//**
+Writes the given MBR data (an array of doubles) to a field. */
+UNIV_INLINE
+void
+dfield_write_mbr(
+/*=============*/
+	dfield_t*	field,	/*!< in: field */
+	const double*	mbr)	/*!< in: data */
+	MY_ATTRIBUTE((nonnull(1)));
+/*********************************************************************//**
 Sets a data field to SQL NULL. */
 UNIV_INLINE
 void
@@ -146,9 +175,9 @@ UNIV_INLINE
 void
 dfield_copy_data(
 /*=============*/
-	dfield_t*	field1,	/*!< out: field to copy to */
-	const dfield_t*	field2)	/*!< in: field to copy from */
-	MY_ATTRIBUTE((nonnull));
+	dfield_t*	field1,		/*!< out: field to copy to */
+	const dfield_t*	field2);	/*!< in: field to copy from */
+
 /*********************************************************************//**
 Copies a data field to another. */
 UNIV_INLINE
@@ -172,7 +201,7 @@ dfield_dup(
 Tests if two data fields are equal.
 If len==0, tests the data length and content for equality.
 If len>0, tests the first len bytes of the content for equality.
-@return	TRUE if both fields are NULL or if they are equal */
+@return TRUE if both fields are NULL or if they are equal */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
@@ -184,7 +213,7 @@ dfield_datas_are_binary_equal(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Tests if dfield data length and content is equal to the given.
-@return	TRUE if equal */
+@return TRUE if equal */
 UNIV_INLINE
 ibool
 dfield_data_is_binary_equal(
@@ -196,29 +225,47 @@ dfield_data_is_binary_equal(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Gets number of fields in a data tuple.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields(
 /*================*/
 	const dtuple_t*	tuple)	/*!< in: tuple */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Gets number of virtual fields in a data tuple.
+@param[in]	tuple	dtuple to check
+@return number of virtual fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_v_fields(
+	const dtuple_t*	tuple);
+
 #ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return	nth field */
+/** Gets nth field of a tuple.
+@param[in]	tuple	tuple
+@param[in]	n	index of field
+@return nth field */
 UNIV_INLINE
 dfield_t*
 dtuple_get_nth_field(
-/*=================*/
-	const dtuple_t*	tuple,	/*!< in: tuple */
-	ulint		n);	/*!< in: index of field */
+	const dtuple_t*	tuple,
+	ulint		n);
+/** Gets nth virtual field of a tuple.
+@param[in]	tuple	tuple
+@param[in]	n	the nth field to get
+@return nth field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_v_field(
+	const dtuple_t*	tuple,
+	ulint		n);
 #else /* UNIV_DEBUG */
 # define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
+# define dtuple_get_nth_v_field(tuple, n) ((tuple)->fields + (tuple)->n_fields + (n))
 #endif /* UNIV_DEBUG */
 /*********************************************************************//**
 Gets info bits in a data tuple.
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 dtuple_get_info_bits(
@@ -236,7 +283,7 @@ dtuple_set_info_bits(
 	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Gets number of fields used in record comparisons.
-@return	number of fields used in comparisons in rem0cmp.* */
+@return number of fields used in comparisons in rem0cmp.* */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields_cmp(
@@ -259,25 +306,28 @@ creating a new dtuple_t object */
 #define DTUPLE_EST_ALLOC(n_fields)	\
 	(sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t))
 
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
+/** Creates a data tuple from an already allocated chunk of memory.
 The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
 The default value for number of fields used in record comparisons
 for this tuple is n_fields.
-@return	created tuple (inside buf) */
+@param[in,out]	buf		buffer to use
+@param[in]	buf_size	buffer size
+@param[in]	n_fields	number of fields
+@param[in]	n_v_fields	number of fields on virtual columns
+@return created tuple (inside buf) */
 UNIV_INLINE
 dtuple_t*
 dtuple_create_from_mem(
-/*===================*/
-	void*	buf,		/*!< in, out: buffer to use */
-	ulint	buf_size,	/*!< in: buffer size */
-	ulint	n_fields)	/*!< in: number of fields */
+	void*	buf,
+	ulint	buf_size,
+	ulint	n_fields,
+	ulint	n_v_fields)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
 /**********************************************************//**
 Creates a data tuple to a memory heap. The default value for number
 of fields used in record comparisons for this tuple is n_fields.
-@return	own: created tuple */
+@return own: created tuple */
 UNIV_INLINE
 dtuple_t*
 dtuple_create(
@@ -288,20 +338,56 @@ dtuple_create(
 	ulint		n_fields)/*!< in: number of fields */
 	MY_ATTRIBUTE((nonnull, malloc));
 
+
+/** Initialize the virtual field data in a dtuple_t
+@param[in,out]		vrow	dtuple contains the virtual fields */
+UNIV_INLINE
+void
+dtuple_init_v_fld(
+	const dtuple_t*	vrow);
+
+/** Duplicate the virtual field data in a dtuple_t
+@param[in,out]		vrow	dtuple contains the virtual fields
+@param[in]		heap	heap memory to use */
+UNIV_INLINE
+void
+dtuple_dup_v_fld(
+	const dtuple_t*	vrow,
+	mem_heap_t*	heap);
+
+/** Creates a data tuple with possible virtual columns to a memory heap.
+@param[in]	heap		memory heap where the tuple is created
+@param[in]	n_fields	number of fields
+@param[in]	n_v_fields	number of fields on virtual col
+@return own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_with_vcol(
+	mem_heap_t*	heap,
+	ulint		n_fields,
+	ulint		n_v_fields);
+
 /*********************************************************************//**
 Sets number of fields used in a tuple. Normally this is set in
 dtuple_create, but if you want later to set it smaller, you can use this. */
-UNIV_INTERN
 void
 dtuple_set_n_fields(
 /*================*/
 	dtuple_t*	tuple,		/*!< in: tuple */
 	ulint		n_fields)	/*!< in: number of fields */
 	MY_ATTRIBUTE((nonnull));
+/** Copies a data tuple's virtual fields to another. This is a shallow copy.
+@param[in,out]	d_tuple		destination tuple
+@param[in]	s_tuple		source tuple */
+UNIV_INLINE
+void
+dtuple_copy_v_fields(
+	dtuple_t*	d_tuple,
+	const dtuple_t*	s_tuple);
 /*********************************************************************//**
 Copies a data tuple to another.  This is a shallow copy; if a deep copy
 is desired, dfield_dup() will have to be invoked on each field.
-@return	own: copy of tuple */
+@return own: copy of tuple */
 UNIV_INLINE
 dtuple_t*
 dtuple_copy(
@@ -313,7 +399,7 @@ dtuple_copy(
 /**********************************************************//**
 The following function returns the sum of data lengths of a tuple. The space
 occupied by the field structs or the tuple struct is not counted.
-@return	sum of data lens */
+@return sum of data lens */
 UNIV_INLINE
 ulint
 dtuple_get_data_size(
@@ -323,37 +409,37 @@ dtuple_get_data_size(
 	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Computes the number of externally stored fields in a data tuple.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dtuple_get_n_ext(
 /*=============*/
 	const dtuple_t*	tuple)	/*!< in: tuple */
 	MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
+/** Compare two data tuples.
+@param[in] tuple1 first data tuple
+@param[in] tuple2 second data tuple
+@return positive, 0, negative if tuple1 is greater, equal, less, than tuple2,
+respectively */
 int
 dtuple_coll_cmp(
-/*============*/
-	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
-	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return	the folded value */
+	const dtuple_t*	tuple1,
+	const dtuple_t*	tuple2)
+	MY_ATTRIBUTE((warn_unused_result));
+/** Fold a prefix given as the number of fields of a tuple.
+@param[in]	tuple		index record
+@param[in]	n_fields	number of complete fields to fold
+@param[in]	n_bytes		number of bytes to fold in the last field
+@param[in]	tree_id		index tree ID
+@return the folded value */
 UNIV_INLINE
 ulint
 dtuple_fold(
-/*========*/
-	const dtuple_t*	tuple,	/*!< in: the tuple */
-	ulint		n_fields,/*!< in: number of complete fields to fold */
-	ulint		n_bytes,/*!< in: number of bytes to fold in an
-				incomplete last field */
-	index_id_t	tree_id)/*!< in: index tree id */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	const dtuple_t*	tuple,
+	ulint		n_fields,
+	ulint		n_bytes,
+	index_id_t	tree_id)
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************************//**
 Sets types of fields binary in a tuple. */
 UNIV_INLINE
@@ -365,7 +451,7 @@ dtuple_set_types_binary(
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
 Checks if a dtuple contains an SQL null value.
-@return	TRUE if some field is SQL null */
+@return TRUE if some field is SQL null */
 UNIV_INLINE
 ibool
 dtuple_contains_null(
@@ -374,8 +460,7 @@ dtuple_contains_null(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /**********************************************************//**
 Checks that a data field is typed. Asserts an error if not.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dfield_check_typed(
 /*===============*/
@@ -383,8 +468,7 @@ dfield_check_typed(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /**********************************************************//**
 Checks that a data tuple is typed. Asserts an error if not.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dtuple_check_typed(
 /*===============*/
@@ -392,8 +476,7 @@ dtuple_check_typed(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /**********************************************************//**
 Checks that a data tuple is typed.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dtuple_check_typed_no_assert(
 /*=========================*/
@@ -403,8 +486,7 @@ dtuple_check_typed_no_assert(
 /**********************************************************//**
 Validates the consistency of a tuple which must be complete, i.e,
 all fields must have been set.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dtuple_validate(
 /*============*/
@@ -413,7 +495,6 @@ dtuple_validate(
 #endif /* UNIV_DEBUG */
 /*************************************************************//**
 Pretty prints a dfield value according to its data type. */
-UNIV_INTERN
 void
 dfield_print(
 /*=========*/
@@ -422,7 +503,6 @@ dfield_print(
 /*************************************************************//**
 Pretty prints a dfield value according to its data type. Also the hex string
 is printed if a string contains non-printable characters. */
-UNIV_INTERN
 void
 dfield_print_also_hex(
 /*==================*/
@@ -430,13 +510,41 @@ dfield_print_also_hex(
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************//**
 The following function prints the contents of a tuple. */
-UNIV_INTERN
 void
 dtuple_print(
 /*=========*/
 	FILE*		f,	/*!< in: output stream */
 	const dtuple_t*	tuple)	/*!< in: tuple */
 	MY_ATTRIBUTE((nonnull));
+
+/** Print the contents of an array of data fields.
+@param[out]	o	output stream
+@param[in]	field	array of data fields
+@param[in]	n	number of data fields */
+void
+dfield_print(
+	std::ostream&	o,
+	const dfield_t*	field,
+	ulint		n);
+/** Print the contents of a tuple.
+@param[out]	o	output stream
+@param[in]	tuple	data tuple */
+void
+dtuple_print(
+	std::ostream&	o,
+	const dtuple_t*	tuple);
+
+/** Print the contents of a tuple.
+@param[out]	o	output stream
+@param[in]	tuple	data tuple */
+inline
+std::ostream&
+operator<<(std::ostream& o, const dtuple_t& tuple)
+{
+	dtuple_print(o, &tuple);
+	return(o);
+}
+
 /**************************************************************//**
 Moves parts of long fields in entry to the big record vector so that
 the size of tuple drops below the maximum record size allowed in the
@@ -445,20 +553,19 @@ to determine uniquely the insertion place of the tuple in the index.
 @return own: created big record vector, NULL if we are not able to
 shorten the entry enough, i.e., if there are too many fixed-length or
 short fields in entry or the index is clustered */
-UNIV_INTERN
 big_rec_t*
 dtuple_convert_big_rec(
 /*===================*/
 	dict_index_t*	index,	/*!< in: index */
+	upd_t*		upd,	/*!< in/out: update vector */
 	dtuple_t*	entry,	/*!< in/out: index entry */
 	ulint*		n_ext)	/*!< in/out: number of
 				externally stored columns */
-	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
+	MY_ATTRIBUTE((malloc, warn_unused_result));
 /**************************************************************//**
 Puts back to entry the data stored in vector. Note that to ensure the
 fields in entry can accommodate the data, vector must have been created
 from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
 void
 dtuple_convert_back_big_rec(
 /*========================*/
@@ -483,8 +590,17 @@ dtuple_big_rec_free(
 struct dfield_t{
 	void*		data;	/*!< pointer to data */
 	unsigned	ext:1;	/*!< TRUE=externally stored, FALSE=local */
-	unsigned	len:32;	/*!< data length; UNIV_SQL_NULL if SQL null */
+	unsigned	spatial_status:2;
+				/*!< spatial status of externally stored field
+				in undo log for purge */
+	unsigned	len;	/*!< data length; UNIV_SQL_NULL if SQL null */
 	dtype_t		type;	/*!< type of data */
+
+	/** Create a deep copy of this object
+	@param[in]	heap	the memory heap in which the clone will be
+				created.
+	@return	the cloned object. */
+	dfield_t* clone(mem_heap_t* heap);
 };
 
 /** Structure for an SQL data tuple of fields (logical record) */
@@ -502,6 +618,8 @@ struct dtuple_t {
 					default value in dtuple creation is
 					the same value as n_fields */
 	dfield_t*	fields;		/*!< fields */
+	ulint		n_v_fields;	/*!< number of virtual fields */
+	dfield_t*	v_fields;	/*!< fields on virtual column */
 	UT_LIST_NODE_T(dtuple_t) tuple_list;
 					/*!< data tuples can be linked into a
 					list using this field */
@@ -513,8 +631,20 @@ struct dtuple_t {
 #endif /* UNIV_DEBUG */
 };
 
+
 /** A slot for a field in a big rec vector */
 struct big_rec_field_t {
+
+	/** Constructor.
+	@param[in]	field_no_	the field number
+	@param[in]	len_		the data length
+	@param[in]	data_		the data */
+	big_rec_field_t(ulint field_no_, ulint len_, const void* data_)
+		: field_no(field_no_),
+		  len(len_),
+		  data(data_)
+	{}
+
 	ulint		field_no;	/*!< field number in record */
 	ulint		len;		/*!< stored data length, in bytes */
 	const void*	data;		/*!< stored data */
@@ -525,8 +655,36 @@ clustered index record which needs external storage of data fields */
 struct big_rec_t {
 	mem_heap_t*	heap;		/*!< memory heap from which
 					allocated */
+	const ulint	capacity;	/*!< fields array size */
 	ulint		n_fields;	/*!< number of stored fields */
 	big_rec_field_t*fields;		/*!< stored fields */
+
+	/** Constructor.
+	@param[in]	max	the capacity of the array of fields. */
+	explicit big_rec_t(const ulint max)
+		: heap(0),
+		  capacity(max),
+		  n_fields(0),
+		  fields(0)
+	{}
+
+	/** Append one big_rec_field_t object to the end of array of fields */
+	void append(const big_rec_field_t& field)
+	{
+		ut_ad(n_fields < capacity);
+		fields[n_fields] = field;
+		n_fields++;
+	}
+
+	/** Allocate a big_rec_t object in the given memory heap, and for
+	storing n_fld number of fields.
+	@param[in]	heap	memory heap in which this object is allocated
+	@param[in]	n_fld	maximum number of fields that can be stored in
+			this object
+	@return the allocated object */
+	static big_rec_t* alloc(
+		mem_heap_t*	heap,
+		ulint		n_fld);
 };
 
 #ifndef UNIV_NONINL
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 11499ab928c..dc51735d340 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,6 +25,7 @@ Created 5/30/1994 Heikki Tuuri
 
 #include "mem0mem.h"
 #include "ut0rnd.h"
+#include "btr0types.h"
 
 #ifdef UNIV_DEBUG
 /** Dummy variable to catch access to uninitialized fields.  In the
@@ -34,7 +35,7 @@ extern byte data_error;
 
 /*********************************************************************//**
 Gets pointer to the type struct of SQL data field.
-@return	pointer to the type struct */
+@return pointer to the type struct */
 UNIV_INLINE
 dtype_t*
 dfield_get_type(
@@ -65,7 +66,7 @@ dfield_set_type(
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Gets pointer to the data in a field.
-@return	pointer to data */
+@return pointer to data */
 UNIV_INLINE
 void*
 dfield_get_data(
@@ -82,7 +83,7 @@ dfield_get_data(
 
 /*********************************************************************//**
 Gets length of field data.
-@return	length of data; UNIV_SQL_NULL if SQL null data */
+@return length of data; UNIV_SQL_NULL if SQL null data */
 UNIV_INLINE
 ulint
 dfield_get_len(
@@ -111,12 +112,12 @@ dfield_set_len(
 #endif /* UNIV_VALGRIND_DEBUG */
 
 	field->ext = 0;
-	field->len = len;
+	field->len = static_cast<unsigned int>(len);
 }
 
 /*********************************************************************//**
 Determines if a field is SQL NULL
-@return	nonzero if SQL null data */
+@return nonzero if SQL null data */
 UNIV_INLINE
 ulint
 dfield_is_null(
@@ -130,7 +131,7 @@ dfield_is_null(
 
 /*********************************************************************//**
 Determines if a field is externally stored
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 dfield_is_ext(
@@ -138,6 +139,7 @@ dfield_is_ext(
 	const dfield_t* field)	/*!< in: field */
 {
 	ut_ad(field);
+	ut_ad(!field->ext || field->len >= BTR_EXTERN_FIELD_REF_SIZE);
 
 	return(field->ext);
 }
@@ -155,6 +157,34 @@ dfield_set_ext(
 	field->ext = 1;
 }
 
+/** Gets the spatial status of an externally stored field.
+@param[in]	field		field; asserted to be externally stored */
+UNIV_INLINE
+spatial_status_t
+dfield_get_spatial_status(
+	const dfield_t*	field)
+{
+	ut_ad(field);
+	ut_ad(dfield_is_ext(field));
+
+	return(static_cast<spatial_status_t>(field->spatial_status));
+}
+
+/** Sets spatial status for "external storage"
+@param[in,out]	field		field
+@param[in]	spatial_status	spatial status */
+UNIV_INLINE
+void
+dfield_set_spatial_status(
+	dfield_t*		field,
+	spatial_status_t	spatial_status)
+{
+	ut_ad(field);
+	ut_ad(dfield_is_ext(field));
+
+	field->spatial_status = spatial_status;
+}
+
 /*********************************************************************//**
 Sets pointer to the data and length in a field. */
 UNIV_INLINE
@@ -172,7 +202,31 @@ dfield_set_data(
 #endif /* UNIV_VALGRIND_DEBUG */
 	field->data = (void*) data;
 	field->ext = 0;
-	field->len = len;
+	field->len = static_cast<unsigned int>(len);
+}
+
+/*********************************************************************//**
+Writes an MBR into the data buffer of a field and sets the field length. */
+UNIV_INLINE
+void
+dfield_write_mbr(
+/*=============*/
+	dfield_t*	field,	/*!< in/out: field to write to */
+	const double*	mbr)	/*!< in: SPDIMS * 2 MBR values */
+{
+	ut_ad(field);
+
+#ifdef UNIV_VALGRIND_DEBUG
+	UNIV_MEM_ASSERT_RW(mbr, DATA_MBR_LEN);
+#endif /* UNIV_VALGRIND_DEBUG */
+	field->ext = 0;
+
+	for (int i = 0; i < SPDIMS * 2; i++) {
+		mach_double_write(static_cast<byte*>(field->data)
+				  + i * sizeof(double), mbr[i]);
+	}
+
+	field->len = DATA_MBR_LEN;
 }
 
 /*********************************************************************//**
@@ -201,6 +255,7 @@ dfield_copy_data(
 	field1->data = field2->data;
 	field1->len = field2->len;
 	field1->ext = field2->ext;
+	field1->spatial_status = field2->spatial_status;
 }
 
 /*********************************************************************//**
@@ -235,7 +290,7 @@ dfield_dup(
 Tests if two data fields are equal.
 If len==0, tests the data length and content for equality.
 If len>0, tests the first len bytes of the content for equality.
-@return	TRUE if both fields are NULL or if they are equal */
+@return TRUE if both fields are NULL or if they are equal */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
@@ -262,7 +317,7 @@ dfield_datas_are_binary_equal(
 
 /*********************************************************************//**
 Tests if dfield data length and content is equal to the given.
-@return	TRUE if equal */
+@return TRUE if equal */
 UNIV_INLINE
 ibool
 dfield_data_is_binary_equal(
@@ -279,7 +334,7 @@ dfield_data_is_binary_equal(
 
 /*********************************************************************//**
 Gets info bits in a data tuple.
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 dtuple_get_info_bits(
@@ -307,7 +362,7 @@ dtuple_set_info_bits(
 
 /*********************************************************************//**
 Gets number of fields used in record comparisons.
-@return	number of fields used in comparisons in rem0cmp.* */
+@return number of fields used in comparisons in rem0cmp.* */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields_cmp(
@@ -337,7 +392,7 @@ dtuple_set_n_fields_cmp(
 
 /*********************************************************************//**
 Gets number of fields in a data tuple.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dtuple_get_n_fields(
@@ -349,48 +404,85 @@ dtuple_get_n_fields(
 	return(tuple->n_fields);
 }
 
+/** Gets the number of virtual fields in a data tuple.
+@param[in]	tuple	dtuple to check
+@return number of virtual fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_v_fields(
+	const dtuple_t*	tuple)
+{
+	ut_ad(tuple);
+
+	return(tuple->n_v_fields);
+}
 #ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return	nth field */
+/** Gets nth field of a tuple.
+@param[in]	tuple	tuple
+@param[in]	n	index of field
+@return nth field */
 UNIV_INLINE
 dfield_t*
 dtuple_get_nth_field(
-/*=================*/
-	const dtuple_t*	tuple,	/*!< in: tuple */
-	ulint		n)	/*!< in: index of field */
+	const dtuple_t*	tuple,
+	ulint		n)
 {
 	ut_ad(tuple);
 	ut_ad(n < tuple->n_fields);
 
 	return((dfield_t*) tuple->fields + n);
 }
+/** Gets nth virtual field of a tuple.
+@param[in]	tuple	tuple
+@param[in]	n	index of the virtual field to get
+@return nth virtual field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_v_field(
+	const dtuple_t*	tuple,
+	ulint		n)
+{
+	ut_ad(tuple);
+	ut_ad(n < tuple->n_v_fields);
+
+	return(static_cast<dfield_t*>(tuple->v_fields + n));
+}
 #endif /* UNIV_DEBUG */
 
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
+/** Creates a data tuple from an already allocated chunk of memory.
 The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
 The default value for number of fields used in record comparisons
 for this tuple is n_fields.
-@return	created tuple (inside buf) */
+@param[in,out]	buf		buffer to use
+@param[in]	buf_size	buffer size
+@param[in]	n_fields	number of fields
+@param[in]	n_v_fields	number of fields on virtual columns
+@return created tuple (inside buf) */
 UNIV_INLINE
 dtuple_t*
 dtuple_create_from_mem(
-/*===================*/
-	void*	buf,		/*!< in, out: buffer to use */
-	ulint	buf_size,	/*!< in: buffer size */
-	ulint	n_fields)	/*!< in: number of fields */
+	void*	buf,
+	ulint	buf_size,
+	ulint	n_fields,
+	ulint	n_v_fields)
 {
 	dtuple_t*	tuple;
+	ulint		n_t_fields = n_fields + n_v_fields;
 
 	ut_ad(buf != NULL);
-	ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields));
+	ut_a(buf_size >= DTUPLE_EST_ALLOC(n_t_fields));
 
 	tuple = (dtuple_t*) buf;
 	tuple->info_bits = 0;
 	tuple->n_fields = n_fields;
+	tuple->n_v_fields = n_v_fields;
 	tuple->n_fields_cmp = n_fields;
 	tuple->fields = (dfield_t*) &tuple[1];
+	if (n_v_fields > 0) {
+		tuple->v_fields = &tuple->fields[n_fields];
+	} else {
+		tuple->v_fields = NULL;
+	}
 
 #ifdef UNIV_DEBUG
 	tuple->magic_n = DATA_TUPLE_MAGIC_N;
@@ -398,26 +490,61 @@ dtuple_create_from_mem(
 	{	/* In the debug version, initialize fields to an error value */
 		ulint	i;
 
-		for (i = 0; i < n_fields; i++) {
+		for (i = 0; i < n_t_fields; i++) {
 			dfield_t*       field;
 
-			field = dtuple_get_nth_field(tuple, i);
+			if (i >= n_fields) {
+				field = dtuple_get_nth_v_field(
+					tuple, i - n_fields);
+			} else {
+				field = dtuple_get_nth_field(tuple, i);
+			}
 
 			dfield_set_len(field, UNIV_SQL_NULL);
 			field->data = &data_error;
 			dfield_get_type(field)->mtype = DATA_ERROR;
+			dfield_get_type(field)->prtype = DATA_ERROR;
 		}
 	}
 #endif
-	UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
-	UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
+	UNIV_MEM_ASSERT_W(tuple->fields, n_t_fields * sizeof *tuple->fields);
+	UNIV_MEM_INVALID(tuple->fields, n_t_fields * sizeof *tuple->fields);
 	return(tuple);
 }
 
+/** Duplicate the virtual field data in a dtuple_t
+@param[in,out]		vrow	dtuple contains the virtual fields
+@param[in]		heap	heap memory to use */
+UNIV_INLINE
+void
+dtuple_dup_v_fld(
+	const dtuple_t*	vrow,
+	mem_heap_t*	heap)
+{
+	for (ulint i = 0; i < vrow->n_v_fields; i++) {
+		dfield_t*       dfield = dtuple_get_nth_v_field(vrow, i);
+		dfield_dup(dfield, heap);
+	}
+}
+
+/** Initialize the virtual field data in a dtuple_t
+@param[in,out]		vrow	dtuple contains the virtual fields */
+UNIV_INLINE
+void
+dtuple_init_v_fld(
+	const dtuple_t*	vrow)
+{
+	for (ulint i = 0; i < vrow->n_v_fields; i++) {
+		dfield_t*       dfield = dtuple_get_nth_v_field(vrow, i);
+		dfield_get_type(dfield)->mtype = DATA_MISSING;
+		dfield_set_len(dfield, UNIV_SQL_NULL);
+	}
+}
+
 /**********************************************************//**
 Creates a data tuple to a memory heap. The default value for number
 of fields used in record comparisons for this tuple is n_fields.
-@return	own: created tuple */
+@return own: created tuple */
 UNIV_INLINE
 dtuple_t*
 dtuple_create(
@@ -427,24 +554,58 @@ dtuple_create(
 				bytes will be allocated from this heap */
 	ulint		n_fields) /*!< in: number of fields */
 {
+	return(dtuple_create_with_vcol(heap, n_fields, 0));
+}
+
+/** Creates a data tuple with virtual columns to a memory heap.
+@param[in]	heap		memory heap where the tuple is created
+@param[in]	n_fields	number of fields
+@param[in]	n_v_fields	number of fields on virtual col
+@return own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_with_vcol(
+	mem_heap_t*	heap,
+	ulint		n_fields,
+	ulint		n_v_fields)
+{
 	void*		buf;
 	ulint		buf_size;
 	dtuple_t*	tuple;
 
 	ut_ad(heap);
 
-	buf_size = DTUPLE_EST_ALLOC(n_fields);
+	buf_size = DTUPLE_EST_ALLOC(n_fields + n_v_fields);
 	buf = mem_heap_alloc(heap, buf_size);
 
-	tuple = dtuple_create_from_mem(buf, buf_size, n_fields);
+	tuple = dtuple_create_from_mem(buf, buf_size, n_fields, n_v_fields);
 
 	return(tuple);
 }
 
+/** Copies a data tuple's virtual fields to another. This is a shallow copy.
+@param[in,out]	d_tuple		destination tuple
+@param[in]	s_tuple		source tuple */
+UNIV_INLINE
+void
+dtuple_copy_v_fields(
+	dtuple_t*	d_tuple,
+	const dtuple_t*	s_tuple)
+{
+
+	ulint		n_v_fields	= dtuple_get_n_v_fields(d_tuple);
+	ut_ad(n_v_fields == dtuple_get_n_v_fields(s_tuple));
+
+	for (ulint i = 0; i < n_v_fields; i++) {
+		dfield_copy(dtuple_get_nth_v_field(d_tuple, i),
+			    dtuple_get_nth_v_field(s_tuple, i));
+	}
+}
+
 /*********************************************************************//**
 Copies a data tuple to another.  This is a shallow copy; if a deep copy
 is desired, dfield_dup() will have to be invoked on each field.
-@return	own: copy of tuple */
+@return own: copy of tuple */
 UNIV_INLINE
 dtuple_t*
 dtuple_copy(
@@ -454,7 +615,9 @@ dtuple_copy(
 				where the tuple is created */
 {
 	ulint		n_fields	= dtuple_get_n_fields(tuple);
-	dtuple_t*	new_tuple	= dtuple_create(heap, n_fields);
+	ulint		n_v_fields	= dtuple_get_n_v_fields(tuple);
+	dtuple_t*	new_tuple	= dtuple_create_with_vcol(
+						heap, n_fields, n_v_fields);
 	ulint		i;
 
 	for (i = 0; i < n_fields; i++) {
@@ -462,6 +625,11 @@ dtuple_copy(
 			    dtuple_get_nth_field(tuple, i));
 	}
 
+	for (i = 0; i < n_v_fields; i++) {
+		dfield_copy(dtuple_get_nth_v_field(new_tuple, i),
+			    dtuple_get_nth_v_field(tuple, i));
+	}
+
 	return(new_tuple);
 }
 
@@ -469,7 +637,7 @@ dtuple_copy(
 The following function returns the sum of data lengths of a tuple. The space
 occupied by the field structs or the tuple struct is not counted. Neither
 is possible space in externally stored parts of the field.
-@return	sum of data lengths */
+@return sum of data lengths */
 UNIV_INLINE
 ulint
 dtuple_get_data_size(
@@ -506,7 +674,7 @@ dtuple_get_data_size(
 
 /*********************************************************************//**
 Computes the number of externally stored fields in a data tuple.
-@return	number of externally stored fields */
+@return number of externally stored fields */
 UNIV_INLINE
 ulint
 dtuple_get_n_ext(
@@ -546,18 +714,19 @@ dtuple_set_types_binary(
 	}
 }
 
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return	the folded value */
+/** Fold a prefix given as the number of fields of a tuple.
+@param[in]	tuple		index record
+@param[in]	n_fields	number of complete fields to fold
+@param[in]	n_bytes		number of bytes to fold in the last field
+@param[in]	tree_id		index tree ID
+@return the folded value */
 UNIV_INLINE
 ulint
 dtuple_fold(
-/*========*/
-	const dtuple_t*	tuple,	/*!< in: the tuple */
-	ulint		n_fields,/*!< in: number of complete fields to fold */
-	ulint		n_bytes,/*!< in: number of bytes to fold in an
-				incomplete last field */
-	index_id_t	tree_id)/*!< in: index tree id */
+	const dtuple_t*	tuple,
+	ulint		n_fields,
+	ulint		n_bytes,
+	index_id_t	tree_id)
 {
 	const dfield_t*	field;
 	ulint		i;
@@ -616,7 +785,7 @@ data_write_sql_null(
 
 /**********************************************************************//**
 Checks if a dtuple contains an SQL null value.
-@return	TRUE if some field is SQL null */
+@return TRUE if some field is SQL null */
 UNIV_INLINE
 ibool
 dtuple_contains_null(
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index 111664b0b52..27310963ec5 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,19 +29,15 @@ Created 1/16/1996 Heikki Tuuri
 #include "univ.i"
 
 extern ulint	data_mysql_default_charset_coll;
-#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
 #define DATA_MYSQL_BINARY_CHARSET_COLL 63
 
 /* SQL data type struct */
 struct dtype_t;
 
-/* SQL Like operator comparison types */
+/** SQL Like operator comparison types */
 enum ib_like_t {
-	IB_LIKE_EXACT,                  /* e.g.  STRING */
-	IB_LIKE_PREFIX,                 /* e.g., STRING% */
-	IB_LIKE_SUFFIX,                 /* e.g., %STRING */
-	IB_LIKE_SUBSTR,                 /* e.g., %STRING% */
-	IB_LIKE_REGEXP                  /* Future */
+	IB_LIKE_EXACT,	/**< e.g.  STRING */
+	IB_LIKE_PREFIX	/**< e.g., STRING% */
 };
 
 /*-------------------------------------------*/
@@ -79,8 +75,29 @@ binary strings */
 				DATA_VARMYSQL for all character sets, and the
 				charset-collation for tables created with it
 				can also be latin1_swedish_ci */
+
+/* DATA_POINT&DATA_VAR_POINT are for standard geometry datatype 'point' and
+DATA_GEOMETRY include all other standard geometry datatypes as described in
+OGC standard(line_string, polygon, multi_point, multi_polygon,
+multi_line_string, geometry_collection, geometry).
+Currently, geometry data is stored in the standard Well-Known Binary(WKB)
+format (http://www.opengeospatial.org/standards/sfa).
+We use BLOB as underlying datatype for DATA_GEOMETRY and DATA_VAR_POINT
+while CHAR for DATA_POINT */
+#define DATA_GEOMETRY	14	/* geometry datatype of variable length */
+/* The following two are disabled temporarily; we won't create them in
+get_innobase_type_from_mysql_type().
+TODO: We will enable them when we come to the fixed-length POINT
+again. */
+#define DATA_POINT	15	/* geometry datatype of fixed length POINT */
+#define DATA_VAR_POINT	16	/* geometry datatype of variable length
+				POINT, used when we want to store POINT
+				as BLOB internally */
 #define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
 				requires the values are <= 63 */
+
+#define DATA_MTYPE_CURRENT_MIN	DATA_VARCHAR	/* minimum value of mtype */
+#define DATA_MTYPE_CURRENT_MAX	DATA_VAR_POINT	/* maximum value of mtype */
 /*-------------------------------------------*/
 /* The 'PRECISE TYPE' of a column */
 /*
@@ -166,10 +183,20 @@ be less than 256 */
 				In earlier versions this was set for some
 				BLOB columns.
 */
+#define DATA_GIS_MBR	2048	/* Used as GIS MBR column */
+#define DATA_MBR_LEN	(SPDIMS * 2 * sizeof(double)) /* GIS MBR length */
+
 #define	DATA_LONG_TRUE_VARCHAR 4096	/* this is ORed to the precise data
 				type when the column is true VARCHAR where
 				MySQL uses 2 bytes to store the data len;
 				for shorter VARCHARs MySQL uses only 1 byte */
+#define	DATA_VIRTUAL	8192	/* Virtual column */
+
+/** Get the number of system columns in a table. */
+#define dict_table_get_n_sys_cols(table) DATA_N_SYS_COLS
+/** Check whether locking is disabled (never). */
+#define dict_table_is_locking_disabled(table) false
+
 /*-------------------------------------------*/
 
 /* This many bytes we need to store the type information affecting the
@@ -183,6 +210,15 @@ store the charset-collation number; one byte is left unused, though */
 /* Maximum multi-byte character length in bytes, plus 1 */
 #define DATA_MBMAX	5
 
+/* For DATA_POINT of dimension 2, the length of value in btree is always 25,
+which is the sum of:
+SRID_SIZE(4) + WKB_HEADER_SIZE(1+4) + POINT_DATA_SIZE(8*2).
+So the length of physical record or POINT KEYs on btree are 25.
+GIS_TODO: When we support multi-dimensions DATA_POINT, we should get the
+length from corresponding column or index definition, instead of this MACRO
+*/
+#define DATA_POINT_LEN	25
+
 /* Pack mbminlen, mbmaxlen to mbminmaxlen. */
 #define DATA_MBMINMAXLEN(mbminlen, mbmaxlen)	\
 	((mbmaxlen) * DATA_MBMAX + (mbminlen))
@@ -194,6 +230,30 @@ because in GCC it returns a long. */
 /* Get mbmaxlen from mbminmaxlen. */
 #define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
 
+/* For checking if a geom_type is POINT */
+#define DATA_POINT_MTYPE(mtype) ((mtype) == DATA_POINT			\
+				 || (mtype) == DATA_VAR_POINT)
+
+/* For checking if mtype is GEOMETRY datatype */
+#define DATA_GEOMETRY_MTYPE(mtype)	(DATA_POINT_MTYPE(mtype)	\
+					 || (mtype) == DATA_GEOMETRY)
+
+/* For checking if mtype is BLOB or GEOMETRY, since we use BLOB as
+the underlying datatype of GEOMETRY(not DATA_POINT) data. */
+#define DATA_LARGE_MTYPE(mtype) ((mtype) == DATA_BLOB			\
+				 || (mtype) == DATA_VAR_POINT		\
+				 || (mtype) == DATA_GEOMETRY)
+
+/* For checking if data type is big length data type. */
+#define DATA_BIG_LEN_MTYPE(len, mtype) ((len) > 255 || DATA_LARGE_MTYPE(mtype))
+
+/* For checking if the column is a big length column. */
+#define DATA_BIG_COL(col) DATA_BIG_LEN_MTYPE((col)->len, (col)->mtype)
+
+/* For checking if data type is large binary data type. */
+#define DATA_LARGE_BINARY(mtype,prtype) ((mtype) == DATA_GEOMETRY || \
+	((mtype) == DATA_BLOB && !((prtype) & DATA_BINARY_TYPE)))
+
 /* We now support 15 bits (up to 32767) collation number */
 #define MAX_CHAR_COLL_NUM	32767
 
@@ -203,7 +263,7 @@ because in GCC it returns a long. */
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 Gets the MySQL type code from a dtype.
-@return	MySQL type code; this is NOT an InnoDB type code! */
+@return MySQL type code; this is NOT an InnoDB type code! */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
@@ -213,8 +273,7 @@ dtype_get_mysql_type(
 Determine how many bytes the first n characters of the given string occupy.
 If the string is shorter than n characters, returns the number of bytes
 the characters in the string occupy.
-@return	length of the prefix, in bytes */
-UNIV_INTERN
+@return length of the prefix, in bytes */
 ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
@@ -231,8 +290,7 @@ dtype_get_at_most_n_mbchars(
 /*********************************************************************//**
 Checks if a data main type is a string type. Also a BLOB is considered a
 string type.
-@return	TRUE if string type */
-UNIV_INTERN
+@return TRUE if string type */
 ibool
 dtype_is_string_type(
 /*=================*/
@@ -241,8 +299,7 @@ dtype_is_string_type(
 Checks if a type is a binary string type. Note that for tables created with
 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
 those DATA_BLOB columns this function currently returns FALSE.
-@return	TRUE if binary string type */
-UNIV_INTERN
+@return TRUE if binary string type */
 ibool
 dtype_is_binary_string_type(
 /*========================*/
@@ -253,8 +310,7 @@ Checks if a type is a non-binary string type. That is, dtype_is_string_type is
 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
 For those DATA_BLOB columns this function currently returns TRUE.
-@return	TRUE if non-binary string type */
-UNIV_INTERN
+@return TRUE if non-binary string type */
 ibool
 dtype_is_non_binary_string_type(
 /*============================*/
@@ -280,7 +336,7 @@ dtype_copy(
 	const dtype_t*	type2);	/*!< in: type struct to copy from */
 /*********************************************************************//**
 Gets the SQL main data type.
-@return	SQL main data type */
+@return SQL main data type */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
@@ -288,7 +344,7 @@ dtype_get_mtype(
 	const dtype_t*	type);	/*!< in: data type */
 /*********************************************************************//**
 Gets the precise data type.
-@return	precise data type */
+@return precise data type */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
@@ -309,7 +365,7 @@ dtype_get_mblen(
 				multi-byte character */
 /*********************************************************************//**
 Gets the MySQL charset-collation code for MySQL string types.
-@return	MySQL charset-collation code */
+@return MySQL charset-collation code */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
@@ -319,7 +375,6 @@ dtype_get_charset_coll(
 Forms a precise type from the < 4.1.2 format precise type plus the
 charset-collation code.
 @return precise type, including the charset-collation code */
-UNIV_INTERN
 ulint
 dtype_form_prtype(
 /*==============*/
@@ -330,7 +385,7 @@ dtype_form_prtype(
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
 codes are introduced in MySQL later.
-@return	TRUE if a subset of UTF-8 */
+@return TRUE if a subset of UTF-8 */
 UNIV_INLINE
 ibool
 dtype_is_utf8(
@@ -339,7 +394,7 @@ dtype_is_utf8(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Gets the type length.
-@return	fixed length of the type, in bytes, or 0 if variable-length */
+@return fixed length of the type, in bytes, or 0 if variable-length */
 UNIV_INLINE
 ulint
 dtype_get_len(
@@ -377,19 +432,10 @@ dtype_set_mbminmaxlen(
 	ulint		mbmaxlen);	/*!< in: maximum length of a char,
 					in bytes, or 0 if this is not
 					a character type */
-/*********************************************************************//**
-Gets the padding character code for the type.
-@return	padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
-	ulint	mtype,		/*!< in: main type */
-	ulint	prtype);	/*!< in: precise type */
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************************//**
 Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return	fixed size, or 0 */
+@return fixed size, or 0 */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
@@ -403,7 +449,7 @@ dtype_get_fixed_size_low(
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Returns the minimum size of a data type.
-@return	minimum size */
+@return minimum size */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
@@ -416,7 +462,7 @@ dtype_get_min_size_low(
 /***********************************************************************//**
 Returns the maximum size of a data type. Note: types in system tables may be
 incomplete and return incorrect information.
-@return	maximum size */
+@return maximum size */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
@@ -427,7 +473,7 @@ dtype_get_max_size_low(
 /***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
 For fixed length types it is the fixed length of the type, otherwise 0.
-@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
@@ -486,15 +532,13 @@ dtype_sql_name(
 
 /*********************************************************************//**
 Validates a data type structure.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dtype_validate(
 /*===========*/
 	const dtype_t*	type);	/*!< in: type struct to validate */
 /*********************************************************************//**
 Prints a data type structure. */
-UNIV_INTERN
 void
 dtype_print(
 /*========*/
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index d489bef89a8..57770ec0e17 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,15 +23,13 @@ Data types
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
 
-#include <string.h> /* strlen() */
-
 #include "mach0data.h"
 #ifndef UNIV_HOTBACKUP
 # include "ha_prototypes.h"
 
 /*********************************************************************//**
 Gets the MySQL charset-collation code for MySQL string types.
-@return	MySQL charset-collation code */
+@return MySQL charset-collation code */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
@@ -45,7 +43,7 @@ dtype_get_charset_coll(
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
 codes are introduced in MySQL later.
-@return	TRUE if a subset of UTF-8 */
+@return TRUE if a subset of UTF-8 */
 UNIV_INLINE
 ibool
 dtype_is_utf8(
@@ -68,7 +66,7 @@ dtype_is_utf8(
 
 /*********************************************************************//**
 Gets the MySQL type code from a dtype.
-@return	MySQL type code; this is NOT an InnoDB type code! */
+@return MySQL type code; this is NOT an InnoDB type code! */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
@@ -180,7 +178,7 @@ dtype_copy(
 
 /*********************************************************************//**
 Gets the SQL main data type.
-@return	SQL main data type */
+@return SQL main data type */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
@@ -194,7 +192,7 @@ dtype_get_mtype(
 
 /*********************************************************************//**
 Gets the precise data type.
-@return	precise data type */
+@return precise data type */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
@@ -208,7 +206,7 @@ dtype_get_prtype(
 
 /*********************************************************************//**
 Gets the type length.
-@return	fixed length of the type, in bytes, or 0 if variable-length */
+@return fixed length of the type, in bytes, or 0 if variable-length */
 UNIV_INLINE
 ulint
 dtype_get_len(
@@ -248,45 +246,6 @@ dtype_get_mbmaxlen(
 	return(DATA_MBMAXLEN(type->mbminmaxlen));
 }
 
-/*********************************************************************//**
-Gets the padding character code for a type.
-@return	padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
-	ulint	mtype,		/*!< in: main type */
-	ulint	prtype)		/*!< in: precise type */
-{
-	switch (mtype) {
-	case DATA_FIXBINARY:
-	case DATA_BINARY:
-		if (dtype_get_charset_coll(prtype)
-		    == DATA_MYSQL_BINARY_CHARSET_COLL) {
-			/* Starting from 5.0.18, do not pad
-			VARBINARY or BINARY columns. */
-			return(ULINT_UNDEFINED);
-		}
-		/* Fall through */
-	case DATA_CHAR:
-	case DATA_VARCHAR:
-	case DATA_MYSQL:
-	case DATA_VARMYSQL:
-		/* Space is the padding character for all char and binary
-		strings, and starting from 5.0.3, also for TEXT strings. */
-
-		return(0x20);
-	case DATA_BLOB:
-		if (!(prtype & DATA_BINARY_TYPE)) {
-			return(0x20);
-		}
-		/* Fall through */
-	default:
-		/* No padding specified */
-		return(ULINT_UNDEFINED);
-	}
-}
-
 /**********************************************************************//**
 Stores for a type the information which determines its alphabetical ordering
 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
@@ -309,7 +268,7 @@ dtype_new_store_for_order_and_null_size(
 
 	ut_ad(type);
 	ut_ad(type->mtype >= DATA_VARCHAR);
-	ut_ad(type->mtype <= DATA_MYSQL);
+	ut_ad(type->mtype <= DATA_MTYPE_MAX);
 
 	buf[0] = (byte)(type->mtype & 0xFFUL);
 
@@ -483,6 +442,9 @@ dtype_sql_name(
 	case DATA_BINARY:
 		ut_snprintf(name, name_sz, "VARBINARY(%u)", len);
 		break;
+	case DATA_GEOMETRY:
+		ut_snprintf(name, name_sz, "GEOMETRY");
+		break;
 	case DATA_BLOB:
 		switch (len) {
 		case 9:
@@ -513,7 +475,7 @@ dtype_sql_name(
 
 /***********************************************************************//**
 Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return	fixed size, or 0 */
+@return fixed size, or 0 */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
@@ -548,6 +510,7 @@ dtype_get_fixed_size_low(
 	case DATA_INT:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
+	case DATA_POINT:
 		return(len);
 	case DATA_MYSQL:
 #ifndef UNIV_HOTBACKUP
@@ -579,6 +542,8 @@ dtype_get_fixed_size_low(
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
+	case DATA_VAR_POINT:
+	case DATA_GEOMETRY:
 	case DATA_BLOB:
 		return(0);
 	default:
@@ -591,7 +556,7 @@ dtype_get_fixed_size_low(
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Returns the minimum size of a data type.
-@return	minimum size */
+@return minimum size */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
@@ -625,6 +590,7 @@ dtype_get_min_size_low(
 	case DATA_INT:
 	case DATA_FLOAT:
 	case DATA_DOUBLE:
+	case DATA_POINT:
 		return(len);
 	case DATA_MYSQL:
 		if (prtype & DATA_BINARY_TYPE) {
@@ -647,6 +613,8 @@ dtype_get_min_size_low(
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
+	case DATA_VAR_POINT:
+	case DATA_GEOMETRY:
 	case DATA_BLOB:
 		return(0);
 	default:
@@ -659,7 +627,7 @@ dtype_get_min_size_low(
 /***********************************************************************//**
 Returns the maximum size of a data type. Note: types in system tables may be
 incomplete and return incorrect information.
-@return	maximum size */
+@return maximum size */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
@@ -679,7 +647,10 @@ dtype_get_max_size_low(
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARMYSQL:
+	case DATA_POINT:
 		return(len);
+	case DATA_VAR_POINT:
+	case DATA_GEOMETRY:
 	case DATA_BLOB:
 		break;
 	default:
@@ -693,7 +664,7 @@ dtype_get_max_size_low(
 /***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
 For fixed length types it is the fixed length of the type, otherwise 0.
-@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index feac81af98e..32f9117af84 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2015, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -27,6 +27,7 @@ Created 5/24/1996 Heikki Tuuri
 #ifndef db0err_h
 #define db0err_h
 
+/* Do not include univ.i because univ.i includes this. */
 
 enum dberr_t {
 	DB_SUCCESS_LOCKED_REC = 9,	/*!< like DB_SUCCESS, but a new
@@ -64,7 +65,8 @@ enum dberr_t {
 					which is referenced */
 	DB_CANNOT_ADD_CONSTRAINT,	/*!< adding a foreign key constraint
 					to a table failed */
-	DB_CORRUPTION,			/*!< data structure corruption noticed */
+	DB_CORRUPTION,			/*!< data structure corruption
+					noticed */
 	DB_CANNOT_DROP_CONSTRAINT,	/*!< dropping a foreign key constraint
 					from a table failed */
 	DB_NO_SAVEPOINT,		/*!< no savepoint exists with the given
@@ -124,20 +126,60 @@ enum dberr_t {
 	DB_ONLINE_LOG_TOO_BIG,		/*!< Modification log grew too big
 					during online index creation */
 
-	DB_IO_ERROR,			/*!< Generic IO error */
 	DB_IDENTIFIER_TOO_LONG,		/*!< Identifier name too long */
 	DB_FTS_EXCEED_RESULT_CACHE_LIMIT,	/*!< FTS query memory
 					exceeds result cache limit */
-	DB_TEMP_FILE_WRITE_FAILURE,	/*!< Temp file write failure */
+	DB_TEMP_FILE_WRITE_FAIL,	/*!< Temp file write failure */
+	DB_CANT_CREATE_GEOMETRY_OBJECT,	/*!< Cannot create specified Geometry
+					data object */
+	DB_CANNOT_OPEN_FILE,		/*!< Cannot open a file */
 	DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
 					/*< Too many words in a phrase */
-	DB_TOO_BIG_FOR_REDO,		/* Record length greater than 10%
-					of redo log */
+
+	DB_TABLESPACE_TRUNCATED,	/*!< tablespace was truncated */
+
 	DB_DECRYPTION_FAILED,		/* Tablespace encrypted and
 					decrypt operation failed because
 					of missing key management plugin,
 					or missing or incorrect key or
 					incorret AES method or algorithm. */
+
+	DB_IO_ERROR = 100,		/*!< Generic IO error */
+
+	DB_IO_DECOMPRESS_FAIL,		/*!< Failure to decompress a page
+					after reading it from disk */
+
+	DB_IO_NO_PUNCH_HOLE,		/*!< Punch hole not supported by
+					InnoDB */
+
+	DB_IO_NO_PUNCH_HOLE_FS,		/*!< The file system doesn't support
+					punch hole */
+
+	DB_IO_NO_PUNCH_HOLE_TABLESPACE,	/*!< The tablespace doesn't support
+					punch hole */
+
+	DB_IO_DECRYPT_FAIL,		/*!< Failure to decrypt a page
+					after reading it from disk */
+
+	DB_IO_NO_ENCRYPT_TABLESPACE,	/*!< The tablespace doesn't support
+					encrypt */
+
+	DB_IO_PARTIAL_FAILED,		/*!< Partial IO request failed */
+
+	DB_FORCED_ABORT,		/*!< Transaction was forced to rollback
+					by a higher priority transaction */
+
+	DB_TABLE_CORRUPT,		/*!< Table/clustered index is
+					corrupted */
+
+	DB_WRONG_FILE_NAME,		/*!< Invalid Filename */
+
+	DB_COMPUTE_VALUE_FAILED,	/*!< Compute generated value failed */
+
+	DB_NO_FK_ON_S_BASE_COL,		/*!< Cannot add foreign constraint
+					placed on the base column of
+					stored column */
+
 	/* The following are partial failure codes */
 	DB_FAIL = 1000,
 	DB_OVERFLOW,
@@ -146,22 +188,18 @@ enum dberr_t {
 	DB_ZIP_OVERFLOW,
 	DB_RECORD_NOT_FOUND = 1500,
 	DB_END_OF_INDEX,
-	DB_DICT_CHANGED,		/*!< Some part of table dictionary has
-					changed. Such as index dropped or
-					foreign key dropped */
-
+	DB_NOT_FOUND,			/*!< Generic error code for "Not found"
+					type of errors */
 
-        /* The following are API only error codes. */
+	/* The following are API only error codes. */
 	DB_DATA_MISMATCH = 2000,	/*!< Column update or read failed
 					because the types mismatch */
 
-	DB_SCHEMA_NOT_LOCKED,		/*!< If an API function expects the
+	DB_SCHEMA_NOT_LOCKED		/*!< If an API function expects the
 					schema to be locked in exclusive mode
 					and if it's not then that API function
 					will return this error code */
 
-	DB_NOT_FOUND			/*!< Generic error code for "Not found"
-					type of errors */
 };
 
 #endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index 477e1150f43..5884ba4bcc2 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -39,41 +39,42 @@ typedef	byte	dict_hdr_t;
 
 /**********************************************************************//**
 Gets a pointer to the dictionary header and x-latches its page.
-@return	pointer to the dictionary header, page x-latched */
-UNIV_INTERN
+@return pointer to the dictionary header, page x-latched */
 dict_hdr_t*
 dict_hdr_get(
 /*=========*/
 	mtr_t*	mtr);	/*!< in: mtr */
 /**********************************************************************//**
 Returns a new table, index, or space id. */
-UNIV_INTERN
 void
 dict_hdr_get_new_id(
 /*================*/
-	table_id_t*	table_id,	/*!< out: table id
-					(not assigned if NULL) */
-	index_id_t*	index_id,	/*!< out: index id
-					(not assigned if NULL) */
-	ulint*		space_id);	/*!< out: space id
-					(not assigned if NULL) */
+	table_id_t*		table_id,	/*!< out: table id
+						(not assigned if NULL) */
+	index_id_t*		index_id,	/*!< out: index id
+						(not assigned if NULL) */
+	ulint*			space_id,	/*!< out: space id
+						(not assigned if NULL) */
+	const dict_table_t*	table,		/*!< in: table */
+	bool			disable_redo);	/*!< in: if true and table
+						object is NULL
+						then disable-redo */
 /**********************************************************************//**
 Writes the current value of the row id counter to the dictionary header file
 page. */
-UNIV_INTERN
 void
 dict_hdr_flush_row_id(void);
 /*=======================*/
 /**********************************************************************//**
 Returns a new row id.
-@return	the new id */
+@return the new id */
 UNIV_INLINE
 row_id_t
 dict_sys_get_new_row_id(void);
 /*=========================*/
 /**********************************************************************//**
 Reads a row id from a record or other 6-byte stored form.
-@return	row id */
+@return row id */
 UNIV_INLINE
 row_id_t
 dict_sys_read_row_id(
@@ -91,7 +92,6 @@ dict_sys_write_row_id(
 Initializes the data dictionary memory structures when the database is
 started. This function is also called when the data dictionary is created.
 @return DB_SUCCESS or error code. */
-UNIV_INTERN
 dberr_t
 dict_boot(void)
 /*===========*/
@@ -100,7 +100,6 @@ dict_boot(void)
 /*****************************************************************//**
 Creates and initializes the data dictionary at the server bootstrap.
 @return DB_SUCCESS or error code. */
-UNIV_INTERN
 dberr_t
 dict_create(void)
 /*=============*/
@@ -221,7 +220,8 @@ enum dict_col_sys_indexes_enum {
 	DICT_COL__SYS_INDEXES__TYPE		= 4,
 	DICT_COL__SYS_INDEXES__SPACE		= 5,
 	DICT_COL__SYS_INDEXES__PAGE_NO		= 6,
-	DICT_NUM_COLS__SYS_INDEXES		= 7
+	DICT_COL__SYS_INDEXES__MERGE_THRESHOLD	= 7,
+	DICT_NUM_COLS__SYS_INDEXES		= 8
 };
 /* The field numbers in the SYS_INDEXES clustered index */
 enum dict_fld_sys_indexes_enum {
@@ -234,7 +234,8 @@ enum dict_fld_sys_indexes_enum {
 	DICT_FLD__SYS_INDEXES__TYPE		= 6,
 	DICT_FLD__SYS_INDEXES__SPACE		= 7,
 	DICT_FLD__SYS_INDEXES__PAGE_NO		= 8,
-	DICT_NUM_FIELDS__SYS_INDEXES		= 9
+	DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD	= 9,
+	DICT_NUM_FIELDS__SYS_INDEXES		= 10
 };
 /* The columns in SYS_FIELDS */
 enum dict_col_sys_fields_enum {
@@ -325,6 +326,23 @@ enum dict_fld_sys_datafiles_enum {
 	DICT_NUM_FIELDS__SYS_DATAFILES			= 4
 };
 
+/* The columns in SYS_VIRTUAL */
+enum dict_col_sys_virtual_enum {
+	DICT_COL__SYS_VIRTUAL__TABLE_ID		= 0,
+	DICT_COL__SYS_VIRTUAL__POS		= 1,
+	DICT_COL__SYS_VIRTUAL__BASE_POS		= 2,
+	DICT_NUM_COLS__SYS_VIRTUAL		= 3
+};
+/* The field numbers in the SYS_VIRTUAL clustered index */
+enum dict_fld_sys_virtual_enum {
+	DICT_FLD__SYS_VIRTUAL__TABLE_ID		= 0,
+	DICT_FLD__SYS_VIRTUAL__POS		= 1,
+	DICT_FLD__SYS_VIRTUAL__BASE_POS		= 2,
+	DICT_FLD__SYS_VIRTUAL__DB_TRX_ID	= 3,
+	DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR	= 4,
+	DICT_NUM_FIELDS__SYS_VIRTUAL		= 5
+};
+
 /* A number of the columns above occur in multiple tables.  These are the
 length of thos fields. */
 #define	DICT_FLD_LEN_SPACE	4
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index 2b156a4f672..e40c3f844e3 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 4/18/1996 Heikki Tuuri
 
 /**********************************************************************//**
 Returns a new row id.
-@return	the new id */
+@return the new id */
 UNIV_INLINE
 row_id_t
 dict_sys_get_new_row_id(void)
@@ -33,7 +33,7 @@ dict_sys_get_new_row_id(void)
 {
 	row_id_t	id;
 
-	mutex_enter(&(dict_sys->mutex));
+	mutex_enter(&dict_sys->mutex);
 
 	id = dict_sys->row_id;
 
@@ -44,14 +44,14 @@ dict_sys_get_new_row_id(void)
 
 	dict_sys->row_id++;
 
-	mutex_exit(&(dict_sys->mutex));
+	mutex_exit(&dict_sys->mutex);
 
 	return(id);
 }
 
 /**********************************************************************//**
 Reads a row id from a record or other 6-byte stored form.
-@return	row id */
+@return row id */
 UNIV_INLINE
 row_id_t
 dict_sys_read_row_id(
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 150c76b2e65..51cef91d318 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -33,98 +33,156 @@ Created 1/8/1996 Heikki Tuuri
 #include "row0types.h"
 #include "mtr0mtr.h"
 #include "fil0crypt.h"
+#include "fsp0space.h"
 
 /*********************************************************************//**
 Creates a table create graph.
-@return	own: table create node */
-UNIV_INTERN
+@return own: table create node */
 tab_node_t*
 tab_create_graph_create(
 /*====================*/
-	dict_table_t*	table,	/*!< in: table to create, built as a memory data
-				structure */
-	mem_heap_t*	heap,	/*!< in: heap where created */
-	bool		commit, /*!< in: true if the commit node should be
-				added to the query graph */
-	fil_encryption_t mode,	/*!< in: encryption mode */
-	ulint		key_id);/*!< in: encryption key_id */
-/*********************************************************************//**
-Creates an index create graph.
-@return	own: index create node */
-UNIV_INTERN
+	dict_table_t*	table,		/*!< in: table to create, built as
+					a memory data structure */
+	mem_heap_t*	heap,		/*!< in: heap where created */
+	fil_encryption_t mode,		/*!< in: encryption mode */
+	ulint		key_id);	/*!< in: encryption key_id */
+
+/** Creates an index create graph.
+@param[in]	index	index to create, built as a memory data structure
+@param[in,out]	heap	heap where created
+@param[in]	add_v	new virtual columns added in the same clause with
+			add index
+@return own: index create node */
 ind_node_t*
 ind_create_graph_create(
-/*====================*/
-	dict_index_t*	index,	/*!< in: index to create, built as a memory data
-				structure */
-	mem_heap_t*	heap,	/*!< in: heap where created */
-	bool		commit);/*!< in: true if the commit node should be
-				added to the query graph */
+	dict_index_t*		index,
+	mem_heap_t*		heap,
+	const dict_add_v_col_t*	add_v);
+
 /***********************************************************//**
 Creates a table. This is a high-level function used in SQL execution graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 dict_create_table_step(
 /*===================*/
-	que_thr_t*	thr);	/*!< in: query thread */
+	que_thr_t*	thr);		/*!< in: query thread */
+
+/** Builds a tablespace to store various objects.
+@param[in,out]	tablespace	Tablespace object describing what to build.
+@return DB_SUCCESS or error code. */
+dberr_t
+dict_build_tablespace(
+	Tablespace*	tablespace);
+
+/** Builds a tablespace to contain a table, using file-per-table=1.
+@param[in,out]	table	Table to build in its own tablespace.
+@param[in]	node	Table create node
+@return DB_SUCCESS or error code */
+dberr_t
+dict_build_tablespace_for_table(
+	dict_table_t*	table,
+	tab_node_t*	node);
+
+/** Assign a new table ID and put it into the table cache and the transaction.
+@param[in,out]	table	Table that needs an ID
+@param[in,out]	trx	Transaction */
+void
+dict_table_assign_new_id(
+	dict_table_t*	table,
+	trx_t*		trx);
+
 /***********************************************************//**
 Creates an index. This is a high-level function used in SQL execution
 graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 dict_create_index_step(
 /*===================*/
-	que_thr_t*	thr);	/*!< in: query thread */
+	que_thr_t*	thr);		/*!< in: query thread */
+
+/***************************************************************//**
+Builds an index definition but doesn't update sys_table.
+Does not return a value. */
+void
+dict_build_index_def(
+/*=================*/
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index,	/*!< in/out: index */
+	trx_t*			trx);	/*!< in/out: InnoDB transaction
+					handle */
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+Don't update SYSTEM TABLES.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree(
+/*===================*/
+	dict_index_t*	index,	/*!< in/out: index */
+	const trx_t*	trx);	/*!< in: InnoDB transaction handle */
+
 /*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
+Recreate the index tree associated with a row in SYS_INDEXES table.
 @return	new root page number, or FIL_NULL on failure */
-UNIV_INTERN
 ulint
-dict_truncate_index_tree(
-/*=====================*/
-	dict_table_t*	table,	/*!< in: the table the index belongs to */
-	ulint		space,	/*!< in: 0=truncate,
-				nonzero=create the index tree in the
-				given tablespace */
-	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
-				record in the clustered index of
-				SYS_INDEXES table. The cursor may be
-				repositioned in this call. */
-	mtr_t*		mtr);	/*!< in: mtr having the latch
-				on the record page. The mtr may be
-				committed and restarted in this call. */
+dict_recreate_index_tree(
+/*======================*/
+	const dict_table_t*	table,	/*!< in: the table the index
+					belongs to */
+	btr_pcur_t*		pcur,	/*!< in/out: persistent cursor pointing
+					to record in the clustered index of
+					SYS_INDEXES table. The cursor may be
+					repositioned in this call. */
+	mtr_t*			mtr);	/*!< in: mtr having the latch
+					on the record page. The mtr may be
+					committed and restarted in this call. */
+
+/** Drop the index tree associated with a row in SYS_INDEXES table.
+@param[in,out]	rec	SYS_INDEXES record
+@param[in,out]	pcur	persistent cursor on rec
+@param[in,out]	mtr	mini-transaction
+@return	whether freeing the B-tree was attempted */
+bool
+dict_drop_index_tree(
+	rec_t*		rec,
+	btr_pcur_t*	pcur,
+	mtr_t*		mtr);
+
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+Don't update SYSTEM TABLES.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree_in_mem(
+/*==========================*/
+	dict_index_t*	index,		/*!< in/out: index */
+	const trx_t*	trx);		/*!< in: InnoDB transaction handle */
+
 /*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
+Truncates the index tree but don't update SYSTEM TABLES.
+@return DB_SUCCESS or error */
+dberr_t
+dict_truncate_index_tree_in_mem(
+/*============================*/
+	dict_index_t*	index);		/*!< in/out: index */
+
+/*******************************************************************//**
+Drops the index tree but don't update SYS_INDEXES table. */
 void
-dict_drop_index_tree(
-/*=================*/
-	rec_t*	rec,	/*!< in/out: record in the clustered index
-			of SYS_INDEXES table */
-	mtr_t*	mtr);	/*!< in: mtr having the latch on the record page */
+dict_drop_index_tree_in_mem(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			page_no);/*!< in: index page-no */
+
 /****************************************************************//**
 Creates the foreign key constraints system tables inside InnoDB
 at server bootstrap or server start if they are not found or are
 not of the right form.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 dict_create_or_check_foreign_constraint_tables(void);
 /*================================================*/
 
 /********************************************************************//**
-Construct foreign key constraint defintion from data dictionary information.
-*/
-UNIV_INTERN
-char*
-dict_foreign_def_get(
-/*=================*/
-	dict_foreign_t*	foreign,/*!< in: foreign */
-	trx_t*		trx);	/*!< in: trx */
-
-/********************************************************************//**
 Generate a foreign key constraint name when it was not named by the user.
 A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
 where the numbers start from 1, and are given locally for this table, that is,
@@ -133,11 +191,10 @@ UNIV_INLINE
 dberr_t
 dict_create_add_foreign_id(
 /*=======================*/
-	ulint*		id_nr,	/*!< in/out: number to use in id generation;
-				incremented if used */
-	const char*	name,	/*!< in: table name */
-	dict_foreign_t*	foreign)/*!< in/out: foreign key */
-	MY_ATTRIBUTE((nonnull));
+	ulint*		id_nr,		/*!< in/out: number to use in id
+					generation; incremented if used */
+	const char*	name,		/*!< in: table name */
+	dict_foreign_t*	foreign);	/*!< in/out: foreign key */
 
 /** Adds the given set of foreign key objects to the dictionary tables
 in the database. This function does not modify the dictionary cache. The
@@ -149,7 +206,6 @@ the dictionary tables
 local_fk_set belong to
 @param[in,out]	trx		transaction
 @return error code or DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 dict_create_add_foreigns_to_dictionary(
 /*===================================*/
@@ -157,38 +213,69 @@ dict_create_add_foreigns_to_dictionary(
 	const dict_table_t*	table,
 	trx_t*			trx)
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Check if a foreign constraint is on columns serving as base columns
+of any stored column. This is to prevent creating SET NULL or CASCADE
+constraint on such columns
+@param[in]	local_fk_set	set of foreign key objects, to be added to
+the dictionary tables
+@param[in]	table		table to which the foreign key objects in
+local_fk_set belong to
+@return true if yes, otherwise, false */
+bool
+dict_foreigns_has_s_base_col(
+	const dict_foreign_set&	local_fk_set,
+	const dict_table_t*	table);
+
 /****************************************************************//**
 Creates the tablespaces and datafiles system tables inside InnoDB
 at server bootstrap or server start if they are not found or are
 not of the right form.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 dict_create_or_check_sys_tablespace(void);
 /*=====================================*/
-/********************************************************************//**
-Add a single tablespace definition to the data dictionary tables in the
-database.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+/** Creates the virtual column system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
 dberr_t
-dict_create_add_tablespace_to_dictionary(
-/*=====================================*/
-	ulint		space,		/*!< in: tablespace id */
-	const char*	name,		/*!< in: tablespace name */
-	ulint		flags,		/*!< in: tablespace flags */
-	const char*	path,		/*!< in: tablespace path */
-	trx_t*		trx,		/*!< in: transaction */
-	bool		commit);	/*!< in: if true then commit the
-					transaction */
+dict_create_or_check_sys_virtual();
+
+/** Put a tablespace definition into the data dictionary,
+replacing what was there previously.
+@param[in]	space_id	Tablespace id
+@param[in]	name	Tablespace name
+@param[in]	flags	Tablespace flags
+@param[in]	path	Tablespace path
+@param[in]	trx	Transaction
+@param[in]	commit	If true, commit the transaction
+@return error code or DB_SUCCESS */
+dberr_t
+dict_replace_tablespace_in_dictionary(
+	ulint		space_id,
+	const char*	name,
+	ulint		flags,
+	const char*	path,
+	trx_t*		trx,
+	bool		commit);
+
+/** Delete records from SYS_TABLESPACES and SYS_DATAFILES associated
+with a particular tablespace ID.
+@param[in]	space	Tablespace ID
+@param[in,out]	trx	Current transaction
+@return DB_SUCCESS if OK, dberr_t if the operation failed */
+dberr_t
+dict_delete_tablespace_and_datafiles(
+	ulint		space,
+	trx_t*		trx);
+
 /********************************************************************//**
 Add a foreign key definition to the data dictionary tables.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 dict_create_add_foreign_to_dictionary(
 /*==================================*/
-	dict_table_t*		table,	/*!< in: table */
 	const char*		name,	/*!< in: table name */
 	const dict_foreign_t*	foreign,/*!< in: foreign key */
 	trx_t*			trx)	/*!< in/out: dictionary transaction */
@@ -206,65 +293,102 @@ dict_foreign_def_get(
 
 /* Table create node structure */
 struct tab_node_t{
-	que_common_t	common;	/*!< node type: QUE_NODE_TABLE_CREATE */
-	dict_table_t*	table;	/*!< table to create, built as a memory data
-				structure with dict_mem_... functions */
-	ins_node_t*	tab_def; /* child node which does the insert of
-				the table definition; the row to be inserted
-				is built by the parent node  */
-	ins_node_t*	col_def; /* child node which does the inserts of
-				the column definitions; the row to be inserted
-				is built by the parent node  */
-	commit_node_t*	commit_node;
-				/* child node which performs a commit after
-				a successful table creation */
+	que_common_t	common;		/*!< node type: QUE_NODE_TABLE_CREATE */
+	dict_table_t*	table;		/*!< table to create, built as a
+					memory data structure with
+					dict_mem_... functions */
+	ins_node_t*	tab_def;	/*!< child node which does the insert of
+					the table definition; the row to be
+					inserted is built by the parent node  */
+	ins_node_t*	col_def;	/*!< child node which does the inserts
+					of the column definitions; the row to
+					be inserted is built by the parent
+					node  */
+	ins_node_t*	v_col_def;	/*!< child node which does the inserts
+					of the sys_virtual row definitions;
+					the row to be inserted is built by
+					the parent node  */
 	/*----------------------*/
 	/* Local storage for this graph node */
-	ulint		state;	/*!< node execution state */
-	ulint		col_no;	/*!< next column definition to insert */
+	ulint		state;		/*!< node execution state */
+	ulint		col_no;		/*!< next column definition to insert */
 	ulint		key_id;	/*!< encryption key_id */
 	fil_encryption_t mode;	/*!< encryption mode */
-	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
+	ulint		base_col_no;	/*!< next base column to insert */
+	mem_heap_t*	heap;		/*!< memory heap used as auxiliary
+					storage */
 };
 
 /* Table create node states */
 #define	TABLE_BUILD_TABLE_DEF	1
 #define	TABLE_BUILD_COL_DEF	2
-#define	TABLE_COMMIT_WORK	3
+#define	TABLE_BUILD_V_COL_DEF	3
 #define	TABLE_ADD_TO_CACHE	4
 #define	TABLE_COMPLETED		5
 
 /* Index create node struct */
 
 struct ind_node_t{
-	que_common_t	common;	/*!< node type: QUE_NODE_INDEX_CREATE */
-	dict_index_t*	index;	/*!< index to create, built as a memory data
-				structure with dict_mem_... functions */
-	ins_node_t*	ind_def; /* child node which does the insert of
-				the index definition; the row to be inserted
-				is built by the parent node  */
-	ins_node_t*	field_def; /* child node which does the inserts of
-				the field definitions; the row to be inserted
-				is built by the parent node  */
-	commit_node_t*	commit_node;
-				/* child node which performs a commit after
-				a successful index creation */
+	que_common_t	common;		/*!< node type: QUE_NODE_INDEX_CREATE */
+	dict_index_t*	index;		/*!< index to create, built as a
+					memory data structure with
+					dict_mem_... functions */
+	ins_node_t*	ind_def;	/*!< child node which does the insert of
+					the index definition; the row to be
+					inserted is built by the parent node  */
+	ins_node_t*	field_def;	/*!< child node which does the inserts
+					of the field definitions; the row to
+					be inserted is built by the parent
+					node  */
 	/*----------------------*/
 	/* Local storage for this graph node */
-	ulint		state;	/*!< node execution state */
-	ulint		page_no;/* root page number of the index */
-	dict_table_t*	table;	/*!< table which owns the index */
-	dtuple_t*	ind_row;/* index definition row built */
-	ulint		field_no;/* next field definition to insert */
-	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
+	ulint		state;		/*!< node execution state */
+	ulint		page_no;	/* root page number of the index */
+	dict_table_t*	table;		/*!< table which owns the index */
+	dtuple_t*	ind_row;	/* index definition row built */
+	ulint		field_no;	/* next field definition to insert */
+	mem_heap_t*	heap;		/*!< memory heap used as auxiliary
+					storage */
+	const dict_add_v_col_t*
+			add_v;		/*!< new virtual columns that are being
+					added along with an add index call */
 };
 
+/** Compose a column number for a virtual column, stored in the "POS" field
+of Sys_columns. The column number includes both its virtual column sequence
+(the "nth" virtual column) and its actual column position in original table
+@param[in]	v_pos		virtual column sequence
+@param[in]	col_pos		column position in original table definition
+@return	composed column position number */
+UNIV_INLINE
+ulint
+dict_create_v_col_pos(
+	ulint	v_pos,
+	ulint	col_pos);
+
+/** Get the column number for a virtual column (the column position in
+original table), stored in the "POS" field of Sys_columns
+@param[in]      pos             virtual column position
+@return column position in original table */
+UNIV_INLINE
+ulint
+dict_get_v_col_mysql_pos(
+        ulint   pos);
+
+/** Get a virtual column sequence (the "nth" virtual column) for a
+virtual column, stored in the "POS" field of Sys_columns
+@param[in]      pos             virtual column position
+@return virtual column sequence */
+UNIV_INLINE
+ulint
+dict_get_v_col_pos(
+        ulint   pos);
+
 /* Index create node states */
 #define	INDEX_BUILD_INDEX_DEF	1
 #define	INDEX_BUILD_FIELD_DEF	2
 #define	INDEX_CREATE_INDEX_TREE	3
-#define	INDEX_COMMIT_WORK	4
-#define	INDEX_ADD_TO_CACHE	5
+#define	INDEX_ADD_TO_CACHE	4
 
 #ifndef UNIV_NONINL
 #include "dict0crea.ic"
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
index 1cbaa47032b..565e4ed1a8c 100644
--- a/storage/innobase/include/dict0crea.ic
+++ b/storage/innobase/include/dict0crea.ic
@@ -23,13 +23,14 @@ Database object creation
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
 
+#include "ha_prototypes.h"
+
 #include "mem0mem.h"
 
 /*********************************************************************//**
 Checks if a table name contains the string "/#sql" which denotes temporary
 tables in MySQL.
 @return true if temporary table */
-UNIV_INTERN
 bool
 row_is_mysql_tmp_table_name(
 /*========================*/
@@ -52,6 +53,8 @@ dict_create_add_foreign_id(
 	const char*	name,	/*!< in: table name */
 	dict_foreign_t*	foreign)/*!< in/out: foreign key */
 {
+	DBUG_ENTER("dict_create_add_foreign_id");
+
 	if (foreign->id == NULL) {
 		/* Generate a new constraint id */
 		ulint	namelen	= strlen(name);
@@ -87,12 +90,57 @@ dict_create_add_foreign_id(
 
 			if (innobase_check_identifier_length(
 				strchr(id,'/') + 1)) {
-				return(DB_IDENTIFIER_TOO_LONG);
+				DBUG_RETURN(DB_IDENTIFIER_TOO_LONG);
 			}
 		}
 		foreign->id = id;
+
+		DBUG_PRINT("dict_create_add_foreign_id",
+			   ("generated foreign id: %s", id));
 	}
 
-	return(DB_SUCCESS);
+
+	DBUG_RETURN(DB_SUCCESS);
+}
+
+/** Compose a column number for a virtual column, stored in the "POS" field
+of Sys_columns. The column number includes both its virtual column sequence
+(the "nth" virtual column) and its actual column position in original table
+@param[in]	v_pos		virtual column sequence
+@param[in]	col_pos		column position in original table definition
+@return composed column position number */
+UNIV_INLINE
+ulint
+dict_create_v_col_pos(
+	ulint	v_pos,
+	ulint	col_pos)
+{
+	ut_ad(v_pos <= REC_MAX_N_FIELDS);
+	ut_ad(col_pos <= REC_MAX_N_FIELDS);
+
+	return(((v_pos + 1) << 16) + col_pos);
+}
+
+/** Get the column number for a virtual column (the column position in
+original table), stored in the "POS" field of Sys_columns
+@param[in]	pos		virtual column position
+@return column position in original table */
+UNIV_INLINE
+ulint
+dict_get_v_col_mysql_pos(
+	ulint	pos)
+{
+	return(pos & 0xFFFF);
 }
 
+/** Get a virtual column sequence (the "nth" virtual column) for a
+virtual column, stored in the "POS" field of Sys_columns
+@param[in]	pos		virtual column position
+@return virtual column sequence */
+UNIV_INLINE
+ulint
+dict_get_v_col_pos(
+	ulint	pos)
+{
+	return((pos >> 16) - 1);
+}
diff --git a/storage/innobase/include/dict0defrag_bg.h b/storage/innobase/include/dict0defrag_bg.h
new file mode 100644
index 00000000000..eb2a6e6824f
--- /dev/null
+++ b/storage/innobase/include/dict0defrag_bg.h
@@ -0,0 +1,93 @@
+/*****************************************************************************
+
+Copyright (c) 2016, MariaDB Corporation. All rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0defrag_bg.h
+Code used for background table and index
+defragmentation
+
+Created 25/08/2016 Jan Lindström
+*******************************************************/
+
+#ifndef dict0defrag_bg_h
+#define dict0defrag_bg_h
+
+#include "univ.i"
+
+#include "dict0types.h"
+#include "os0event.h"
+#include "os0thread.h"
+
+/*****************************************************************//**
+Initialize the defrag pool, called once during thread initialization. */
+void
+dict_defrag_pool_init(void);
+/*========================*/
+
+/*****************************************************************//**
+Free the resources occupied by the defrag pool, called once during
+thread de-initialization. */
+void
+dict_defrag_pool_deinit(void);
+/*==========================*/
+
+/*****************************************************************//**
+Add an index in a table to the defrag pool, which is processed by the
+background stats gathering thread. Only the table id and index id are
+added to the list, so the table can be closed after being enqueued and
+it will be opened when needed. If the table or index does not exist later
+(has been DROPped), then it will be removed from the pool and skipped. */
+void
+dict_stats_defrag_pool_add(
+/*=======================*/
+	const dict_index_t*	index);	/*!< in: index to add */
+
+/*****************************************************************//**
+Delete a given index from the auto defrag pool. */
+void
+dict_stats_defrag_pool_del(
+/*=======================*/
+	const dict_table_t*	table,	/*!< in: if given, remove
+					all entries for the table */
+	const dict_index_t*	index);	/*!< in: index to remove */
+
+/*****************************************************************//**
+Get the first index that has been added for updating persistent defrag
+stats and eventually save its stats. */
+void
+dict_defrag_process_entries_from_defrag_pool();
+/*===========================================*/
+
+/*********************************************************************//**
+Save defragmentation result.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_summary(
+/*============================*/
+	dict_index_t*	index)	/*!< in: index */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Save defragmentation stats for a given index.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_stats(
+/*============================*/
+	dict_index_t*	index)	/*!< in: index */
+	MY_ATTRIBUTE((warn_unused_result));
+#endif /* dict0defrag_bg_h */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 42f93b5a889..6ac696e75eb 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -29,20 +29,22 @@ Created 1/8/1996 Heikki Tuuri
 #define dict0dict_h
 
 #include "univ.i"
-#include "db0err.h"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
 #include "data0data.h"
+#include "data0type.h"
+#include "dict0mem.h"
+#include "dict0types.h"
+#include "fsp0fsp.h"
+#include "fsp0sysspace.h"
+#include "hash0hash.h"
 #include "mem0mem.h"
 #include "rem0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0byte.h"
 #include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
+#include "ut0new.h"
 #include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "row0types.h"
+#include <deque>
 #include "fsp0fsp.h"
 #include "dict0pagecompress.h"
 
@@ -50,20 +52,10 @@ extern bool innodb_table_stats_not_found;
 extern bool innodb_index_stats_not_found;
 
 #ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
 # include "sync0rw.h"
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
-	char*	a)	/*!< in/out: string to put in lower case */
-	MY_ATTRIBUTE((nonnull));
 /********************************************************************//**
 Get the database name length in a table name.
-@return	database name length */
-UNIV_INTERN
+@return database name length */
 ulint
 dict_get_db_name_len(
 /*=================*/
@@ -75,7 +67,6 @@ Open a table from its database and table name, this is currently used by
 foreign constraint parser to get the referenced table.
 @return complete table name with database and table name, allocated from
 heap memory passed in */
-UNIV_INTERN
 char*
 dict_get_referenced_table(
 /*======================*/
@@ -88,7 +79,6 @@ dict_get_referenced_table(
 	mem_heap_t*	heap);		/*!< in: heap memory */
 /*********************************************************************//**
 Frees a foreign key struct. */
-
 void
 dict_foreign_free(
 /*==============*/
@@ -98,7 +88,6 @@ Finds the highest [number] for foreign key constraints of the table. Looks
 only at the >= 4.0.18-format id's, which are of the form
 databasename/tablename_ibfk_[number].
 @return highest number, 0 if table has no new format foreign key constraints */
-UNIV_INTERN
 ulint
 dict_table_get_highest_foreign_id(
 /*==============================*/
@@ -106,8 +95,7 @@ dict_table_get_highest_foreign_id(
 					memory cache */
 /********************************************************************//**
 Return the end of table name where we have removed dbname and '/'.
-@return	table name */
-UNIV_INTERN
+@return table name */
 const char*
 dict_remove_db_name(
 /*================*/
@@ -130,15 +118,14 @@ enum dict_table_op_t {
 
 /**********************************************************************//**
 Returns a table object based on table id.
-@return	table, NULL if does not exist */
-UNIV_INTERN
+@return table, NULL if does not exist */
 dict_table_t*
 dict_table_open_on_id(
 /*==================*/
 	table_id_t	table_id,	/*!< in: table id */
 	ibool		dict_locked,	/*!< in: TRUE=data dictionary locked */
 	dict_table_op_t	table_op)	/*!< in: operation to perform */
-	__attribute__((warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /**********************************************************************//**
 Returns a table object based on table id.
@@ -152,7 +139,6 @@ dict_table_open_on_index_id(
 	__attribute__((warn_unused_result));
 /********************************************************************//**
 Decrements the count of open handles to a table. */
-UNIV_INTERN
 void
 dict_table_close(
 /*=============*/
@@ -162,22 +148,22 @@ dict_table_close(
 					indexes after an aborted online
 					index creation */
 	MY_ATTRIBUTE((nonnull));
+/*********************************************************************//**
+Closes the only open handle to a table and drops a table while assuring
+that dict_sys->mutex is held the whole time.  This assures that the table
+is not evicted after the close when the count of open handles goes to zero.
+Because dict_sys->mutex is held, we do not need to call
+dict_table_prevent_eviction().  */
+void
+dict_table_close_and_drop(
+/*======================*/
+	trx_t*		trx,		/*!< in: data dictionary transaction */
+	dict_table_t*	table);		/*!< in/out: table */
 /**********************************************************************//**
 Inits the data dictionary module. */
-UNIV_INTERN
 void
 dict_init(void);
-/*===========*/
-/********************************************************************//**
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-UNIV_INTERN
-void
-dict_load_space_id_list(void);
-/*=========================*/
+
 /*********************************************************************//**
 Gets the minimum number of bytes per character.
 @return minimum multi-byte char size, in bytes */
@@ -215,8 +201,8 @@ void
 dict_col_copy_type(
 /*===============*/
 	const dict_col_t*	col,	/*!< in: column */
-	dtype_t*		type)	/*!< out: data type */
-	MY_ATTRIBUTE((nonnull));
+	dtype_t*		type);	/*!< out: data type */
+
 /**********************************************************************//**
 Determine bytes of column prefix to be stored in the undo log. Please
 note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
@@ -230,11 +216,23 @@ dict_max_field_len_store_undo(
 	const dict_col_t*	col)	/*!< in: column which index prefix
 					is based on */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Determine maximum bytes of a virtual column need to be stored
+in the undo log.
+@param[in]	table		dict_table_t for the table
+@param[in]	col_no		virtual column number
+@return maximum bytes of virtual column to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_v_field_len_store_undo(
+	dict_table_t*		table,
+	ulint			col_no);
+
 #endif /* !UNIV_HOTBACKUP */
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Assert that a column and a data type match.
-@return	TRUE */
+@return TRUE */
 UNIV_INLINE
 ibool
 dict_col_type_assert_equal(
@@ -246,7 +244,7 @@ dict_col_type_assert_equal(
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Returns the minimum size of the column.
-@return	minimum size */
+@return minimum size */
 UNIV_INLINE
 ulint
 dict_col_get_min_size(
@@ -255,7 +253,7 @@ dict_col_get_min_size(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /***********************************************************************//**
 Returns the maximum size of the column.
-@return	maximum size */
+@return maximum size */
 UNIV_INLINE
 ulint
 dict_col_get_max_size(
@@ -264,7 +262,7 @@ dict_col_get_max_size(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /***********************************************************************//**
 Returns the size of a fixed size column, 0 if not a fixed size column.
-@return	fixed size, or 0 */
+@return fixed size, or 0 */
 UNIV_INLINE
 ulint
 dict_col_get_fixed_size(
@@ -275,7 +273,7 @@ dict_col_get_fixed_size(
 /***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
 For fixed length types it is the fixed length of the type, otherwise 0.
-@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dict_col_get_sql_null_size(
@@ -285,7 +283,7 @@ dict_col_get_sql_null_size(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Gets the column number.
-@return	col->ind, table column position (starting from 0) */
+@return col->ind, table column position (starting from 0) */
 UNIV_INLINE
 ulint
 dict_col_get_no(
@@ -301,11 +299,22 @@ dict_col_get_clust_pos(
 	const dict_col_t*	col,		/*!< in: table column */
 	const dict_index_t*	clust_index)	/*!< in: clustered index */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Gets the column position in the given index.
+@param[in]	col	table column
+@param[in]	index	index to be searched for column
+@return position of column in the given index. */
+UNIV_INLINE
+ulint
+dict_col_get_index_pos(
+	const dict_col_t*	col,
+	const dict_index_t*	index)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /****************************************************************//**
 If the given column name is reserved for InnoDB system columns, return
 TRUE.
-@return	TRUE if name is reserved */
-UNIV_INTERN
+@return TRUE if name is reserved */
 ibool
 dict_col_name_is_reserved(
 /*======================*/
@@ -313,7 +322,6 @@ dict_col_name_is_reserved(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Acquire the autoinc lock. */
-UNIV_INTERN
 void
 dict_table_autoinc_lock(
 /*====================*/
@@ -321,7 +329,6 @@ dict_table_autoinc_lock(
 	MY_ATTRIBUTE((nonnull));
 /********************************************************************//**
 Unconditionally set the autoinc counter. */
-UNIV_INTERN
 void
 dict_table_autoinc_initialize(
 /*==========================*/
@@ -331,14 +338,12 @@ dict_table_autoinc_initialize(
 
 /** Store autoinc value when the table is evicted.
 @param[in]	table	table evicted */
-UNIV_INTERN
 void
 dict_table_autoinc_store(
 	const dict_table_t*	table);
 
 /** Restore autoinc value when the table is loaded.
 @param[in]	table	table loaded */
-UNIV_INTERN
 void
 dict_table_autoinc_restore(
 	dict_table_t*	table);
@@ -346,8 +351,7 @@ dict_table_autoinc_restore(
 /********************************************************************//**
 Reads the next autoinc value (== autoinc counter value), 0 if not yet
 initialized.
-@return	value for a new row, or 0 */
-UNIV_INTERN
+@return value for a new row, or 0 */
 ib_uint64_t
 dict_table_autoinc_read(
 /*====================*/
@@ -356,7 +360,6 @@ dict_table_autoinc_read(
 /********************************************************************//**
 Updates the autoinc counter if the value supplied is greater than the
 current value. */
-UNIV_INTERN
 void
 dict_table_autoinc_update_if_greater(
 /*=================================*/
@@ -366,7 +369,6 @@ dict_table_autoinc_update_if_greater(
 	MY_ATTRIBUTE((nonnull));
 /********************************************************************//**
 Release the autoinc lock. */
-UNIV_INTERN
 void
 dict_table_autoinc_unlock(
 /*======================*/
@@ -375,7 +377,6 @@ dict_table_autoinc_unlock(
 #endif /* !UNIV_HOTBACKUP */
 /**********************************************************************//**
 Adds system columns to a table object. */
-UNIV_INTERN
 void
 dict_table_add_system_columns(
 /*==========================*/
@@ -383,9 +384,14 @@ dict_table_add_system_columns(
 	mem_heap_t*	heap)	/*!< in: temporary heap */
 	MY_ATTRIBUTE((nonnull));
 #ifndef UNIV_HOTBACKUP
+/** Mark if table has big rows.
+@param[in,out]	table	table handler */
+void
+dict_table_set_big_rows(
+	dict_table_t*	table)
+	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
 Adds a table object to the dictionary cache. */
-UNIV_INTERN
 void
 dict_table_add_to_cache(
 /*====================*/
@@ -395,7 +401,6 @@ dict_table_add_to_cache(
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
 Removes a table object from the dictionary cache. */
-UNIV_INTERN
 void
 dict_table_remove_from_cache(
 /*=========================*/
@@ -403,7 +408,6 @@ dict_table_remove_from_cache(
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
 Removes a table object from the dictionary cache. */
-UNIV_INTERN
 void
 dict_table_remove_from_cache_low(
 /*=============================*/
@@ -412,8 +416,7 @@ dict_table_remove_from_cache_low(
 					to make room in the table LRU list */
 /**********************************************************************//**
 Renames a table object.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 dberr_t
 dict_table_rename_in_cache(
 /*=======================*/
@@ -424,19 +427,19 @@ dict_table_rename_in_cache(
 					to preserve the original table name
 					in constraints which reference it */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
+
+/** Removes an index from the dictionary cache.
+@param[in,out]	table	table whose index to remove
+@param[in,out]	index	index to remove, this object is destroyed and must not
+be accessed by the caller afterwards */
 void
 dict_index_remove_from_cache(
-/*=========================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	dict_index_t*	index)	/*!< in, own: index */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table,
+	dict_index_t*	index);
+
 /**********************************************************************//**
 Change the id of a table object in the dictionary cache. This is used in
 DISCARD TABLESPACE. */
-UNIV_INTERN
 void
 dict_table_change_id_in_cache(
 /*==========================*/
@@ -445,7 +448,6 @@ dict_table_change_id_in_cache(
 	MY_ATTRIBUTE((nonnull));
 /**********************************************************************//**
 Removes a foreign constraint struct from the dictionary cache. */
-UNIV_INTERN
 void
 dict_foreign_remove_from_cache(
 /*===========================*/
@@ -456,8 +458,7 @@ Adds a foreign key constraint object to the dictionary cache. May free
 the object if there already is an object with the same identifier in.
 At least one of foreign table or referenced table must already be in
 the dictionary cache!
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 dict_foreign_add_to_cache(
 /*======================*/
@@ -474,8 +475,7 @@ dict_foreign_add_to_cache(
 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 /*********************************************************************//**
 Checks if a table is referenced by foreign keys.
-@return	TRUE if table is referenced by a foreign key */
-UNIV_INTERN
+@return TRUE if table is referenced by a foreign key */
 ibool
 dict_table_is_referenced_by_foreign_key(
 /*====================================*/
@@ -485,7 +485,6 @@ dict_table_is_referenced_by_foreign_key(
 Replace the index passed in with another equivalent index in the
 foreign key lists of the table.
 @return whether all replacements were found */
-UNIV_INTERN
 bool
 dict_foreign_replace_index(
 /*=======================*/
@@ -498,7 +497,6 @@ dict_foreign_replace_index(
 /**********************************************************************//**
 Determines whether a string starts with the specified keyword.
 @return TRUE if str starts with keyword */
-UNIV_INTERN
 ibool
 dict_str_starts_with_keyword(
 /*=========================*/
@@ -506,40 +504,38 @@ dict_str_starts_with_keyword(
 	const char*	str,		/*!< in: string to scan for keyword */
 	const char*	keyword)	/*!< in: keyword to look for */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
+/** Scans a table create SQL string and adds to the data dictionary
 the foreign key constraints declared in the string. This function
 should be called after the indexes for a table have been created.
 Each foreign key constraint must be accompanied with indexes in
 bot participating tables. The indexes are allowed to contain more
 fields than mentioned in the constraint.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+
+@param[in]	trx		transaction
+@param[in]	sql_string	table create statement where
+				foreign keys are declared like:
+				FOREIGN KEY (a, b) REFERENCES table2(c, d),
+				table2 can be written also with the database
+				name before it: test.table2; the default
+				database is the database of parameter name
+@param[in]	sql_length	length of sql_string
+@param[in]	name		table full name in normalized form
+@param[in]	reject_fks	if TRUE, fail with error code
+				DB_CANNOT_ADD_CONSTRAINT if any
+				foreign keys are found.
+@return error code or DB_SUCCESS */
 dberr_t
 dict_create_foreign_constraints(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d), table2 can be written
-					also with the database
-					name before it: test.table2; the
-					default database id the database of
-					parameter name */
-	size_t		sql_length,	/*!< in: length of sql_string */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	trx_t*			trx,
+	const char*		sql_string,
+	size_t			sql_length,
+	const char*		name,
+	ibool			reject_fks)
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************************//**
 Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
 @return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
 constraint id does not match */
-UNIV_INTERN
 dberr_t
 dict_foreign_parse_drop_constraints(
 /*================================*/
@@ -557,27 +553,25 @@ Returns a table object and increments its open handle count.
 NOTE! This is a high-level function to be used mainly from outside the
 'dict' directory. Inside this directory dict_table_get_low
 is usually the appropriate function.
-@return	table, NULL if does not exist */
-UNIV_INTERN
+@param[in] table_name Table name
+@param[in] dict_locked TRUE=data dictionary locked
+@param[in] try_drop TRUE=try to drop any orphan indexes after
+				an aborted online index creation
+@param[in] ignore_err error to be ignored when loading the table
+@return table, NULL if does not exist */
 dict_table_t*
 dict_table_open_on_name(
-/*====================*/
-	const char*	table_name,	/*!< in: table name */
-	ibool		dict_locked,	/*!< in: TRUE=data dictionary locked */
-	ibool		try_drop,	/*!< in: TRUE=try to drop any orphan
-					indexes after an aborted online
-					index creation */
-	dict_err_ignore_t
-			ignore_err)	/*!< in: error to be ignored when
-					loading the table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const char*		table_name,
+	ibool			dict_locked,
+	ibool			try_drop,
+	dict_err_ignore_t	ignore_err)
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*********************************************************************//**
 Tries to find an index whose first fields are the columns in the array,
 in the same order and is not marked for deletion and is not the same
 as types_idx.
-@return	matching index, NULL if not found */
-UNIV_INTERN
+@return matching index, NULL if not found */
 dict_index_t*
 dict_foreign_find_index(
 /*====================*/
@@ -604,7 +598,7 @@ dict_foreign_find_index(
 					/*!< out: column number where
 					error happened */
 	dict_index_t**		err_index)
-			                /*!< out: index where error
+					/*!< out: index where error
 					happened */
 
 	MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
@@ -612,35 +606,36 @@ dict_foreign_find_index(
 Returns a column's name.
 @return column name. NOTE: not guaranteed to stay valid if table is
 modified in any way (columns added, etc.). */
-UNIV_INTERN
 const char*
 dict_table_get_col_name(
 /*====================*/
 	const dict_table_t*	table,	/*!< in: table */
 	ulint			col_nr)	/*!< in: column number */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
+
+/** Returns a virtual column's name.
+@param[in]	table		table object
+@param[in]	col_nr		virtual column number(nth virtual column)
+@return column name. */
 const char*
-dict_table_get_col_name_for_mysql(
-/*==============================*/
-	const dict_table_t*	table,	/*!< in: table */
-	const char*		col_name)/*!< in: MySQL table column name */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
-	dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull));
+dict_table_get_v_col_name(
+	const dict_table_t*	table,
+	ulint			col_nr);
+
+/** Check if the table has a given column.
+@param[in]	table		table object
+@param[in]	col_name	column name
+@param[in]	col_nr		column number guessed, 0 as default
+@return column number if the table has the specified column,
+otherwise table->n_def */
+ulint
+dict_table_has_column(
+	const dict_table_t*	table,
+	const char*		col_name,
+	ulint			col_nr = 0);
+
 /**********************************************************************//**
 Outputs info on foreign keys of a table. */
-UNIV_INTERN
 std::string
 dict_print_info_on_foreign_keys(
 /*============================*/
@@ -650,32 +645,22 @@ dict_print_info_on_foreign_keys(
 				of SHOW TABLE STATUS */
 	trx_t*		trx,	/*!< in: transaction */
 	dict_table_t*	table);	/*!< in: table */
+
 /**********************************************************************//**
 Outputs info on a foreign key of a table in a format suitable for
 CREATE TABLE. */
-UNIV_INTERN
 std::string
 dict_print_info_on_foreign_key_in_create_format(
 /*============================================*/
 	trx_t*		trx,		/*!< in: transaction */
 	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
 	ibool		add_newline);	/*!< in: whether to add a newline */
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
-	FILE*			file,	/*!< in: output stream */
-	const trx_t*		trx,	/*!< in: transaction */
-	const dict_index_t*	index)	/*!< in: index to print */
-	MY_ATTRIBUTE((nonnull(1,3)));
+
 /*********************************************************************//**
 Tries to find an index whose first fields are the columns in the array,
 in the same order and is not marked for deletion and is not the same
 as types_idx.
-@return	matching index, NULL if not found */
-UNIV_INTERN
+@return matching index, NULL if not found */
 bool
 dict_foreign_qualify_index(
 /*====================*/
@@ -709,7 +694,7 @@ dict_foreign_qualify_index(
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the first index on the table (the clustered index).
-@return	index, NULL if none exists */
+@return index, NULL if none exists */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_first_index(
@@ -718,7 +703,7 @@ dict_table_get_first_index(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Gets the last index on the table.
-@return	index, NULL if none exists */
+@return index, NULL if none exists */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_last_index(
@@ -727,7 +712,7 @@ dict_table_get_last_index(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Gets the next index on the table.
-@return	index, NULL if none left */
+@return index, NULL if none left */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_next_index(
@@ -756,90 +741,131 @@ do {								\
 
 /********************************************************************//**
 Check whether the index is the clustered index.
-@return	nonzero for clustered index, zero for other indexes */
+@return nonzero for clustered index, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_clust(
 /*================*/
 	const dict_index_t*	index)	/*!< in: index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Check if index is auto-generated clustered index.
+@param[in]	index	index
+
+@return true if index is auto-generated clustered index. */
+UNIV_INLINE
+bool
+dict_index_is_auto_gen_clust(
+	const dict_index_t*	index);
+
 /********************************************************************//**
 Check whether the index is unique.
-@return	nonzero for unique index, zero for other indexes */
+@return nonzero for unique index, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_unique(
 /*=================*/
 	const dict_index_t*	index)	/*!< in: index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+/********************************************************************//**
+Check whether the index is a Spatial Index.
+@return nonzero for Spatial Index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_spatial(
+/*==================*/
+	const dict_index_t*	index)	/*!< in: index */
+	MY_ATTRIBUTE((warn_unused_result));
+/** Check whether the index contains a virtual column.
+@param[in]	index	index
+@return nonzero for index on virtual column, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_has_virtual(
+	const dict_index_t*	index);
 /********************************************************************//**
 Check whether the index is the insert buffer tree.
-@return	nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_ibuf(
 /*===============*/
 	const dict_index_t*	index)	/*!< in: index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************************//**
 Check whether the index is a secondary index or the insert buffer tree.
-@return	nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_sec_or_ibuf(
 /*======================*/
 	const dict_index_t*	index)	/*!< in: index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
-/************************************************************************
-Gets the all the FTS indexes for the table. NOTE: must not be called for
-tables which do not have an FTS-index. */
-UNIV_INTERN
+/** Get all the FTS indexes on a table.
+@param[in]	table	table
+@param[out]	indexes	all FTS indexes on this table
+@return number of FTS indexes */
 ulint
 dict_table_get_all_fts_indexes(
-/*===========================*/
-				/* out: number of indexes collected */
-	dict_table_t*	table,	/* in: table */
-	ib_vector_t*	indexes)/* out: vector for collecting FTS indexes */
-	MY_ATTRIBUTE((nonnull));
+	const dict_table_t*	table,
+	ib_vector_t*		indexes);
+
 /********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return	number of user-defined (e.g., not ROW_ID) columns of a table */
+Gets the number of user-defined non-virtual columns in a table in the
+dictionary cache.
+@return number of user-defined (e.g., not ROW_ID) non-virtual
+columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_user_cols(
 /*=======================*/
 	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return	number of system (e.g., ROW_ID) columns of a table */
+	MY_ATTRIBUTE((warn_unused_result));
+/** Gets the number of user-defined virtual and non-virtual columns in a table
+in the dictionary cache.
+@param[in]	table	table
+@return number of user-defined (e.g., not ROW_ID) columns of a table */
 UNIV_INLINE
 ulint
-dict_table_get_n_sys_cols(
-/*======================*/
-	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+dict_table_get_n_tot_u_cols(
+	const dict_table_t*	table);
 /********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return	number of columns of a table */
+Gets the number of all non-virtual columns (also system) in a table
+in the dictionary cache.
+@return number of columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_cols(
 /*==================*/
 	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets the number of virtual columns in a table in the dictionary cache.
+@param[in]	table	the table to check
+@return number of virtual columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_v_cols(
+	const dict_table_t*	table);
+
+/** Check if a table has indexed virtual columns
+@param[in]	table	the table to check
+@return true if the table has indexed virtual columns */
+UNIV_INLINE
+bool
+dict_table_has_indexed_v_cols(
+	const dict_table_t*	table);
+
 /********************************************************************//**
 Gets the approximately estimated number of rows in the table.
-@return	estimated number of rows */
+@return estimated number of rows */
 UNIV_INLINE
 ib_uint64_t
 dict_table_get_n_rows(
 /*==================*/
 	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************************//**
 Increment the number of rows in the table by one.
 Notice that this operation is not protected by any latch, the number is
@@ -860,10 +886,21 @@ dict_table_n_rows_dec(
 /*==================*/
 	dict_table_t*	table)	/*!< in/out: table */
 	MY_ATTRIBUTE((nonnull));
+
+
+/** Get nth virtual column
+@param[in]	table	target table
+@param[in]	col_nr	column number in MySQL Table definition
+@return dict_v_col_t ptr */
+dict_v_col_t*
+dict_table_get_nth_v_col_mysql(
+	const dict_table_t*	table,
+	ulint			col_nr);
+
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth column of a table.
-@return	pointer to column object */
+@return pointer to column object */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_nth_col(
@@ -871,9 +908,18 @@ dict_table_get_nth_col(
 	const dict_table_t*	table,	/*!< in: table */
 	ulint			pos)	/*!< in: position of column */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Gets the nth virtual column of a table.
+@param[in]	table	table
+@param[in]	pos	position of virtual column
+@return pointer to virtual column object */
+UNIV_INLINE
+dict_v_col_t*
+dict_table_get_nth_v_col(
+        const dict_table_t*	table,
+        ulint			pos);
 /********************************************************************//**
 Gets the given system column of a table.
-@return	pointer to column object */
+@return pointer to column object */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_sys_col(
@@ -882,14 +928,17 @@ dict_table_get_sys_col(
 	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 #else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
+#define dict_table_get_nth_col(table, pos)	\
 ((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
+#define dict_table_get_sys_col(table, sys)	\
+((table)->cols + (table)->n_cols + (sys)	\
+ - (dict_table_get_n_sys_cols(table)))
+/* Get the nth virtual column */
+#define dict_table_get_nth_v_col(table, pos)	((table)->v_cols + (pos))
 #endif /* UNIV_DEBUG */
 /********************************************************************//**
 Gets the given system column number of a table.
-@return	column number */
+@return column number */
 UNIV_INLINE
 ulint
 dict_table_get_sys_col_no(
@@ -900,7 +949,7 @@ dict_table_get_sys_col_no(
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Returns the minimum data size of an index record.
-@return	minimum data size in bytes */
+@return minimum data size in bytes */
 UNIV_INLINE
 ulint
 dict_index_get_min_size(
@@ -910,16 +959,17 @@ dict_index_get_min_size(
 #endif /* !UNIV_HOTBACKUP */
 /********************************************************************//**
 Check whether the table uses the compact page format.
-@return	TRUE if table uses the compact page format */
+@return TRUE if table uses the compact page format */
 UNIV_INLINE
 ibool
 dict_table_is_comp(
 /*===============*/
 	const dict_table_t*	table)	/*!< in: table */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /********************************************************************//**
 Determine the file format of a table.
-@return	file format version */
+@return file format version */
 UNIV_INLINE
 ulint
 dict_table_get_format(
@@ -928,67 +978,97 @@ dict_table_get_format(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Determine the file format from a dict_table_t::flags.
-@return	file format version */
+@return file format version */
 UNIV_INLINE
 ulint
 dict_tf_get_format(
 /*===============*/
 	ulint		flags)		/*!< in: dict_table_t::flags */
 	MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Set the various values in a dict_table_t::flags pointer. */
+
+/** Set the various values in a dict_table_t::flags pointer.
+@param[in,out]	flags		Pointer to a 4 byte Table Flags
+@param[in]	format		File Format
+@param[in]	zip_ssize	Zip Shift Size
+@param[in]	use_data_dir	Table uses DATA DIRECTORY
+@param[in]	shared_space	Table uses a General Shared Tablespace */
 UNIV_INLINE
 void
 dict_tf_set(
-/*========*/
-	ulint*		flags,		/*!< in/out: table */
-	rec_format_t	format,		/*!< in: file format */
-	ulint		zip_ssize,	/*!< in: zip shift size */
-	bool		remote_path,	/*!< in: table uses DATA DIRECTORY
-					*/
-        bool		page_compressed,/*!< in: table uses page compressed
-					pages */
-	ulint		page_compression_level, /*!< in: table page compression
-						 level */
-	ulint		atomic_writes)  /*!< in: table atomic
-					writes option value*/
-	__attribute__((nonnull));
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field.  The following chart shows
-the translation of the low order bit.  Other bits are the same.
+	ulint*		flags,
+	rec_format_t	format,
+	ulint		zip_ssize,
+	bool		use_data_dir,
+	bool		shared_space,
+	bool		page_compressed,
+	ulint		page_compression_level,
+	ulint		atomic_writes);
+
+/** Initialize a dict_table_t::flags pointer.
+@param[in]	compact		Table uses Compact or greater
+@param[in]	zip_ssize	Zip Shift Size (log 2 minus 9)
+@param[in]	atomic_blobs	Table uses Compressed or Dynamic
+@param[in]	data_dir	Table uses DATA DIRECTORY
+@param[in]	shared_space	Table uses a General Shared Tablespace */
+UNIV_INLINE
+ulint
+dict_tf_init(
+	bool		compact,
+	ulint		zip_ssize,
+	bool		atomic_blobs,
+	bool		data_dir,
+	bool		shared_space,
+	bool		page_compressed,
+	ulint		page_compression_level,
+	ulint		atomic_writes);
+
+/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
+Fsp Flags are written into the tablespace header at the offset
+FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field.
+The following chart shows the translation of the low order bit.
+Other bits are the same.
 ========================= Low order bit ==========================
                     | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
 dict_table_t::flags |     0     |    1    |     1      |    1
 fil_space_t::flags  |     0     |    0    |     1      |    1
 ==================================================================
-@return	tablespace flags (fil_space_t::flags) */
-UNIV_INLINE
+@param[in]	table_flags	dict_table_t::flags
+@param[in]	is_temp		whether the tablespace is temporary
+@param[in]	is_encrypted	whether the tablespace is encrypted
+@return tablespace flags (fil_space_t::flags) */
 ulint
 dict_tf_to_fsp_flags(
-/*=================*/
-	ulint	flags)	/*!< in: dict_table_t::flags */
+	ulint	table_flags,
+	bool	is_temp,
+	bool	is_encrypted = false)
 	MY_ATTRIBUTE((const));
-/********************************************************************//**
-Extract the compressed page size from table flags.
-@return	compressed page size, or 0 if not compressed */
-UNIV_INLINE
+
+/** Extract the page size from table flags.
+@param[in]	flags	flags
+@return page size (page_size_t) derived from the table flags */
+UNIV_INLINE
+const page_size_t
+dict_tf_get_page_size(
+	ulint	flags)
+MY_ATTRIBUTE((const));
+
+/** Determine the extent size (in pages) for the given table
+@param[in]	table	the table whose extent size is being
+			calculated.
+@return extent size in pages (256, 128 or 64) */
 ulint
-dict_tf_get_zip_size(
-/*=================*/
-	ulint	flags)			/*!< in: flags */
-	__attribute__((const));
+dict_table_extent_size(
+	const dict_table_t*	table);
 
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return	compressed page size, or 0 if not compressed */
+/** Get the table page size.
+@param[in]	table	table
+@return page size (page_size_t) of the table */
 UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
-	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+const page_size_t
+dict_table_page_size(
+	const dict_table_t*	table)
+	MY_ATTRIBUTE((warn_unused_result));
+
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -1011,8 +1091,7 @@ dict_table_x_unlock_indexes(
 /********************************************************************//**
 Checks if a column is in the ordering columns of the clustered index of a
 table. Column prefixes are treated like whole columns.
-@return	TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
+@return TRUE if the column, or its prefix, is in the clustered key */
 ibool
 dict_table_col_in_clustered_key(
 /*============================*/
@@ -1028,11 +1107,21 @@ dict_table_has_fts_index(
 /*=====================*/
 	dict_table_t*   table)		/*!< in: table */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Copies types of virtual columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value.  This function should
+be called right after dtuple_create().
+@param[in,out]	tuple	data tuple
+@param[in]	table	table
+*/
+void
+dict_table_copy_v_types(
+	dtuple_t*		tuple,
+	const dict_table_t*	table);
+
 /*******************************************************************//**
 Copies types of columns contained in table to tuple and sets all
 fields of the tuple to the SQL NULL value.  This function should
 be called right after dtuple_create(). */
-UNIV_INTERN
 void
 dict_table_copy_types(
 /*==================*/
@@ -1043,7 +1132,6 @@ dict_table_copy_types(
 Wait until all the background threads of the given table have exited, i.e.,
 bg_threads == 0. Note: bg_threads_mutex must be reserved when
 calling this. */
-UNIV_INTERN
 void
 dict_table_wait_for_bg_threads_to_exit(
 /*===================================*/
@@ -1055,8 +1143,7 @@ dict_table_wait_for_bg_threads_to_exit(
 Looks for an index with the given id. NOTE that we do not reserve
 the dictionary mutex: this function is for emergency purposes like
 printing info of a corrupt database page!
-@return	index or NULL if not found from cache */
-UNIV_INTERN
+@return index or NULL if not found from cache */
 dict_index_t*
 dict_index_find_on_id_low(
 /*======================*/
@@ -1067,41 +1154,56 @@ Make room in the table cache by evicting an unused table. The unused table
 should not be part of FK relationship and currently not used in any user
 transaction. There is no guarantee that it will remove a table.
 @return number of tables evicted. */
-UNIV_INTERN
 ulint
 dict_make_room_in_cache(
 /*====================*/
 	ulint		max_tables,	/*!< in: max tables allowed in cache */
 	ulint		pct_check);	/*!< in: max percent to check */
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return	DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
+
+#define BIG_ROW_SIZE	1024
+
+/** Adds an index to the dictionary cache.
+@param[in]	table	table on which the index is
+@param[in]	index	index; NOTE! The index memory
+			object is freed in this function!
+@param[in]	page_no	root page number of the index
+@param[in]	strict	TRUE=refuse to create the index
+			if records could be too big to fit in
+			a B-tree page
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
 dberr_t
 dict_index_add_to_cache(
-/*====================*/
-	dict_table_t*	table,	/*!< in: table on which the index is */
-	dict_index_t*	index,	/*!< in, own: index; NOTE! The index memory
-				object is freed in this function! */
-	ulint		page_no,/*!< in: root page number of the index */
-	ibool		strict)	/*!< in: TRUE=refuse to create the index
-				if records could be too big to fit in
-				an B-tree page */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	dict_index_t*	index)	/*!< in, own: index */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table,
+	dict_index_t*	index,
+	ulint		page_no,
+	ibool		strict)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Adds an index to the dictionary cache, possibly indexing a newly
+added virtual column.
+@param[in]	table	table on which the index is
+@param[in]	index	index; NOTE! The index memory
+			object is freed in this function!
+@param[in]	add_v	new virtual column that is being added along with
+			an add index call
+@param[in]	page_no	root page number of the index
+@param[in]	strict	TRUE=refuse to create the index
+			if records could be too big to fit in
+			a B-tree page
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
+dberr_t
+dict_index_add_to_cache_w_vcol(
+	dict_table_t*		table,
+	dict_index_t*		index,
+	const dict_add_v_col_t* add_v,
+	ulint			page_no,
+	ibool			strict)
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 /********************************************************************//**
 Gets the number of fields in the internal representation of an index,
 including fields added by the dictionary system.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_fields(
@@ -1115,7 +1217,7 @@ Gets the number of fields in the internal representation of an index
 that uniquely determine the position of an index entry in the index, if
 we do not take multiversioning into account: in the B-tree use the value
 returned by dict_index_get_n_unique_in_tree.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique(
@@ -1127,7 +1229,7 @@ dict_index_get_n_unique(
 Gets the number of fields in the internal representation of an index
 which uniquely determine the position of an index entry in the index, if
 we also take multiversioning into account.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique_in_tree(
@@ -1135,12 +1237,28 @@ dict_index_get_n_unique_in_tree(
 	const dict_index_t*	index)	/*!< in: an internal representation
 					of index (in the dictionary cache) */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** The number of fields in the nonleaf page of spatial index, except
+the page no field. */
+#define DICT_INDEX_SPATIAL_NODEPTR_SIZE	1
+/**
+Gets the number of fields on nonleaf page level in the internal representation
+of an index which uniquely determine the position of an index entry in the
+index, if we also take multiversioning into account. Note, it doesn't
+include page no field.
+@param[in]	index	index
+@return number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree_nonleaf(
+	const dict_index_t*	index)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Gets the number of user-defined ordering fields in the index. In the internal
 representation we add the row id to the ordering fields to make all indexes
 unique, but this function returns the number of fields the user defined
 in the index as ordering fields.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_ordering_defined_by_user(
@@ -1151,7 +1269,7 @@ dict_index_get_n_ordering_defined_by_user(
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth field of an index.
-@return	pointer to field object */
+@return pointer to field object */
 UNIV_INLINE
 dict_field_t*
 dict_index_get_nth_field(
@@ -1164,7 +1282,7 @@ dict_index_get_nth_field(
 #endif /* UNIV_DEBUG */
 /********************************************************************//**
 Gets pointer to the nth column in an index.
-@return	column */
+@return column */
 UNIV_INLINE
 const dict_col_t*
 dict_index_get_nth_col(
@@ -1174,7 +1292,7 @@ dict_index_get_nth_col(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Gets the column number of the nth field in an index.
-@return	column number */
+@return column number */
 UNIV_INLINE
 ulint
 dict_index_get_nth_col_no(
@@ -1193,32 +1311,41 @@ dict_index_get_nth_col_pos(
 	const dict_index_t*	index,	/*!< in: index */
 	ulint			n,	/*!< in: column number */
 	ulint*			prefix_col_pos) /*!< out: col num if prefix */
-	__attribute__((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Looks for column n in an index.
+	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
+/** Looks for column n in an index.
+@param[in]	index		index
+@param[in]	n		column number
+@param[in]	inc_prefix	true=consider column prefixes too
+@param[in]	is_virtual	true=virtual column
 @return position in internal representation of the index;
 ULINT_UNDEFINED if not contained */
-UNIV_INTERN
 ulint
 dict_index_get_nth_col_or_prefix_pos(
-/*=================================*/
 	const dict_index_t*	index,		/*!< in: index */
 	ulint			n,		/*!< in: column number */
-	ibool			inc_prefix,	/*!< in: TRUE=consider
+	bool			inc_prefix,	/*!< in: TRUE=consider
 						column prefixes too */
-	ulint*			prefix_col_pos)	/*!< out: col num if prefix */
+	bool			is_virtual,	/*!< in: is a virtual column
+						*/
+	ulint*			prefix_col_pos) /*!< out: col num if prefix
+						*/
+	__attribute__((warn_unused_result));
 
-	__attribute__((nonnull(1), warn_unused_result));
 /********************************************************************//**
 Returns TRUE if the index contains a column or a prefix of that column.
-@return	TRUE if contains the column or its prefix */
-UNIV_INTERN
+@param[in]	index		index
+@param[in]	n		column number
+@param[in]	is_virtual	whether it is a virtual col
+@return TRUE if contains the column or its prefix */
 ibool
 dict_index_contains_col_or_prefix(
 /*==============================*/
 	const dict_index_t*	index,	/*!< in: index */
-	ulint			n)	/*!< in: column number */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint			n,	/*!< in: column number */
+	bool			is_virtual)
+					/*!< in: whether it is a virtual col */
+	MY_ATTRIBUTE((warn_unused_result));
 /********************************************************************//**
 Looks for a matching field in an index. The column has to be the same. The
 column in index must be complete, or must contain a prefix longer than the
@@ -1226,7 +1353,6 @@ column in index2. That is, we must be able to construct the prefix in index2
 from the prefix in index.
 @return position in internal representation of the index;
 ULINT_UNDEFINED if not contained */
-UNIV_INTERN
 ulint
 dict_index_get_nth_field_pos(
 /*=========================*/
@@ -1236,17 +1362,17 @@ dict_index_get_nth_field_pos(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Looks for column n position in the clustered index.
-@return	position in internal representation of the clustered index */
-UNIV_INTERN
+@return position in internal representation of the clustered index */
 ulint
 dict_table_get_nth_col_pos(
 /*=======================*/
 	const dict_table_t*	table,	/*!< in: table */
-	ulint			n)	/*!< in: column number */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint			n,	/*!< in: column number */
+	ulint*			prefix_col_pos) /*!< out: col num if prefix */
+	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 /********************************************************************//**
 Returns the position of a system column in an index.
-@return	position, ULINT_UNDEFINED if not contained */
+@return position, ULINT_UNDEFINED if not contained */
 UNIV_INLINE
 ulint
 dict_index_get_sys_col_pos(
@@ -1256,7 +1382,6 @@ dict_index_get_sys_col_pos(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*******************************************************************//**
 Adds a column to index. */
-UNIV_INTERN
 void
 dict_index_add_col(
 /*===============*/
@@ -1268,7 +1393,6 @@ dict_index_add_col(
 #ifndef UNIV_HOTBACKUP
 /*******************************************************************//**
 Copies types of fields contained in index to tuple. */
-UNIV_INTERN
 void
 dict_index_copy_types(
 /*==================*/
@@ -1280,7 +1404,7 @@ dict_index_copy_types(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Gets the field column.
-@return	field->col, pointer to the table column */
+@return field->col, pointer to the table column */
 UNIV_INLINE
 const dict_col_t*
 dict_field_get_col(
@@ -1291,8 +1415,7 @@ dict_field_get_col(
 /**********************************************************************//**
 Returns an index object if it is found in the dictionary cache.
 Assumes that dict_sys->mutex is already being held.
-@return	index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
 dict_index_t*
 dict_index_get_if_in_cache_low(
 /*===========================*/
@@ -1301,8 +1424,7 @@ dict_index_get_if_in_cache_low(
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /**********************************************************************//**
 Returns an index object if it is found in the dictionary cache.
-@return	index, NULL if not found */
-UNIV_INTERN
+@return index, NULL if not found */
 dict_index_t*
 dict_index_get_if_in_cache(
 /*=======================*/
@@ -1313,8 +1435,7 @@ dict_index_get_if_in_cache(
 /**********************************************************************//**
 Checks that a tuple has n_fields_cmp value in a sensible range, so that
 no comparison can occur with the page number field in a node pointer.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 dict_index_check_search_tuple(
 /*==========================*/
@@ -1332,7 +1453,6 @@ enum check_name {
 };
 /**********************************************************************//**
 Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
 void
 dict_table_check_for_dup_indexes(
 /*=============================*/
@@ -1344,8 +1464,7 @@ dict_table_check_for_dup_indexes(
 #endif /* UNIV_DEBUG */
 /**********************************************************************//**
 Builds a node pointer out of a physical record and a page number.
-@return	own: node pointer */
-UNIV_INTERN
+@return own: node pointer */
 dtuple_t*
 dict_index_build_node_ptr(
 /*======================*/
@@ -1362,8 +1481,7 @@ dict_index_build_node_ptr(
 /**********************************************************************//**
 Copies an initial segment of a physical record, long enough to specify an
 index entry uniquely.
-@return	pointer to the prefix record */
-UNIV_INTERN
+@return pointer to the prefix record */
 rec_t*
 dict_index_copy_rec_order_prefix(
 /*=============================*/
@@ -1377,8 +1495,7 @@ dict_index_copy_rec_order_prefix(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /**********************************************************************//**
 Builds a typed data tuple out of a physical record.
-@return	own: data tuple */
-UNIV_INTERN
+@return own: data tuple */
 dtuple_t*
 dict_index_build_data_tuple(
 /*========================*/
@@ -1389,7 +1506,7 @@ dict_index_build_data_tuple(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Gets the space id of the root of the index tree.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 dict_index_get_space(
@@ -1407,7 +1524,7 @@ dict_index_set_space(
 	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Gets the page number of the root of the index tree.
-@return	page number */
+@return page number */
 UNIV_INLINE
 ulint
 dict_index_get_page(
@@ -1416,7 +1533,7 @@ dict_index_get_page(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Gets the read-write lock of the index tree.
-@return	read-write lock */
+@return read-write lock */
 UNIV_INLINE
 rw_lock_t*
 dict_index_get_lock(
@@ -1427,7 +1544,7 @@ dict_index_get_lock(
 Returns free space reserved for future updates of records. This is
 relevant only in the case of many consecutive inserts, as updates
 which make the records bigger might fragment the index.
-@return	number of free bytes on page, reserved for updates */
+@return number of free bytes on page, reserved for updates */
 UNIV_INLINE
 ulint
 dict_index_get_space_reserve(void);
@@ -1468,7 +1585,6 @@ dict_index_is_online_ddl(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Calculates the minimum record length in an index. */
-UNIV_INTERN
 ulint
 dict_index_calc_min_rec_len(
 /*========================*/
@@ -1476,7 +1592,6 @@ dict_index_calc_min_rec_len(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
 void
 dict_mutex_enter_for_mysql_func(const char * file, ulint line);
 /*============================*/
@@ -1486,7 +1601,6 @@ dict_mutex_enter_for_mysql_func(const char * file, ulint line);
 
 /********************************************************************//**
 Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
 void
 dict_mutex_exit_for_mysql(void);
 /*===========================*/
@@ -1497,7 +1611,6 @@ or from a thread that has not shared the table object with other threads.
 @param[in,out]	table	table whose stats latch to create
 @param[in]	enabled	if false then the latch is disabled
 and dict_table_stats_lock()/unlock() become noop on this table. */
-
 void
 dict_table_stats_latch_create(
 	dict_table_t*	table,
@@ -1507,33 +1620,29 @@ dict_table_stats_latch_create(
 This function is only called from either single threaded environment
 or from a thread that has not shared the table object with other threads.
 @param[in,out]	table	table whose stats latch to destroy */
-
 void
 dict_table_stats_latch_destroy(
 	dict_table_t*	table);
 
-/**********************************************************************//**
-Lock the appropriate latch to protect a given table's statistics.
-table->id is used to pick the corresponding latch from a global array of
-latches. */
-UNIV_INTERN
+/** Lock the appropriate latch to protect a given table's statistics.
+@param[in]	table		table whose stats to lock
+@param[in]	latch_mode	RW_S_LATCH or RW_X_LATCH */
 void
 dict_table_stats_lock(
-/*==================*/
-	dict_table_t*	table,		/*!< in: table */
-	ulint		latch_mode);	/*!< in: RW_S_LATCH or RW_X_LATCH */
-/**********************************************************************//**
-Unlock the latch that has been locked by dict_table_stats_lock() */
-UNIV_INTERN
+	dict_table_t*	table,
+	ulint		latch_mode);
+
+/** Unlock the latch that has been locked by dict_table_stats_lock().
+@param[in]	table		table whose stats to unlock
+@param[in]	latch_mode	RW_S_LATCH or RW_X_LATCH */
 void
 dict_table_stats_unlock(
-/*====================*/
-	dict_table_t*	table,		/*!< in: table */
-	ulint		latch_mode);	/*!< in: RW_S_LATCH or RW_X_LATCH */
+	dict_table_t*	table,
+	ulint		latch_mode);
+
 /********************************************************************//**
 Checks if the database name in two table names is the same.
-@return	TRUE if same db name */
-UNIV_INTERN
+@return TRUE if same db name */
 ibool
 dict_tables_have_same_db(
 /*=====================*/
@@ -1542,46 +1651,37 @@ dict_tables_have_same_db(
 	const char*	name2)	/*!< in: table name in the form
 				dbname '/' tablename */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Removes an index from the cache */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	dict_index_t*	index)	/*!< in, own: index */
-	MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Get index by name
-@return	index, NULL if does not exist */
-UNIV_INTERN
+
+/** Get an index by name.
+@param[in]	table		the table where to look for the index
+@param[in]	name		the index name to look for
+@param[in]	committed	true=search for committed,
+false=search for uncommitted
+@return index, NULL if does not exist */
 dict_index_t*
 dict_table_get_index_on_name(
-/*=========================*/
-	dict_table_t*	table,	/*!< in: table */
-	const char*	name)	/*!< in: name of the index to find */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Looks for an index with the given id given a table instance.
-@return	index or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_table_find_index_on_id(
-/*========================*/
-	const dict_table_t*	table,	/*!< in: table instance */
-	index_id_t		id)	/*!< in: index id */
-	__attribute__((nonnull, warn_unused_result));
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return	index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*====================================*/
-	dict_table_t*	table,	/*!< in: table */
-	const char*	name)	/*!< in: name of the index to find */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	dict_table_t*	table,
+	const char*	name,
+	bool		committed=true)
+		MY_ATTRIBUTE((warn_unused_result));
+
+/** Get an index by name.
+@param[in]	table		the table where to look for the index
+@param[in]	name		the index name to look for
+@param[in]	committed	true=search for committed,
+false=search for uncommitted
+@return index, NULL if does not exist */
+inline
+const dict_index_t*
+dict_table_get_index_on_name(
+	const dict_table_t*	table,
+	const char*		name,
+	bool			committed=true)
+{
+	return(dict_table_get_index_on_name(
+		       const_cast<dict_table_t*>(table), name, committed));
+}
+
 /***************************************************************
 Check whether a column exists in an FTS index. */
 UNIV_INLINE
@@ -1591,27 +1691,39 @@ dict_table_is_fts_column(
 				/* out: ULINT_UNDEFINED if no match else
 				the offset within the vector */
 	ib_vector_t*	indexes,/* in: vector containing only FTS indexes */
-	ulint		col_no)	/* in: col number to search for */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint		col_no,	/* in: col number to search for */
+	bool		is_virtual)/*!< in: whether it is a virtual column */
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************************//**
-Move a table to the non LRU end of the LRU list. */
-UNIV_INTERN
+Prevent table eviction by moving a table to the non-LRU list from the
+LRU list if it is not already there. */
+UNIV_INLINE
 void
-dict_table_move_from_lru_to_non_lru(
-/*================================*/
-	dict_table_t*	table)	/*!< in: table to move from LRU to non-LRU */
+dict_table_prevent_eviction(
+/*========================*/
+	dict_table_t*	table)	/*!< in: table to prevent eviction */
 	MY_ATTRIBUTE((nonnull));
+
 /**********************************************************************//**
-Move a table to the LRU list from the non-LRU list. */
-UNIV_INTERN
+Move a table from the LRU list to the non-LRU list. */
 void
-dict_table_move_from_non_lru_to_lru(
+dict_table_move_from_lru_to_non_lru(
 /*================================*/
-	dict_table_t*	table)	/*!< in: table to move from non-LRU to LRU */
+	dict_table_t*	table)	/*!< in: table to move from LRU to non-LRU */
 	MY_ATTRIBUTE((nonnull));
+
+/** Looks for an index with the given id given a table instance.
+@param[in]	table	table instance
+@param[in]	id	index id
+@return index or NULL */
+dict_index_t*
+dict_table_find_index_on_id(
+	const dict_table_t*	table,
+	index_id_t		id)
+	MY_ATTRIBUTE((nonnull(1)));
+
 /**********************************************************************//**
 Move to the most recently used segment of the LRU list. */
-UNIV_INTERN
 void
 dict_move_to_mru(
 /*=============*/
@@ -1625,19 +1737,20 @@ constraint */
 
 /* Buffers for storing detailed information about the latest foreign key
 and unique key errors */
-extern FILE*	dict_foreign_err_file;
-extern ib_mutex_t	dict_foreign_err_mutex; /* mutex protecting the buffers */
+extern FILE*		dict_foreign_err_file;
+extern ib_mutex_t	dict_foreign_err_mutex; /* mutex protecting the
+						foreign key error messages */
 
 /** the dictionary system */
 extern dict_sys_t*	dict_sys;
 /** the data dictionary rw-latch protecting dict_sys */
-extern rw_lock_t	dict_operation_lock;
+extern rw_lock_t*	dict_operation_lock;
 
 typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t;
 
 /* Dictionary system struct */
 struct dict_sys_t{
-	ib_mutex_t		mutex;		/*!< mutex protecting the data
+	DictSysMutex	mutex;		/*!< mutex protecting the data
 					dictionary; protects also the
 					disk-based dictionary system tables;
 					this mutex serializes CREATE TABLE
@@ -1654,13 +1767,14 @@ struct dict_sys_t{
 					on name */
 	hash_table_t*	table_id_hash;	/*!< hash table of the tables, based
 					on id */
-	ulint		size;		/*!< varying space in bytes occupied
+	lint		size;		/*!< varying space in bytes occupied
 					by the data dictionary table and
 					index objects */
 	dict_table_t*	sys_tables;	/*!< SYS_TABLES table */
 	dict_table_t*	sys_columns;	/*!< SYS_COLUMNS table */
 	dict_table_t*	sys_indexes;	/*!< SYS_INDEXES table */
 	dict_table_t*	sys_fields;	/*!< SYS_FIELDS table */
+	dict_table_t*	sys_virtual;	/*!< SYS_VIRTUAL table */
 
 	/*=============================*/
 	UT_LIST_BASE_NODE_T(dict_table_t)
@@ -1676,12 +1790,9 @@ struct dict_sys_t{
 
 /** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
 extern dict_index_t*	dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-extern dict_index_t*	dict_ind_compact;
 
 /**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
+Inits dict_ind_redundant. */
 void
 dict_ind_init(void);
 /*===============*/
@@ -1701,7 +1812,7 @@ struct dict_col_meta_t {
 };
 
 /* This struct is used for checking whether a given table exists and
-whether it has a predefined schema (number of columns and columns names
+whether it has a predefined schema (number of columns and column names
 and types) */
 struct dict_table_schema_t {
 	const char*		table_name;	/* the name of the table whose
@@ -1729,7 +1840,6 @@ types. The order of the columns does not matter.
 The caller must own the dictionary mutex.
 dict_table_schema_check() @{
 @return DB_SUCCESS if the table exists and contains the necessary columns */
-UNIV_INTERN
 dberr_t
 dict_table_schema_check(
 /*====================*/
@@ -1748,7 +1858,6 @@ Converts a database and table name from filesystem encoding
 (e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
 strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
 at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
-UNIV_INTERN
 void
 dict_fs2utf8(
 /*=========*/
@@ -1760,16 +1869,19 @@ dict_fs2utf8(
 	size_t		table_utf8_size)/*!< in: table_utf8 size */
 	MY_ATTRIBUTE((nonnull));
 
+/** Resize the hash tables based on the current buffer pool size. */
+void
+dict_resize();
+
 /**********************************************************************//**
 Closes the data dictionary module. */
-UNIV_INTERN
 void
 dict_close(void);
 /*============*/
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Check whether the table is corrupted.
-@return	nonzero for corrupted table, zero for valid tables */
+@return nonzero for corrupted table, zero for valid tables */
 UNIV_INLINE
 ulint
 dict_table_is_corrupted(
@@ -1779,7 +1891,7 @@ dict_table_is_corrupted(
 
 /**********************************************************************//**
 Check whether the index is corrupted.
-@return	nonzero for corrupted index, zero for valid indexes */
+@return nonzero for corrupted index, zero for valid indexes */
 UNIV_INLINE
 ulint
 dict_index_is_corrupted(
@@ -1791,7 +1903,6 @@ dict_index_is_corrupted(
 /**********************************************************************//**
 Flags an index and table corrupted both in the data dictionary cache
 and in the system table SYS_INDEXES. */
-UNIV_INTERN
 void
 dict_set_corrupted(
 /*===============*/
@@ -1800,63 +1911,110 @@ dict_set_corrupted(
 	const char*	ctx)	/*!< in: context */
 	UNIV_COLD MY_ATTRIBUTE((nonnull));
 
-/**********************************************************************//**
-Flags an index corrupted in the data dictionary cache only. This
+/** Flags an index corrupted in the data dictionary cache only. This
 is used mostly to mark a corrupted index when index's own dictionary
-is corrupted, and we force to load such index for repair purpose */
-UNIV_INTERN
+is corrupted, and we force to load such index for repair purpose
+@param[in,out]	index	index that is corrupted */
 void
 dict_set_corrupted_index_cache_only(
-/*================================*/
-	dict_index_t*	index,		/*!< in/out: index */
-	dict_table_t*	table)		/*!< in/out: table */
-	MY_ATTRIBUTE((nonnull));
+	dict_index_t*	index);
 
 /**********************************************************************//**
 Flags a table with specified space_id corrupted in the table dictionary
 cache.
 @return TRUE if successful */
-UNIV_INTERN
 ibool
 dict_set_corrupted_by_space(
 /*========================*/
 	ulint		space_id);	/*!< in: space ID */
 
-/********************************************************************//**
-Validate the table flags.
-@return	true if valid. */
+/** Sets merge_threshold in the SYS_INDEXES
+@param[in,out]	index		index
+@param[in]	merge_threshold	value to set */
+void
+dict_index_set_merge_threshold(
+	dict_index_t*	index,
+	ulint		merge_threshold);
+
+#ifdef UNIV_DEBUG
+/** Sets merge_threshold for all indexes in dictionary cache for debug.
+@param[in]	merge_threshold_all	value to set for all indexes */
+void
+dict_set_merge_threshold_all_debug(
+	uint	merge_threshold_all);
+#endif /* UNIV_DEBUG */
+
+/** Validate the table flags.
+@param[in]	flags	Table flags
+@return true if valid. */
 UNIV_INLINE
 bool
 dict_tf_is_valid(
-/*=============*/
-	ulint		flags)		/*!< in: table flags */
-	MY_ATTRIBUTE((warn_unused_result));
+	ulint	flags);
+
+/** Validate both table flags and table flags2 and make sure they
+are compatible.
+@param[in]	flags	Table flags
+@param[in]	flags2	Table flags2
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf2_is_valid(
+	ulint	flags,
+	ulint	flags2);
 
 /********************************************************************//**
 Check if the tablespace for the table has been discarded.
-@return	true if the tablespace has been discarded. */
+@return true if the tablespace has been discarded. */
 UNIV_INLINE
 bool
 dict_table_is_discarded(
 /*====================*/
 	const dict_table_t*	table)	/*!< in: table to check */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************************//**
 Check if it is a temporary table.
-@return	true if temporary table flag is set. */
+@return true if temporary table flag is set. */
 UNIV_INLINE
 bool
 dict_table_is_temporary(
 /*====================*/
 	const dict_table_t*	table)	/*!< in: table to check */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/********************************************************************//**
+Check if it is an encrypted table.
+@return true if table encryption flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_encrypted(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Check if the table is in a shared tablespace (System or General).
+@param[in]	table	Table to check
+@return true if the table is in a shared tablespace, false if not. */
+UNIV_INLINE
+bool
+dict_table_in_shared_tablespace(
+	const dict_table_t*	table)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/********************************************************************//**
+Turn-off redo-logging if temporary table. */
+UNIV_INLINE
+void
+dict_disable_redo_if_temporary(
+/*===========================*/
+	const dict_table_t*	table,	/*!< in: table to check */
+	mtr_t*			mtr);	/*!< out: mini-transaction */
 
 #ifndef UNIV_HOTBACKUP
 /*********************************************************************//**
 This function should be called whenever a page is successfully
 compressed. Updates the compression padding information. */
-UNIV_INTERN
 void
 dict_index_zip_success(
 /*===================*/
@@ -1865,7 +2023,6 @@ dict_index_zip_success(
 /*********************************************************************//**
 This function should be called whenever a page compression attempt
 fails. Updates the compression padding information. */
-UNIV_INTERN
 void
 dict_index_zip_failure(
 /*===================*/
@@ -1874,7 +2031,6 @@ dict_index_zip_failure(
 /*********************************************************************//**
 Return the optimal page size, for which page will likely compress.
 @return page size beyond which page may not compress*/
-UNIV_INTERN
 ulint
 dict_index_zip_pad_optimal_page_size(
 /*=================================*/
@@ -1884,11 +2040,18 @@ dict_index_zip_pad_optimal_page_size(
 /*************************************************************//**
 Convert table flag to row format string.
 @return row format name */
-UNIV_INTERN
 const char*
 dict_tf_to_row_format_string(
 /*=========================*/
 	ulint	table_flag);		/*!< in: row format setting */
+/****************************************************************//**
+Return maximum size of the node pointer record.
+@return maximum size of the record in bytes */
+ulint
+dict_index_node_ptr_max_size(
+/*=========================*/
+	const dict_index_t*	index)	/*!< in: index */
+	MY_ATTRIBUTE((warn_unused_result));
 /*****************************************************************//**
 Get index by first field of the index
 @return index which is having first field matches
@@ -1898,8 +2061,68 @@ dict_index_t*
 dict_table_get_index_on_first_col(
 /*==============================*/
 	const dict_table_t*	table,		/*!< in: table */
-	ulint			col_index);	/*!< in: position of column
+	ulint			col_index,	/*!< in: position of column
 						in table */
+	const char*		field_name);	/*!< in: field name */
+/** Check if a column is a virtual column
+@param[in]	col	column
+@return true if it is a virtual column, false otherwise */
+UNIV_INLINE
+bool
+dict_col_is_virtual(
+	const dict_col_t*	col);
+
+/** Encode the number of columns and the number of virtual columns in one
+4-byte value. This is possible because the number of columns in
+InnoDB is limited to 1017.
+@param[in]	n_col	number of non-virtual columns
+@param[in]	n_v_col	number of virtual columns
+@return encoded value */
+UNIV_INLINE
+ulint
+dict_table_encode_n_col(
+	ulint	n_col,
+	ulint	n_v_col);
+
+/** Decode the numbers of virtual and non-virtual columns from one 4-byte value.
+@param[in]	encoded	encoded value
+@param[in,out]	n_col	number of non-virtual columns
+@param[in,out]	n_v_col	number of virtual columns */
+UNIV_INLINE
+void
+dict_table_decode_n_col(
+	ulint	encoded,
+	ulint*	n_col,
+	ulint*	n_v_col);
+
+/** Look for any dictionary objects that are found in the given tablespace.
+@param[in]	space_id	Tablespace ID to search for.
+@return true if tablespace is empty. */
+bool
+dict_space_is_empty(
+	ulint	space_id);
+
+/** Find the space_id for the given name in sys_tablespaces.
+@param[in]	name	Tablespace name to search for.
+@return the tablespace ID. */
+ulint
+dict_space_get_id(
+	const char*	name);
+
+/** Free the virtual column template
+@param[in,out]	vc_templ	virtual column template */
+UNIV_INLINE
+void
+dict_free_vc_templ(
+	dict_vcol_templ_t*	vc_templ);
+
+/** Check whether the table has a virtual index.
+@param[in]	table	InnoDB table
+@return true if the table has a virtual index, false otherwise. */
+UNIV_INLINE
+bool
+dict_table_have_virtual_index(
+	dict_table_t*	table);
 
 #endif /* !UNIV_HOTBACKUP */
 
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 3d2f0dff0da..c44dc156aaa 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2016, Oracle and/or its affiliates
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2013, 2016, MariaDB Corporation
 
 This program is free software; you can redistribute it and/or modify it under
@@ -30,7 +30,8 @@ Created 1/8/1996 Heikki Tuuri
 #include "rem0types.h"
 #include "fsp0fsp.h"
 #include "srv0srv.h"
-#include "sync0rw.h" /* RW_S_LATCH */
+#include "sync0rw.h"
+#include "fsp0sysspace.h"
 
 /*********************************************************************//**
 Gets the minimum number of bytes per character.
@@ -89,12 +90,23 @@ dict_col_copy_type(
 	type->len = col->len;
 	type->mbminmaxlen = col->mbminmaxlen;
 }
+/** Check if a column is a virtual column.
+@param[in]	col	column whose prtype is tested for the DATA_VIRTUAL bit
+@return true if DATA_VIRTUAL is set in col->prtype, false otherwise */
+UNIV_INLINE
+bool
+dict_col_is_virtual(
+	const dict_col_t*	col)
+{
+	return(col->prtype & DATA_VIRTUAL);
+}
+
 #endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Assert that a column and a data type match.
-@return	TRUE */
+@return TRUE */
 UNIV_INLINE
 ibool
 dict_col_type_assert_equal(
@@ -119,7 +131,7 @@ dict_col_type_assert_equal(
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Returns the minimum size of the column.
-@return	minimum size */
+@return minimum size */
 UNIV_INLINE
 ulint
 dict_col_get_min_size(
@@ -131,7 +143,7 @@ dict_col_get_min_size(
 }
 /***********************************************************************//**
 Returns the maximum size of the column.
-@return	maximum size */
+@return maximum size */
 UNIV_INLINE
 ulint
 dict_col_get_max_size(
@@ -143,7 +155,7 @@ dict_col_get_max_size(
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************************//**
 Returns the size of a fixed size column, 0 if not a fixed size column.
-@return	fixed size, or 0 */
+@return fixed size, or 0 */
 UNIV_INLINE
 ulint
 dict_col_get_fixed_size(
@@ -157,7 +169,7 @@ dict_col_get_fixed_size(
 /***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
 For fixed length types it is the fixed length of the type, otherwise 0.
-@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dict_col_get_sql_null_size(
@@ -170,7 +182,7 @@ dict_col_get_sql_null_size(
 
 /*********************************************************************//**
 Gets the column number.
-@return	col->ind, table column position (starting from 0) */
+@return col->ind, table column position (starting from 0) */
 UNIV_INLINE
 ulint
 dict_col_get_no(
@@ -208,11 +220,36 @@ dict_col_get_clust_pos(
 	return(ULINT_UNDEFINED);
 }
 
+/** Gets the position of a full-column (non-prefix) field on col in index.
+@param[in]	col	table column
+@param[in]	index	index to be searched for column
+@return position of column in the given index, or ULINT_UNDEFINED if none */
+UNIV_INLINE
+ulint
+dict_col_get_index_pos(
+	const dict_col_t*	col,
+	const dict_index_t*	index)
+{
+	ulint	i;
+
+	ut_ad(col);
+
+	for (i = 0; i < index->n_def; i++) {
+		const dict_field_t*	field = &index->fields[i];
+		/* A field on a column prefix is not a match. */
+		if (!field->prefix_len && field->col == col) {
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
 #ifndef UNIV_HOTBACKUP
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the first index on the table (the clustered index).
-@return	index, NULL if none exists */
+@return index, NULL if none exists */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_first_index(
@@ -227,7 +264,7 @@ dict_table_get_first_index(
 
 /********************************************************************//**
 Gets the last index on the table.
-@return	index, NULL if none exists */
+@return index, NULL if none exists */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_last_index(
@@ -243,7 +280,7 @@ dict_table_get_last_index(
 
 /********************************************************************//**
 Gets the next index on the table.
-@return	index, NULL if none left */
+@return index, NULL if none left */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_next_index(
@@ -260,7 +297,7 @@ dict_table_get_next_index(
 
 /********************************************************************//**
 Check whether the index is the clustered index.
-@return	nonzero for clustered index, zero for other indexes */
+@return nonzero for clustered index, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_clust(
@@ -272,9 +309,22 @@ dict_index_is_clust(
 
 	return(index->type & DICT_CLUSTERED);
 }
+
+/** Check if index is auto-generated clustered index. The test is that
+index->type equals DICT_CLUSTERED exactly, with no other type bits set.
+@param[in]	index	index
+@return true if index is auto-generated clustered index. */
+UNIV_INLINE
+bool
+dict_index_is_auto_gen_clust(
+	const dict_index_t*	index)
+{
+	return(index->type == DICT_CLUSTERED);
+}
+
 /********************************************************************//**
 Check whether the index is unique.
-@return	nonzero for unique index, zero for other indexes */
+@return nonzero for unique index, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_unique(
@@ -288,38 +338,67 @@ dict_index_is_unique(
 }
 
 /********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return	nonzero for insert buffer, zero for other indexes */
+Check whether the index is a universal index tree.
+@return	nonzero for universal tree, zero for other indexes */
 UNIV_INLINE
 ulint
-dict_index_is_ibuf(
+dict_index_is_univ(
 /*===============*/
 	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	return(index->type & DICT_IBUF);
+	return(index->type & DICT_UNIVERSAL);
 }
 
 /********************************************************************//**
-Check whether the index is an universal index tree.
-@return	nonzero for universal tree, zero for other indexes */
+Check whether the index is a Spatial Index.
+@return	nonzero for Spatial Index, zero for other indexes */
 UNIV_INLINE
 ulint
-dict_index_is_univ(
+dict_index_is_spatial(
+/*==================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_SPATIAL);
+}
+
+/** Check whether the index contains a virtual column.
+@param[in]	index	index
+@return nonzero if DICT_VIRTUAL is set in index->type, zero otherwise */
+UNIV_INLINE
+ulint
+dict_index_has_virtual(
+	const dict_index_t*	index)
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_VIRTUAL);
+}
+
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
 /*===============*/
 	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	return(index->type & DICT_UNIVERSAL);
+	return(index->type & DICT_IBUF);
 }
 
 /********************************************************************//**
 Check whether the index is a secondary index or the insert buffer tree.
-@return	nonzero for insert buffer, zero for other indexes */
+@return nonzero for insert buffer, zero for other indexes */
 UNIV_INLINE
 ulint
 dict_index_is_sec_or_ibuf(
@@ -337,9 +416,10 @@ dict_index_is_sec_or_ibuf(
 }
 
 /********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return	number of user-defined (e.g., not ROW_ID) columns of a table */
+Gets the number of user-defined non-virtual columns in a table in the
+dictionary cache.
+@return number of user-defined (e.g., not ROW_ID) non-virtual
+columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_user_cols(
@@ -349,29 +429,29 @@ dict_table_get_n_user_cols(
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-	return(table->n_cols - DATA_N_SYS_COLS);
+	return(table->n_cols - dict_table_get_n_sys_cols(table));
 }
 
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return	number of system (e.g., ROW_ID) columns of a table */
+/** Gets the number of user-defined virtual and non-virtual columns in a table
+in the dictionary cache.
+@param[in]	table	table
+@return number of user-defined (e.g., not ROW_ID) columns of a table */
 UNIV_INLINE
 ulint
-dict_table_get_n_sys_cols(
-/*======================*/
-	const dict_table_t*	table MY_ATTRIBUTE((unused)))	/*!< in: table */
+dict_table_get_n_tot_u_cols(
+	const dict_table_t*	table)
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-	ut_ad(table->cached);
 
-	return(DATA_N_SYS_COLS);
+	return(dict_table_get_n_user_cols(table)
+	       + dict_table_get_n_v_cols(table));
 }
 
 /********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return	number of columns of a table */
+Gets the number of all non-virtual columns (also system) in a table
+in the dictionary cache.
+@return number of non-virtual columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_cols(
@@ -384,9 +464,42 @@ dict_table_get_n_cols(
 	return(table->n_cols);
 }
 
+/** Gets the number of virtual columns in a table in the dictionary cache.
+@param[in]	table	the table to check
+@return number of virtual columns of the table (table->n_v_cols) */
+UNIV_INLINE
+ulint
+dict_table_get_n_v_cols(
+	const dict_table_t*	table)
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->n_v_cols);
+}
+
+/** Check if a table has indexed virtual columns.
+@param[in]	table	the table to check
+@return true if the table has indexed virtual columns */
+UNIV_INLINE
+bool
+dict_table_has_indexed_v_cols(
+	const dict_table_t*	table)
+{
+	/* A virtual column counts as indexed when its m_col.ord_part is set. */
+	for (ulint i = 0; i < table->n_v_cols; i++) {
+		const dict_v_col_t*     col = dict_table_get_nth_v_col(table, i);
+		if (col->m_col.ord_part) {
+			return(true);
+		}
+	}
+
+	return(false);
+}
+
 /********************************************************************//**
 Gets the approximately estimated number of rows in the table.
-@return	estimated number of rows */
+@return estimated number of rows */
 UNIV_INLINE
 ib_uint64_t
 dict_table_get_n_rows(
@@ -437,7 +550,7 @@ dict_table_n_rows_dec(
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth column of a table.
-@return	pointer to column object */
+@return pointer to column object */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_nth_col(
@@ -452,9 +565,26 @@ dict_table_get_nth_col(
 	return((dict_col_t*) (table->cols) + pos);
 }
 
+/** Gets the nth virtual column of a table.
+@param[in]	table	table
+@param[in]	pos	position of virtual column; asserted < table->n_v_def
+@return pointer to the virtual column object at table->v_cols + pos */
+UNIV_INLINE
+dict_v_col_t*
+dict_table_get_nth_v_col(
+	const dict_table_t*	table,
+	ulint			pos)
+{
+	ut_ad(table);
+	ut_ad(pos < table->n_v_def);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(static_cast<dict_v_col_t*>(table->v_cols) + pos);
+}
+
 /********************************************************************//**
 Gets the given system column of a table.
-@return	pointer to column object */
+@return pointer to column object */
 UNIV_INLINE
 dict_col_t*
 dict_table_get_sys_col(
@@ -465,11 +595,12 @@ dict_table_get_sys_col(
 	dict_col_t*	col;
 
 	ut_ad(table);
-	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(sys < dict_table_get_n_sys_cols(table));
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
 	col = dict_table_get_nth_col(table, table->n_cols
-				     - DATA_N_SYS_COLS + sys);
+				     - dict_table_get_n_sys_cols(table)
+				     + sys);
 	ut_ad(col->mtype == DATA_SYS);
 	ut_ad(col->prtype == (sys | DATA_NOT_NULL));
 
@@ -479,7 +610,7 @@ dict_table_get_sys_col(
 
 /********************************************************************//**
 Gets the given system column number of a table.
-@return	column number */
+@return column number */
 UNIV_INLINE
 ulint
 dict_table_get_sys_col_no(
@@ -488,15 +619,15 @@ dict_table_get_sys_col_no(
 	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 {
 	ut_ad(table);
-	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(sys < dict_table_get_n_sys_cols(table));
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-	return(table->n_cols - DATA_N_SYS_COLS + sys);
+	return(table->n_cols - dict_table_get_n_sys_cols(table) + sys);
 }
 
 /********************************************************************//**
 Check whether the table uses the compact page format.
-@return	TRUE if table uses the compact page format */
+@return TRUE if table uses the compact page format */
 UNIV_INLINE
 ibool
 dict_table_is_comp(
@@ -526,77 +657,44 @@ dict_table_has_fts_index(
 	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS));
 }
 
-/********************************************************************//**
-Validate the table flags.
-@return	true if valid. */
+/** Validate the table flags.
+@param[in]	flags	Table flags
+@return true if valid. */
 UNIV_INLINE
 bool
 dict_tf_is_valid(
-/*=============*/
-	ulint	flags)		/*!< in: table flags */
+	ulint	flags)
 {
-	ulint	compact = DICT_TF_GET_COMPACT(flags);
+	bool	compact = DICT_TF_GET_COMPACT(flags);
 	ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
-	ulint	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
+	bool	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
+	bool	data_dir = DICT_TF_HAS_DATA_DIR(flags);
+	bool	shared_space = DICT_TF_HAS_SHARED_SPACE(flags);
 	ulint	unused = DICT_TF_GET_UNUSED(flags);
-	ulint	page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
+	bool	page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
 	ulint	page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
-	ulint	data_dir = DICT_TF_HAS_DATA_DIR(flags);
 	ulint	atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
+	bool	flags_corrupt = false;
 
 	/* Make sure there are no bits that we do not know about. */
 	if (unused != 0) {
-		fprintf(stderr,
-			"InnoDB: Error: table unused flags are %ld"
-			" in the data dictionary and are corrupted\n"
-			"InnoDB: Error: data dictionary flags are\n"
-			"InnoDB: compact %ld atomic_blobs %ld\n"
-			"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-			"InnoDB: page_compression %ld page_compression_level %ld\n"
-			"InnoDB: atomic_writes %ld\n",
-			unused,
-			compact, atomic_blobs, unused, data_dir, zip_ssize,
-			page_compression, page_compression_level, atomic_writes
-		);
-
-		return(false);
+		flags_corrupt = true;
+	}
 
-	} else if (atomic_blobs) {
-		/* Barracuda row formats COMPRESSED and DYNAMIC build on
-		the page structure introduced for the COMPACT row format
-		by allowing keys in secondary indexes to be made from
-		data stored off-page in the clustered index. */
+	if (atomic_blobs) {
+		/* Barracuda row formats COMPRESSED and DYNAMIC both use
+		atomic_blobs, which build on the page structure introduced
+		for the COMPACT row format by allowing keys in secondary
+		indexes to be made from data stored off-page in the
+		clustered index. */
 
 		if (!compact) {
-			fprintf(stderr,
-				"InnoDB: Error: table compact flags are %ld"
-				" in the data dictionary and are corrupted\n"
-				"InnoDB: Error: data dictionary flags are\n"
-				"InnoDB: compact %ld atomic_blobs %ld\n"
-				"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-				"InnoDB: page_compression %ld page_compression_level %ld\n"
-				"InnoDB: atomic_writes %ld\n",
-				compact, compact, atomic_blobs, unused, data_dir, zip_ssize,
-				page_compression, page_compression_level, atomic_writes
-			);
-			return(false);
+			flags_corrupt = true;
 		}
 
 	} else if (zip_ssize) {
-
 		/* Antelope does not support COMPRESSED row format. */
-		fprintf(stderr,
-			"InnoDB: Error: table flags are %ld"
-			" in the data dictionary and are corrupted\n"
-			"InnoDB: Error: data dictionary flags are\n"
-			"InnoDB: compact %ld atomic_blobs %ld\n"
-			"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-			"InnoDB: page_compression %ld page_compression_level %ld\n"
-			"InnoDB: atomic_writes %ld\n",
-			flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
-			page_compression, page_compression_level, atomic_writes
-		);
-		return(false);
+		flags_corrupt = true;
 	}
 
 	if (zip_ssize) {
@@ -607,72 +705,84 @@ dict_tf_is_valid(
 		if (!compact
 		    || !atomic_blobs
 		    || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
-
-			fprintf(stderr,
-				"InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n"
-				"InnoDB: Error: data dictionary flags are\n"
-				"InnoDB: compact %ld atomic_blobs %ld\n"
-				"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-				"InnoDB: page_compression %ld page_compression_level %ld\n"
-				"InnoDB: atomic_writes %ld\n",
-				flags,
-				compact, atomic_blobs, unused, data_dir, zip_ssize,
-				page_compression, page_compression_level, atomic_writes
-
-			);
-			return(false);
+			flags_corrupt = true;
 		}
 	}
 
-        if (page_compression || page_compression_level) {
+	if (page_compression || page_compression_level) {
 		/* Page compression format must have compact and
 		atomic_blobs and page_compression_level requires
 		page_compression */
 		if (!compact
 			|| !page_compression
 			|| !atomic_blobs) {
-
-			fprintf(stderr,
-				"InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n"
-				"InnoDB: Error: data dictionary flags are\n"
-				"InnoDB: compact %ld atomic_blobs %ld\n"
-				"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-				"InnoDB: page_compression %ld page_compression_level %ld\n"
-				"InnoDB: atomic_writes %ld\n",
-				flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
-				page_compression, page_compression_level, atomic_writes
-			);
-			return(false);
+			flags_corrupt = true;
 		}
 	}
 
 	if (atomic_writes) {
 
 		if(atomic_writes > ATOMIC_WRITES_OFF) {
-
-			fprintf(stderr,
-				"InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n"
-				"InnoDB: Error: data dictionary flags are\n"
-				"InnoDB: compact %ld atomic_blobs %ld\n"
-				"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
-				"InnoDB: page_compression %ld page_compression_level %ld\n"
-				"InnoDB: atomic_writes %ld\n",
-				flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
-				page_compression, page_compression_level, atomic_writes
-			);
-			return(false);
+			flags_corrupt = true;
 		}
 	}
 
-	/* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
-	so the DATA_DIR flag is compatible with all other table flags. */
+	/* HAS_DATA_DIR and SHARED_SPACE are mutually exclusive. */
+	if (data_dir && shared_space) {
+		flags_corrupt = true;
+	}
+
+	if (flags_corrupt) {
+		ib::error()
+			<< "InnoDB: Error: table unused flags are:" << flags
+			<< " in the data dictionary and are corrupted:"
+			<< " compact:" << compact
+			<< " atomic_blobs:" << atomic_blobs
+			<< " unused:" << unused
+			<< " data_dir:" << data_dir
+			<< " zip_ssize:" << zip_ssize
+			<< " page_compression:" << page_compression
+			<< " page_compression_level:" << page_compression_level
+			<< " atomic_writes:" << atomic_writes
+			<< " shared_space:" << shared_space;
+		return (false);
+	} else {
+		return(true);
+	}
+}
+
+/** Validate both table flags and table flags2 and make sure they
+are compatible.
+@param[in]	flags	Table flags, checked first with dict_tf_is_valid()
+@param[in]	flags2	Table flags2
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf2_is_valid(
+	ulint	flags,
+	ulint	flags2)
+{
+	if (!dict_tf_is_valid(flags)) {
+		return(false);
+	}
+	/* Reject flags2 with any bit of DICT_TF2_UNUSED_BIT_MASK set. */
+	if ((flags2 & DICT_TF2_UNUSED_BIT_MASK) != 0) {
+		return(false);
+	}
+	/* File-per-table and a shared tablespace cannot be combined. */
+	bool	file_per_table = ((flags2 & DICT_TF2_USE_FILE_PER_TABLE) != 0);
+	bool	shared_space = DICT_TF_HAS_SHARED_SPACE(flags);
+
+	if (file_per_table && shared_space) {
+		return(false);
+	}
 
 	return(true);
 }
 
 /********************************************************************//**
 Validate a SYS_TABLES TYPE field and return it.
-@return	Same as input after validating it as a SYS_TABLES TYPE field.
+@return Same as input after validating it as a SYS_TABLES TYPE field.
 If there is an error, return ULINT_UNDEFINED. */
 UNIV_INLINE
 ulint
@@ -686,7 +796,7 @@ dict_sys_tables_type_validate(
 	ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
 	ulint	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
 	ulint	unused = DICT_TF_GET_UNUSED(type);
-	ulint	page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
+	bool	page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
 	ulint	page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type);
 	ulint	atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type);
 
@@ -701,16 +811,17 @@ dict_sys_tables_type_validate(
 
 	if (redundant) {
 		if (zip_ssize || atomic_blobs) {
-			fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n",
-				zip_ssize, atomic_blobs);
+			ib::error()
+				<< "SYS_TABLES::TYPE=Redundant, zip_ssize:" << zip_ssize
+				<< " atomic_blobs:" << atomic_blobs;
 			return(ULINT_UNDEFINED);
 		}
 	}
 
 	/* Make sure there are no bits that we do not know about. */
 	if (unused) {
-		fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n",
-			type, unused);
+		ib::error()
+			<< "SYS_TABLES::TYPE=" << type << " unused:" << unused;
 		return(ULINT_UNDEFINED);
 	}
 
@@ -725,8 +836,9 @@ dict_sys_tables_type_validate(
 
 	} else if (zip_ssize) {
 		/* Antelope does not support COMPRESSED format. */
-		fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n",
-			type, zip_ssize);
+		ib::error() << "SYS_TABLES::TYPE=" << type
+			<< " zip_ssize:" << zip_ssize;
+
 		return(ULINT_UNDEFINED);
 	}
 
@@ -736,15 +848,17 @@ dict_sys_tables_type_validate(
 		should be in N_COLS, but we already know about the
 		low_order_bit and DICT_N_COLS_COMPACT flags. */
 		if (!atomic_blobs) {
-			fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n",
-				type, zip_ssize, atomic_blobs);
+			ib::error() << "SYS_TABLES::TYPE=" << type
+				<< " zip_ssize:" << zip_ssize
+				<< " atomic_blobs:" << atomic_blobs;
 			return(ULINT_UNDEFINED);
 		}
 
 		/* Validate that the number is within allowed range. */
 		if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
-			fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n",
-				type, zip_ssize, PAGE_ZIP_SSIZE_MAX);
+			ib::error() << "SYS_TABLES::TYPE=" << type
+				<< " zip_ssize:" << zip_ssize
+				<< " max:" << PAGE_ZIP_SSIZE_MAX;
 			return(ULINT_UNDEFINED);
 		}
 	}
@@ -752,26 +866,28 @@ dict_sys_tables_type_validate(
 	/* There is nothing to validate for the data_dir field.
 	CREATE TABLE ... DATA DIRECTORY is supported for any row
 	format, so the DATA_DIR flag is compatible with any other
-	table flags. However, it is not used with TEMPORARY tables.*/
+	table flags. However, it is not used with TEMPORARY tables. */
 
-        if (page_compression || page_compression_level) {
+	if (page_compression || page_compression_level) {
 		/* page compressed row format must have low_order_bit and
 		atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
 		should be in N_COLS, but we already know about the
 		low_order_bit and DICT_N_COLS_COMPACT flags. */
 
-                if (!atomic_blobs || !page_compression) {
-			fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n"
-				"InnoDB: Error: atomic_blobs %lu\n",
-				type, page_compression, page_compression_level, atomic_blobs);
+		if (!atomic_blobs || !page_compression) {
+			ib::error() << "SYS_TABLES::TYPE=" << type
+				<< " page_compression:" << page_compression
+				<< " page_compression_level:" << page_compression_level
+				<< " atomic_blobs:" << atomic_blobs;
+
 			return(ULINT_UNDEFINED);
 		}
 	}
 
 	/* Validate that the atomic writes number is within allowed range. */
 	if (atomic_writes > ATOMIC_WRITES_OFF) {
-		fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n",
-				type, atomic_writes);
+		ib::error() << "SYS_TABLES::TYPE=" << type
+			    << " atomic_writes:" << atomic_writes;
 			return(ULINT_UNDEFINED);
 	}
 
@@ -783,7 +899,7 @@ dict_sys_tables_type_validate(
 Determine the file format from dict_table_t::flags
 The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
 other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
-@return	file format version */
+@return file format version */
 UNIV_INLINE
 rec_format_t
 dict_tf_get_rec_format(
@@ -809,7 +925,7 @@ dict_tf_get_rec_format(
 
 /********************************************************************//**
 Determine the file format from a dict_table_t::flags.
-@return	file format version */
+@return file format version */
 UNIV_INLINE
 ulint
 dict_tf_get_format(
@@ -825,7 +941,7 @@ dict_tf_get_format(
 
 /********************************************************************//**
 Determine the file format of a table.
-@return	file format version */
+@return file format version */
 UNIV_INLINE
 ulint
 dict_table_get_format(
@@ -837,26 +953,29 @@ dict_table_get_format(
 	return(dict_tf_get_format(table->flags));
 }
 
-/********************************************************************//**
-Set the file format and zip size in a dict_table_t::flags.  If zip size
-is not needed, it should be 0. */
+/** Set the various values in a dict_table_t::flags pointer.
+@param[in,out]	flags		Pointer to a 4 byte Table Flags
+@param[in]	format		File Format
+@param[in]	zip_ssize	Zip Shift Size
+@param[in]	use_data_dir	Table uses DATA DIRECTORY
+@param[in]	shared_space	Table uses a General Shared Tablespace
+@param[in]	page_compressed Table uses page compression
+@param[in]	page_compression_level Page compression level
+@param[in]	atomic_writes	Table atomic writes setting; it is ORed
+				into the flags only when nonzero */
 UNIV_INLINE
 void
 dict_tf_set(
 /*========*/
-	ulint*		flags,		/*!< in/out: table flags */
-	rec_format_t	format,		/*!< in: file format */
-	ulint		zip_ssize,	/*!< in: zip shift size */
-	bool		use_data_dir,	/*!< in: table uses DATA DIRECTORY
-					*/
-	bool		page_compressed,/*!< in: table uses page compressed
-					pages */
-	ulint		page_compression_level, /*!< in: table page compression
-						 level */
-	ulint		atomic_writes)  /*!< in: table atomic writes setup */
-{
-	atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
-
+	ulint*		flags,
+	rec_format_t	format,
+	ulint		zip_ssize,
+	bool		use_data_dir,
+	bool		shared_space,
+	bool		page_compressed,
+	ulint		page_compression_level,
+	ulint		atomic_writes)
+{
 	switch (format) {
 	case REC_FORMAT_REDUNDANT:
 		*flags = 0;
@@ -878,9 +997,17 @@ dict_tf_set(
 		break;
 	}
 
+	if (use_data_dir) {
+		*flags |= (1 << DICT_TF_POS_DATA_DIR);
+	}
+
+	if (shared_space) {
+		*flags |= (1 << DICT_TF_POS_SHARED_SPACE);
+	}
+
 	if (page_compressed) {
 		*flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
-                       | (1 << DICT_TF_POS_PAGE_COMPRESSION)
+		       | (1 << DICT_TF_POS_PAGE_COMPRESSION)
 		       | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
 
 		ut_ad(zip_ssize == 0);
@@ -888,69 +1015,71 @@ dict_tf_set(
 		ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
 	}
 
-	*flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
-	ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
-
-	if (use_data_dir) {
-		*flags |= (1 << DICT_TF_POS_DATA_DIR);
+	if (atomic_writes) {
+		*flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
+		ut_a(dict_tf_get_atomic_writes(*flags) == atomic_writes);
 	}
 }
 
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field.  The following chart shows
-the translation of the low order bit.  Other bits are the same.
-========================= Low order bit ==========================
-                    | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
-dict_table_t::flags |     0     |    1    |     1      |    1
-fil_space_t::flags  |     0     |    0    |     1      |    1
-==================================================================
-@return	tablespace flags (fil_space_t::flags) */
+/** Build and return a dict_table_t::flags value from its components.
+@param[in]	compact		Table uses Compact or greater
+@param[in]	zip_ssize	Zip Shift Size (log 2 minus 9)
+@param[in]	atomic_blobs	Table uses Compressed or Dynamic
+@param[in]	data_dir	Table uses DATA DIRECTORY
+@param[in]	shared_space	Table uses a General Shared Tablespace
+@param[in]	page_compressed Table uses page compression
+@param[in]	page_compression_level used compression level
+@param[in]	atomic_writes	Table atomic writes option */
 UNIV_INLINE
 ulint
-dict_tf_to_fsp_flags(
-/*=================*/
-	ulint	table_flags)	/*!< in: dict_table_t::flags */
-{
-	ulint fsp_flags;
-	ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
-	ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
-	ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
-
-	DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
-			return(ULINT_UNDEFINED););
-
-	/* Adjust bit zero. */
-	fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
+dict_tf_init(
+	bool		compact,
+	ulint		zip_ssize,
+	bool		atomic_blobs,
+	bool		data_dir,
+	bool		shared_space,
+	bool		page_compressed,
+	ulint		page_compression_level,
+	ulint		atomic_writes)
+{
+	ulint	flags = 0;
+
+	if (compact) {
+		flags |= DICT_TF_COMPACT;
+	}
 
-	/* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */
-	fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE;
-	fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS;
+	if (zip_ssize) {
+		flags |= (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
+	}
 
-	/* In addition, tablespace flags also contain the page size. */
-	fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE);
+	if (atomic_blobs) {
+		flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS);
+	}
 
-	/* The DATA_DIR flag is in a different position in fsp_flag */
-	fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
-		     ? FSP_FLAGS_MASK_DATA_DIR : 0;
+	if (data_dir) {
+		flags |= (1 << DICT_TF_POS_DATA_DIR);
+	}
 
-	/* In addition, tablespace flags also contain if the page
-	compression is used for this table. */
-	fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression);
+	if (shared_space) {
+		flags |= (1 << DICT_TF_POS_SHARED_SPACE);
+	}
 
-	/* In addition, tablespace flags also contain page compression level
-	if page compression is used for this table. */
-	fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level);
+	if (page_compressed) {
+		flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
+		      | (1 << DICT_TF_POS_PAGE_COMPRESSION)
+		      | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
 
-	/* In addition, tablespace flags also contain flag if atomic writes
-	is used for this table */
-	fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes);
+		ut_ad(zip_ssize == 0);
+		ut_ad(dict_tf_get_page_compression(flags) == TRUE);
+		ut_ad(dict_tf_get_page_compression_level(flags) == page_compression_level);
+	}
 
-	ut_a(fsp_flags_is_valid(fsp_flags));
-	ut_a(dict_tf_verify_flags(table_flags, fsp_flags));
+	if (atomic_writes) {
+		flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
+		ut_a(dict_tf_get_atomic_writes(flags) == atomic_writes);
+	}
 
-	return(fsp_flags);
+	return(flags);
 }
 
 /********************************************************************//**
@@ -962,7 +1091,7 @@ Other bits are the same.
 SYS_TABLES.TYPE     |     1     |    1    |     1
 dict_table_t::flags |     0     |    1    |     1
 ==================================================================
-@return	ulint containing SYS_TABLES.TYPE */
+@return ulint containing SYS_TABLES.TYPE */
 UNIV_INLINE
 ulint
 dict_sys_tables_type_to_tf(
@@ -984,9 +1113,9 @@ dict_sys_tables_type_to_tf(
 			 | DICT_TF_MASK_PAGE_COMPRESSION
 			 | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
 			 | DICT_TF_MASK_ATOMIC_WRITES
+			 | DICT_TF_MASK_SHARED_SPACE);
 
-	);
-
+	ut_ad(!DICT_TF_GET_ZIP_SSIZE(flags) || DICT_TF_HAS_ATOMIC_BLOBS(flags));
 	return(flags);
 }
 
@@ -999,7 +1128,7 @@ the low order bit.  Other bits are the same.
 dict_table_t::flags |     0     |    1    |     1
 SYS_TABLES.TYPE     |     1     |    1    |     1
 ==================================================================
-@return	ulint containing SYS_TABLES.TYPE */
+@return ulint containing SYS_TABLES.TYPE */
 UNIV_INLINE
 ulint
 dict_tf_to_sys_tables_type(
@@ -1020,43 +1149,46 @@ dict_tf_to_sys_tables_type(
 			 | DICT_TF_MASK_DATA_DIR
 			 | DICT_TF_MASK_PAGE_COMPRESSION
 			 | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
-			 | DICT_TF_MASK_ATOMIC_WRITES);
+			 | DICT_TF_MASK_ATOMIC_WRITES
+			 | DICT_TF_MASK_SHARED_SPACE);
 
 	return(type);
 }
 
-/********************************************************************//**
-Extract the compressed page size from dict_table_t::flags.
-These flags are in memory, so assert that they are valid.
-@return	compressed page size, or 0 if not compressed */
+/** Extract the page size info from table flags.
+@param[in]	flags	flags
+@return a structure containing the compressed and uncompressed
+page sizes and a boolean indicating if the page is compressed. */
 UNIV_INLINE
-ulint
-dict_tf_get_zip_size(
-/*=================*/
-	ulint	flags)	/*!< in: flags */
+const page_size_t
+dict_tf_get_page_size(
+	ulint	flags)
 {
-	ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
-	ulint zip_size = (zip_ssize
-			  ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize
-			  : 0);
+	const ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+
+	if (zip_ssize == 0) {
+		return(univ_page_size);
+	}
+
+	const ulint	zip_size = (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize;
 
 	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
 
-	return(zip_size);
+	return(page_size_t(zip_size, univ_page_size.logical(), true));
 }
 
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return	compressed page size, or 0 if not compressed */
+/** Get the table page size.
+@param[in]	table	table
+@return a structure containing the compressed and uncompressed
+page sizes and a boolean indicating if the page is compressed */
 UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
-	const dict_table_t*	table)	/*!< in: table */
+const page_size_t
+dict_table_page_size(
+	const dict_table_t*	table)
 {
-	ut_ad(table);
+	ut_ad(table != NULL);
 
-	return(dict_tf_get_zip_size(table->flags));
+	return(dict_tf_get_page_size(table->flags));
 }
 
 #ifndef UNIV_HOTBACKUP
@@ -1073,7 +1205,7 @@ dict_table_x_lock_indexes(
 	dict_index_t*   index;
 
 	ut_a(table);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(mutex_own(&dict_sys->mutex));
 
 	/* Loop through each index of the table and lock them */
 	for (index = dict_table_get_first_index(table);
@@ -1094,7 +1226,7 @@ dict_table_x_unlock_indexes(
 	dict_index_t*   index;
 
 	ut_a(table);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(mutex_own(&dict_sys->mutex));
 
 	for (index = dict_table_get_first_index(table);
 	     index != NULL;
@@ -1107,7 +1239,7 @@ dict_table_x_unlock_indexes(
 /********************************************************************//**
 Gets the number of fields in the internal representation of an index,
 including fields added by the dictionary system.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_fields(
@@ -1127,7 +1259,7 @@ Gets the number of fields in the internal representation of an index
 that uniquely determine the position of an index entry in the index, if
 we do not take multiversioning into account: in the B-tree use the value
 returned by dict_index_get_n_unique_in_tree.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique(
@@ -1146,7 +1278,7 @@ dict_index_get_n_unique(
 Gets the number of fields in the internal representation of an index
 which uniquely determine the position of an index entry in the index, if
 we also take multiversioning into account.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique_in_tree(
@@ -1166,12 +1298,38 @@ dict_index_get_n_unique_in_tree(
 	return(dict_index_get_n_fields(index));
 }
 
+/**
+Gets the number of fields on nonleaf page level in the internal representation
+of an index which uniquely determine the position of an index entry in the
+index, if we also take multiversioning into account. Note, it doesn't
+include page no field.
+@param[in]	index	index
+@return number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree_nonleaf(
+	const dict_index_t*	index)
+{
+	ut_ad(index != NULL);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	if (dict_index_is_spatial(index)) {
+		/* For spatial index, on non-leaf page, we have only
+		2 fields(mbr+page_no). So, except page no field,
+		there's one field there. */
+		return(DICT_INDEX_SPATIAL_NODEPTR_SIZE);
+	} else {
+		return(dict_index_get_n_unique_in_tree(index));
+	}
+}
+
 /********************************************************************//**
 Gets the number of user-defined ordering fields in the index. In the internal
 representation of clustered indexes we add the row id to the ordering fields
 to make a clustered index unique, but this function returns the number of
 fields the user defined in the index as ordering fields.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_ordering_defined_by_user(
@@ -1185,7 +1343,7 @@ dict_index_get_n_ordering_defined_by_user(
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth field of an index.
-@return	pointer to field object */
+@return pointer to field object */
 UNIV_INLINE
 dict_field_t*
 dict_index_get_nth_field(
@@ -1203,7 +1361,7 @@ dict_index_get_nth_field(
 
 /********************************************************************//**
 Returns the position of a system column in an index.
-@return	position, ULINT_UNDEFINED if not contained */
+@return position, ULINT_UNDEFINED if not contained */
 UNIV_INLINE
 ulint
 dict_index_get_sys_col_pos(
@@ -1213,7 +1371,7 @@ dict_index_get_sys_col_pos(
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-	ut_ad(!dict_index_is_univ(index));
+	ut_ad(!dict_index_is_ibuf(index));
 
 	if (dict_index_is_clust(index)) {
 
@@ -1223,13 +1381,12 @@ dict_index_get_sys_col_pos(
 	}
 
 	return(dict_index_get_nth_col_pos(
-			index, dict_table_get_sys_col_no(index->table, type),
-			NULL));
+			index, dict_table_get_sys_col_no(index->table, type), NULL));
 }
 
 /*********************************************************************//**
 Gets the field column.
-@return	field->col, pointer to the table column */
+@return field->col, pointer to the table column */
 UNIV_INLINE
 const dict_col_t*
 dict_field_get_col(
@@ -1243,7 +1400,7 @@ dict_field_get_col(
 
 /********************************************************************//**
 Gets pointer to the nth column in an index.
-@return	column */
+@return column */
 UNIV_INLINE
 const dict_col_t*
 dict_index_get_nth_col(
@@ -1256,7 +1413,7 @@ dict_index_get_nth_col(
 
 /********************************************************************//**
 Gets the column number the nth field in an index.
-@return	column number */
+@return column number */
 UNIV_INLINE
 ulint
 dict_index_get_nth_col_no(
@@ -1279,14 +1436,14 @@ dict_index_get_nth_col_pos(
 	ulint			n,	/*!< in: column number */
 	ulint*			prefix_col_pos) /*!< out: col num if prefix */
 {
-	return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE,
+	return(dict_index_get_nth_col_or_prefix_pos(index, n, false, false,
 						    prefix_col_pos));
 }
 
 #ifndef UNIV_HOTBACKUP
 /********************************************************************//**
 Returns the minimum data size of an index record.
-@return	minimum data size in bytes */
+@return minimum data size in bytes */
 UNIV_INLINE
 ulint
 dict_index_get_min_size(
@@ -1306,7 +1463,7 @@ dict_index_get_min_size(
 
 /*********************************************************************//**
 Gets the space id of the root of the index tree.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 dict_index_get_space(
@@ -1336,7 +1493,7 @@ dict_index_set_space(
 
 /*********************************************************************//**
 Gets the page number of the root of the index tree.
-@return	page number */
+@return page number */
 UNIV_INLINE
 ulint
 dict_index_get_page(
@@ -1351,7 +1508,7 @@ dict_index_get_page(
 
 /*********************************************************************//**
 Gets the read-write lock of the index tree.
-@return	read-write lock */
+@return read-write lock */
 UNIV_INLINE
 rw_lock_t*
 dict_index_get_lock(
@@ -1368,7 +1525,7 @@ dict_index_get_lock(
 Returns free space reserved for future updates of records. This is
 relevant only in the case of many consecutive inserts, as updates
 which make the records bigger might fragment the index.
-@return	number of free bytes on page, reserved for updates */
+@return number of free bytes on page, reserved for updates */
 UNIV_INLINE
 ulint
 dict_index_get_space_reserve(void)
@@ -1420,9 +1577,8 @@ dict_index_set_online_status(
 	enum online_index_status	status)	/*!< in: status */
 {
 	ut_ad(!(index->type & DICT_FTS));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+
 #ifdef UNIV_DEBUG
 	switch (dict_index_get_online_status(index)) {
 	case ONLINE_INDEX_COMPLETE:
@@ -1482,7 +1638,8 @@ ulint
 dict_table_is_fts_column(
 /*=====================*/
 	ib_vector_t*	indexes,/*!< in: vector containing only FTS indexes */
-	ulint		col_no)	/*!< in: col number to search for */
+	ulint		col_no,	/*!< in: col number to search for */
+	bool		is_virtual) /*!< in: whether it is a virtual column */
 
 {
 	ulint		i;
@@ -1492,7 +1649,8 @@ dict_table_is_fts_column(
 
 		index = (dict_index_t*) ib_vector_getp(indexes, i);
 
-		if (dict_index_contains_col_or_prefix(index, col_no)) {
+		if (dict_index_contains_col_or_prefix(
+			index, col_no, is_virtual)) {
 
 			return(i);
 		}
@@ -1526,9 +1684,57 @@ dict_max_field_len_store_undo(
 	return(prefix_len);
 }
 
+/** Determine the maximum number of bytes of a virtual column that need
+to be stored in the undo log.
+@param[in]	table		dict_table_t for the table
+@param[in]	col_no		virtual column number
+@return maximum bytes of virtual column to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_v_field_len_store_undo(
+	dict_table_t*		table,
+	ulint			col_no)
+{
+	const dict_col_t*	col
+		= &dict_table_get_nth_v_col(table, col_no)->m_col;
+	ulint			max_log_len;
+
+	/* This calculation conforms to the non-virtual column
+	maximum log length calculation:
+	1) for UNIV_FORMAT_A, up to REC_ANTELOPE_MAX_INDEX_COL_LEN
+	for UNIV_FORMAT_B, up to col->max_prefix or
+	2) REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
+	if (dict_table_get_format(table) >= UNIV_FORMAT_B) {
+		if (DATA_BIG_COL(col) && col->max_prefix > 0) {
+			max_log_len = col->max_prefix;
+		} else {
+			max_log_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+		}
+	} else {
+		max_log_len = REC_ANTELOPE_MAX_INDEX_COL_LEN;
+	}
+
+	return(max_log_len);
+}
+
+/**********************************************************************//**
+Prevent table eviction by moving a table to the non-LRU list from the
+LRU list if it is not already there. */
+UNIV_INLINE
+void
+dict_table_prevent_eviction(
+/*========================*/
+	dict_table_t*	table)	/*!< in: table to prevent eviction */
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+	if (table->can_be_evicted) {
+		dict_table_move_from_lru_to_non_lru(table);
+	}
+}
+
 /********************************************************************//**
 Check whether the table is corrupted.
-@return	nonzero for corrupted table, zero for valid tables */
+@return nonzero for corrupted table, zero for valid tables */
 UNIV_INLINE
 ulint
 dict_table_is_corrupted(
@@ -1543,7 +1749,7 @@ dict_table_is_corrupted(
 
 /********************************************************************//**
 Check whether the index is corrupted.
-@return	nonzero for corrupted index, zero for valid indexes */
+@return nonzero for corrupted index, zero for valid indexes */
 UNIV_INLINE
 ulint
 dict_index_is_corrupted(
@@ -1559,7 +1765,7 @@ dict_index_is_corrupted(
 
 /********************************************************************//**
 Check if the tablespace for the table has been discarded.
-@return	true if the tablespace has been discarded. */
+@return true if the tablespace has been discarded. */
 UNIV_INLINE
 bool
 dict_table_is_discarded(
@@ -1571,7 +1777,7 @@ dict_table_is_discarded(
 
 /********************************************************************//**
 Check if it is a temporary table.
-@return	true if temporary table flag is set. */
+@return true if temporary table flag is set. */
 UNIV_INLINE
 bool
 dict_table_is_temporary(
@@ -1581,6 +1787,78 @@ dict_table_is_temporary(
 	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
 }
 
+/********************************************************************//**
+Check if it is an encrypted table.
+@return true if table encrypted flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_encrypted(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+{
+	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_ENCRYPTION));
+}
+
+/** Check if the table is in a shared tablespace (System or General).
+@param[in]	table	Table to check
+@return true if the table is in a shared tablespace, false if not. */
+UNIV_INLINE
+bool
+dict_table_in_shared_tablespace(
+	const dict_table_t*	table)
+{
+	return(is_system_tablespace(table->space)
+		|| DICT_TF_HAS_SHARED_SPACE(table->flags));
+}
+
+/********************************************************************//**
+Turn-off redo-logging if temporary table. */
+UNIV_INLINE
+void
+dict_disable_redo_if_temporary(
+/*===========================*/
+	const dict_table_t*	table,	/*!< in: table to check */
+	mtr_t*			mtr)	/*!< out: mini-transaction */
+{
+	if (dict_table_is_temporary(table)) {
+		mtr_set_log_mode(mtr, MTR_LOG_NO_REDO);
+	}
+}
+
+/** Check if the table is found in a file_per_table tablespace.
+This test does not use table flags2 since some REDUNDANT tables in the
+system tablespace may have garbage in the MIX_LEN field where flags2 is
+stored. These garbage MIX_LEN fields were written before v3.23.52.
+A patch was added to v3.23.52 which initializes the MIX_LEN field to 0.
+Since file-per-table tablespaces were added in 4.1, any SYS_TABLES
+record with a non-zero space ID will have a reliable MIX_LEN field.
+However, this test does not use flags2 from SYS_TABLES.MIX_LEN.  Instead,
+assume that if the tablespace is not a predefined system tablespace and it
+is not a general shared tablespace, then it must be file-per-table.
+Also, during ALTER TABLE, the DICT_TF2_USE_FILE_PER_TABLE flag may not be
+set on one of the file-per-table tablespaces.
+This test cannot be done on a table in the process of being created
+because the space_id will be zero until the tablespace is created.
+@param[in]	table	An existing open table to check
+@return true if this table was created as a file-per-table tablespace. */
+UNIV_INLINE
+bool
+dict_table_is_file_per_table(
+	const dict_table_t*	table)	/*!< in: table to check */
+{
+	bool is_file_per_table =
+		!is_system_tablespace(table->space)
+		&& !DICT_TF_HAS_SHARED_SPACE(table->flags);
+
+	/* If the table is file-per-table and it is not redundant, then
+	it should have the flags2 bit for DICT_TF2_USE_FILE_PER_TABLE. */
+	ut_ad(!is_file_per_table
+	      || !DICT_TF_GET_COMPACT(table->flags)
+	      || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE));
+
+	return(is_file_per_table );
+}
+
 /**********************************************************************//**
 Get index by first field of the index
 @return index which is having first field matches
@@ -1589,9 +1867,10 @@ UNIV_INLINE
 dict_index_t*
 dict_table_get_index_on_first_col(
 /*==============================*/
-	const dict_table_t*	table,		/*!< in: table */
-	ulint			col_index)	/*!< in: position of column
+        const dict_table_t*     table,		/*!< in: table */
+        ulint                   col_index,	/*!< in: position of column
 						in table */
+	const char*             field_name)     /*!< in: field name */
 {
 	ut_ad(col_index < table->n_cols);
 
@@ -1604,8 +1883,131 @@ dict_table_get_index_on_first_col(
 			return(index);
 		}
 	}
+
+	/* If not yet found use field_name */
+	for (dict_index_t* index = dict_table_get_first_index(table);
+		index != NULL; index = dict_table_get_next_index(index)) {
+		if (!strcmp(index->fields[0].name, field_name)) {
+			return (index);
+		}
+	}
 	ut_error;
 	return(0);
 }
 
+/** Get reference count.
+@return current value of n_ref_count */
+inline
+ulint
+dict_table_t::get_ref_count() const
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+	return(n_ref_count);
+}
+
+/** Acquire the table handle. */
+inline
+void
+dict_table_t::acquire()
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+	++n_ref_count;
+}
+
+/** Release the table handle. */
+inline
+void
+dict_table_t::release()
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+	ut_ad(n_ref_count > 0);
+	--n_ref_count;
+}
+
+/** Check if tablespace name has the "innodb_general" prefix.
+@param[in]	tablespace_name	tablespace name, may be NULL
+@retval		true		if name starts with general_space_name
+@retval		false		if name is NULL or does not start with it */
+inline
+bool
+dict_table_has_temp_general_tablespace_name(
+	const char*	tablespace_name) {
+
+	return(tablespace_name != NULL
+	       && strncmp(tablespace_name, general_space_name,
+			  strlen(general_space_name)) == 0);
+}
+
+/** Encode the number of columns and number of virtual columns in a
+4 bytes value. We can do this because the number of columns in
+InnoDB is limited to 1017.
+@param[in]	n_col	number of non-virtual columns (low 16 bits)
+@param[in]	n_v_col	number of virtual columns (shifted left by 16)
+@return encoded value */
+UNIV_INLINE
+ulint
+dict_table_encode_n_col(
+                ulint   n_col,
+                ulint   n_v_col)
+{
+	return(n_col + (n_v_col<<16));
+}
+
+/** Decode number of virtual and non-virtual columns in one 4 bytes value.
+@param[in]	encoded	encoded value (DICT_N_COLS_COMPACT bit is ignored)
+@param[out]	n_col	number of non-virtual columns
+@param[out]	n_v_col	number of virtual columns */
+UNIV_INLINE
+void
+dict_table_decode_n_col(
+                ulint   encoded,
+                ulint*  n_col,
+                ulint*  n_v_col)
+{
+
+	ulint	num = encoded & ~DICT_N_COLS_COMPACT;
+	*n_v_col = num >> 16;
+	*n_col = num & 0xFFFF;
+}
+
+/** Free the virtual column template
+@param[in,out]	vc_templ	virtual column template */
+void
+dict_free_vc_templ(
+	dict_vcol_templ_t*	vc_templ)
+{
+	if (vc_templ->vtempl != NULL) {
+		ut_ad(vc_templ->n_v_col > 0);
+		for (ulint i = 0; i < vc_templ->n_col
+		     + vc_templ->n_v_col; i++) {
+			if (vc_templ->vtempl[i] != NULL) {
+				ut_free(vc_templ->vtempl[i]);
+			}
+		}
+		ut_free(vc_templ->vtempl);
+		vc_templ->vtempl = NULL;
+	}
+}
+
+/** Check whether the table has a virtual index.
+@param[in]	table	InnoDB table
+@return true if any virtual column has ord_part set, false otherwise. */
+UNIV_INLINE
+bool
+dict_table_have_virtual_index(
+	dict_table_t*	table)
+{
+	for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table);
+	     col_no++) {
+		const dict_v_col_t*	col
+			= dict_table_get_nth_v_col(table, col_no);
+
+		if (col->m_col.ord_part) {
+			return(true);
+		}
+	}
+
+	return(false);
+}
+
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index dcbc3de8e94..6d01c38c432 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -33,6 +33,12 @@ Created 4/24/1996 Heikki Tuuri
 #include "ut0byte.h"
 #include "mem0mem.h"
 #include "btr0types.h"
+#include "ut0new.h"
+
+#include <deque>
+
+/** A stack of table names related through foreign key constraints */
+typedef std::deque<const char*, ut_allocator<const char*> >	dict_names_t;
 
 /** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
 enum dict_system_id_t {
@@ -44,6 +50,7 @@ enum dict_system_id_t {
 	SYS_FOREIGN_COLS,
 	SYS_TABLESPACES,
 	SYS_DATAFILES,
+	SYS_VIRTUAL,
 
 	/* This must be last item. Defines the number of system tables. */
 	SYS_NUM_SYSTEM_TABLES
@@ -58,57 +65,37 @@ enum dict_table_info_t {
 					is in the cache, if so, return it */
 };
 
-/** Check type for dict_check_tablespaces_and_store_max_id() */
-enum dict_check_t {
-	/** No user tablespaces have been opened
-	(no crash recovery, no transactions recovered). */
-	DICT_CHECK_NONE_LOADED = 0,
-	/** Some user tablespaces may have been opened
-	(no crash recovery; recovered table locks for transactions). */
-	DICT_CHECK_SOME_LOADED,
-	/** All user tablespaces have been opened (crash recovery). */
-	DICT_CHECK_ALL_LOADED
-};
+/** Check each tablespace found in the data dictionary.
+Look at each table defined in SYS_TABLES that has a space_id > 0.
+If the tablespace is not yet in the fil_system cache, look up the
+tablespace in SYS_DATAFILES to ensure the correct path.
 
-/********************************************************************//**
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
+In a crash recovery we already have some tablespace objects created from
+processing the REDO log.  Any other tablespace in SYS_TABLESPACES not
+previously used in recovery will be opened here.  We will compare the
+space_id information in the data dictionary to what we find in the
+tablespace file. In addition, more validation will be done if recovery
+was needed and force_recovery is not set.
+
+We also scan the biggest space id, and store it to fil_system.
+@param[in]	validate	true if recovery was needed */
 void
 dict_check_tablespaces_and_store_max_id(
-/*====================================*/
-	dict_check_t	dict_check);	/*!< in: how to check */
+	bool		validate);
+
 /********************************************************************//**
 Finds the first table name in the given database.
 @return own: table name, NULL if does not exist; the caller must free
 the memory in the string! */
-UNIV_INTERN
 char*
 dict_get_first_table_name_in_db(
 /*============================*/
 	const char*	name);	/*!< in: database name which ends to '/' */
 
 /********************************************************************//**
-Loads a table definition from a SYS_TABLES record to dict_table_t.
-Does not load any columns or indexes.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_table_low(
-/*================*/
-	const char*	name,		/*!< in: table name */
-	const rec_t*	rec,		/*!< in: SYS_TABLES record */
-	dict_table_t**	table);		/*!< out,own: table, or NULL */
-/********************************************************************//**
 Loads a table column definition from a SYS_COLUMNS record to
 dict_table_t.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_load_column_low(
 /*=================*/
@@ -122,14 +109,36 @@ dict_load_column_low(
 					or NULL if table != NULL */
 	table_id_t*	table_id,	/*!< out: table id */
 	const char**	col_name,	/*!< out: column name */
-	const rec_t*	rec);		/*!< in: SYS_COLUMNS record */
+	const rec_t*	rec,		/*!< in: SYS_COLUMNS record */
+	ulint*		nth_v_col);	/*!< out: if not NULL, this
+					records the "n" of "nth" virtual
+					column */
+
+/** Loads a virtual column "mapping" (to base columns) information
+from a SYS_VIRTUAL record
+@param[in,out]	table		table
+@param[in,out]	heap		memory heap
+@param[in,out]	column		mapped base column's dict_column_t
+@param[in,out]	table_id	table id
+@param[in,out]	pos		virtual column position
+@param[in,out]	base_pos	base column position
+@param[in]	rec		SYS_VIRTUAL record
+@return error message, or NULL on success */
+const char*
+dict_load_virtual_low(
+	dict_table_t*   table,
+	mem_heap_t*     heap,
+	dict_col_t**    column,
+	table_id_t*     table_id,
+	ulint*		pos,
+	ulint*		base_pos,
+	const rec_t*    rec);
 /********************************************************************//**
 Loads an index definition from a SYS_INDEXES record to dict_index_t.
 If allocate=TRUE, we will create a dict_index_t structure and fill it
 accordingly. If allocated=FALSE, the dict_index_t will be supplied by
 the caller and filled with information read from the record.  @return
 error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_load_index_low(
 /*================*/
@@ -147,7 +156,6 @@ dict_load_index_low(
 Loads an index field definition from a SYS_FIELDS record to
 dict_index_t.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_load_field_low(
 /*================*/
@@ -170,44 +178,58 @@ Using the table->heap, copy the null-terminated filepath into
 table->data_dir_path and put a null byte before the extension.
 This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
 Make this data directory path only if it has not yet been saved. */
-UNIV_INTERN
 void
 dict_save_data_dir_path(
 /*====================*/
 	dict_table_t*	table,		/*!< in/out: table */
 	char*		filepath);	/*!< in: filepath of tablespace */
-/*****************************************************************//**
-Make sure the data_file_name is saved in dict_table_t if needed. Try to
-read it from the file dictionary first, then from SYS_DATAFILES. */
-UNIV_INTERN
+
+/** Get the first filepath from SYS_DATAFILES for a given space_id.
+@param[in]	space_id	Tablespace ID
+@return First filepath (caller must invoke ut_free() on it)
+@retval NULL if no SYS_DATAFILES entry was found. */
+char*
+dict_get_first_path(
+	ulint	space_id);
+
+/** Make sure the data_file_name is saved in dict_table_t if needed.
+Try to read it from the fil_system first, then from SYS_DATAFILES.
+@param[in,out]	table		Table object
+@param[in]	dict_mutex_own	true if dict_sys->mutex is owned already */
 void
 dict_get_and_save_data_dir_path(
-/*============================*/
-	dict_table_t*	table,		/*!< in/out: table */
-	bool		dict_mutex_own);	/*!< in: true if dict_sys->mutex
-					is owned already */
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
+	dict_table_t*	table,
+	bool		dict_mutex_own);
+
+/** Make sure the tablespace name is saved in dict_table_t if needed.
+Try to read it from the file dictionary first, then from SYS_TABLESPACES.
+@param[in,out]	table		Table object
+@param[in]	dict_mutex_own	true if dict_sys->mutex is owned already */
+void
+dict_get_and_save_space_name(
+	dict_table_t*	table,
+	bool		dict_mutex_own);
+
+/** Loads a table definition and also all its index definitions, and also
 the cluster definition if the table is a member in a cluster. Also loads
 all foreign key constraints where the foreign key is in the table or where
 a foreign key references columns in this table.
+@param[in]	name		Table name in the dbname/tablename format
+@param[in]	cached		true=add to cache, false=do not
+@param[in]	ignore_err	Error to be ignored when loading
+				table and its index definition
 @return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
+.ibd file, but the file does not exist, then we set the ibd_file_missing
+flag in the table object we return. */
 dict_table_t*
 dict_load_table(
-/*============*/
-	const char*	name,	/*!< in: table name in the
-				databasename/tablename format */
-	ibool		cached,	/*!< in: TRUE=add to cache, FALSE=do not */
+	const char*	name,
+	bool		cached,
 	dict_err_ignore_t ignore_err);
-				/*!< in: error to be ignored when loading
-				table and its indexes' definition */
+
 /***********************************************************************//**
 Loads a table object based on the table id.
-@return	table; NULL if table does not exist */
-UNIV_INTERN
+@return table; NULL if table does not exist */
 dict_table_t*
 dict_load_table_on_id(
 /*==================*/
@@ -218,7 +240,6 @@ dict_load_table_on_id(
 This function is called when the database is booted.
 Loads system table index definitions except for the clustered index which
 is added to the dictionary cache at booting before calling this function. */
-UNIV_INTERN
 void
 dict_load_sys_table(
 /*================*/
@@ -226,11 +247,13 @@ dict_load_sys_table(
 /***********************************************************************//**
 Loads foreign key constraints where the table is either the foreign key
 holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+constraints to the data dictionary.
+
+The foreign key constraint is loaded only if the referenced table is also
+in the dictionary cache.  If the referenced table is not in dictionary
+cache, then it is added to the output parameter (fk_tables).
+
+@return DB_SUCCESS or error code */
 dberr_t
 dict_load_foreigns(
 /*===============*/
@@ -242,20 +265,16 @@ dict_load_foreigns(
 						chained by FK */
 	bool			check_charsets,	/*!< in: whether to check
 						charset compatibility */
-	dict_err_ignore_t	ignore_err)	/*!< in: error to be ignored */
+	dict_err_ignore_t	ignore_err,	/*!< in: error to be ignored */
+	dict_names_t&		fk_tables)	/*!< out: stack of table names
+						which must be loaded
+						subsequently to load all the
+						foreign key constraints. */
 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void);
-/*============*/
 
 /********************************************************************//**
 This function opens a system table, and return the first record.
-@return	first record of the system table */
-UNIV_INTERN
+@return first record of the system table */
 const rec_t*
 dict_startscan_system(
 /*==================*/
@@ -265,8 +284,7 @@ dict_startscan_system(
 	dict_system_id_t system_id);	/*!< in: which system table to open */
 /********************************************************************//**
 This function get the next system table record as we scan the table.
-@return	the record if found, NULL if end of scan. */
-UNIV_INTERN
+@return the record if found, NULL if end of scan. */
 const rec_t*
 dict_getnext_system(
 /*================*/
@@ -278,7 +296,6 @@ This function processes one SYS_TABLES record and populate the dict_table_t
 struct for the table. Extracted out of dict_print() to be used by
 both monitor table output and information schema innodb_sys_tables output.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_tables_rec_and_mtr_commit(
 /*=======================================*/
@@ -296,7 +313,6 @@ This function parses a SYS_INDEXES record and populate a dict_index_t
 structure with the information from the record. For detail information
 about SYS_INDEXES fields, please refer to dict_boot() function.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_indexes_rec(
 /*=========================*/
@@ -309,7 +325,6 @@ dict_process_sys_indexes_rec(
 This function parses a SYS_COLUMNS record and populate a dict_column_t
 structure with the information from the record.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_columns_rec(
 /*=========================*/
@@ -317,12 +332,29 @@ dict_process_sys_columns_rec(
 	const rec_t*	rec,		/*!< in: current SYS_COLUMNS rec */
 	dict_col_t*	column,		/*!< out: dict_col_t to be filled */
 	table_id_t*	table_id,	/*!< out: table id */
-	const char**	col_name);	/*!< out: column name */
+	const char**	col_name,	/*!< out: column name */
+	ulint*		nth_v_col);	/*!< out: if virtual col, this is
+					set to its sequence number */
+
+/** This function parses a SYS_VIRTUAL record and extracts virtual column
+information
+@param[in,out]	heap		heap memory
+@param[in]	rec		current SYS_VIRTUAL rec
+@param[in,out]	table_id	table id
+@param[in,out]	pos		virtual column position
+@param[in,out]	base_pos	base column position
+@return error message, or NULL on success */
+const char*
+dict_process_sys_virtual_rec(
+	mem_heap_t*	heap,
+	const rec_t*	rec,
+	table_id_t*	table_id,
+	ulint*		pos,
+	ulint*		base_pos);
 /********************************************************************//**
 This function parses a SYS_FIELDS record and populate a dict_field_t
 structure with the information from the record.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_fields_rec(
 /*========================*/
@@ -338,7 +370,6 @@ This function parses a SYS_FOREIGN record and populate a dict_foreign_t
 structure with the information from the record. For detail information
 about SYS_FOREIGN fields, please refer to dict_load_foreign() function
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_foreign_rec(
 /*=========================*/
@@ -350,7 +381,6 @@ dict_process_sys_foreign_rec(
 This function parses a SYS_FOREIGN_COLS record and extract necessary
 information from the record and return to caller.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_foreign_col_rec(
 /*=============================*/
@@ -365,7 +395,6 @@ dict_process_sys_foreign_col_rec(
 This function parses a SYS_TABLESPACES record, extracts necessary
 information from the record and returns to caller.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_tablespaces(
 /*=========================*/
@@ -378,7 +407,6 @@ dict_process_sys_tablespaces(
 This function parses a SYS_DATAFILES record, extracts necessary
 information from the record and returns to caller.
 @return error message, or NULL on success */
-UNIV_INTERN
 const char*
 dict_process_sys_datafiles(
 /*=======================*/
@@ -386,40 +414,29 @@ dict_process_sys_datafiles(
 	const rec_t*	rec,		/*!< in: current SYS_DATAFILES rec */
 	ulint*		space,		/*!< out: pace id */
 	const char**	path);		/*!< out: datafile path */
-/********************************************************************//**
-Get the filepath for a spaceid from SYS_DATAFILES. This function provides
-a temporary heap which is used for the table lookup, but not for the path.
-The caller must free the memory for the path returned. This function can
-return NULL if the space ID is not found in SYS_DATAFILES, then the caller
-will assume that the ibd file is in the normal datadir.
-@return	own: A copy of the first datafile found in SYS_DATAFILES.PATH for
-the given space ID. NULL if space ID is zero or not found. */
-UNIV_INTERN
-char*
-dict_get_first_path(
-/*================*/
-	ulint		space,	/*!< in: space id */
-	const char*	name);	/*!< in: tablespace name */
-/********************************************************************//**
-Update the record for space_id in SYS_TABLESPACES to this filepath.
-@return	DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
+
+/** Update the record for space_id in SYS_TABLESPACES to this filepath.
+@param[in]	space_id	Tablespace ID
+@param[in]	filepath	Tablespace filepath
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
 dberr_t
 dict_update_filepath(
-/*=================*/
-	ulint		space_id,	/*!< in: space id */
-	const char*	filepath);	/*!< in: filepath */
-/********************************************************************//**
-Insert records into SYS_TABLESPACES and SYS_DATAFILES.
-@return	DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
+	ulint		space_id,
+	const char*	filepath);
+
+/** Replace records in SYS_TABLESPACES and SYS_DATAFILES associated with
+the given space_id using an independent transaction.
+@param[in]	space_id	Tablespace ID
+@param[in]	name		Tablespace name
+@param[in]	filepath	First filepath
+@param[in]	fsp_flags	Tablespace flags
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
 dberr_t
-dict_insert_tablespace_and_filepath(
-/*================================*/
-	ulint		space,		/*!< in: space id */
-	const char*	name,		/*!< in: talespace name */
-	const char*	filepath,	/*!< in: filepath */
-	ulint		fsp_flags);	/*!< in: tablespace flags */
+dict_replace_tablespace_and_filepath(
+	ulint		space_id,
+	const char*	name,
+	const char*	filepath,
+	ulint		fsp_flags);
 
 #ifndef UNIV_NONINL
 #include "dict0load.ic"
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 902b960eaa3..8cefdddad65 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -41,13 +41,16 @@ Created 1/8/1996 Heikki Tuuri
 # include "sync0rw.h"
 #endif /* !UNIV_HOTBACKUP */
 #include "ut0mem.h"
-#include "ut0lst.h"
 #include "ut0rnd.h"
 #include "ut0byte.h"
 #include "hash0hash.h"
 #include "trx0types.h"
 #include "fts0fts.h"
+#include "buf0buf.h"
+#include "gis0type.h"
 #include "os0once.h"
+#include "ut0new.h"
+
 #include "fil0fil.h"
 #include <my_crypt.h>
 #include "fil0crypt.h"
@@ -62,7 +65,9 @@ struct ib_rbt_t;
 /** Type flags of an index: OR'ing of the flags is allowed to define a
 combination of types */
 /* @{ */
-#define DICT_CLUSTERED	1	/*!< clustered index */
+#define DICT_CLUSTERED	1	/*!< clustered index; for other than
+				auto-generated clustered indexes,
+				also DICT_UNIQUE will be set */
 #define DICT_UNIQUE	2	/*!< unique index */
 #define	DICT_UNIVERSAL	4	/*!< index which can contain records from any
 				other index */
@@ -71,8 +76,11 @@ combination of types */
 				in SYS_INDEXES.TYPE */
 #define	DICT_FTS	32	/* FTS index; can't be combined with the
 				other flags */
+#define	DICT_SPATIAL	64	/* SPATIAL index; can't be combined with the
+				other flags */
+#define	DICT_VIRTUAL	128	/* Index on Virtual column */
 
-#define	DICT_IT_BITS	6	/*!< number of bits used for
+#define	DICT_IT_BITS	8	/*!< number of bits used for
 				SYS_INDEXES.TYPE */
 /* @} */
 
@@ -115,20 +123,31 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
 
 /** Width of the COMPACT flag */
 #define DICT_TF_WIDTH_COMPACT		1
+
 /** Width of the ZIP_SSIZE flag */
 #define DICT_TF_WIDTH_ZIP_SSIZE		4
+
 /** Width of the ATOMIC_BLOBS flag.  The Antelope file formats broke up
 BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Brracuda row formats store the whole blob or text field off-page atomically.
+Barracuda row formats store the whole blob or text field off-page atomically.
 Secondary indexes are created from this external data using row_ext_t
 to cache the BLOB prefixes. */
 #define DICT_TF_WIDTH_ATOMIC_BLOBS	1
+
 /** If a table is created with the MYSQL option DATA DIRECTORY and
 innodb-file-per-table, an older engine will not be able to find that table.
 This flag prevents older engines from attempting to open the table and
 allows InnoDB to update_create_info() accordingly. */
 #define DICT_TF_WIDTH_DATA_DIR		1
 
+/** Width of the SHARED tablespace flag.
+It is used to identify tables that exist inside a shared general tablespace.
+If a table is created with the TABLESPACE=tsname option, an older engine will
+not be able to find that table. This flag prevents older engines from attempting
+to open the table and allows InnoDB to quickly find the tablespace. */
+
+#define DICT_TF_WIDTH_SHARED_SPACE	1
+
 /**
 Width of the page compression flag
 */
@@ -148,15 +167,16 @@ DEFAULT=0, ON = 1, OFF = 2
 #define DICT_TF_WIDTH_ATOMIC_WRITES 2
 
 /** Width of all the currently known table flags */
-#define DICT_TF_BITS	(DICT_TF_WIDTH_COMPACT		\
-			+ DICT_TF_WIDTH_ZIP_SSIZE	\
-			+ DICT_TF_WIDTH_ATOMIC_BLOBS	\
-			+ DICT_TF_WIDTH_DATA_DIR        \
-			+ DICT_TF_WIDTH_PAGE_COMPRESSION \
-			+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
-		        + DICT_TF_WIDTH_ATOMIC_WRITES \
-		        + DICT_TF_WIDTH_PAGE_ENCRYPTION \
-		        + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
+#define DICT_TF_BITS	(DICT_TF_WIDTH_COMPACT			\
+			+ DICT_TF_WIDTH_ZIP_SSIZE		\
+			+ DICT_TF_WIDTH_ATOMIC_BLOBS		\
+			+ DICT_TF_WIDTH_DATA_DIR		\
+			+ DICT_TF_WIDTH_SHARED_SPACE		\
+			+ DICT_TF_WIDTH_PAGE_COMPRESSION	\
+			+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL	\
+			+ DICT_TF_WIDTH_ATOMIC_WRITES		\
+			+ DICT_TF_WIDTH_PAGE_ENCRYPTION		\
+			+ DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
 
 /** A mask of all the known/used bits in table flags */
 #define DICT_TF_BIT_MASK	(~(~0U << DICT_TF_BITS))
@@ -172,9 +192,12 @@ DEFAULT=0, ON = 1, OFF = 2
 /** Zero relative shift position of the DATA_DIR field */
 #define DICT_TF_POS_DATA_DIR		(DICT_TF_POS_ATOMIC_BLOBS	\
 					+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the SHARED TABLESPACE field */
+#define DICT_TF_POS_SHARED_SPACE	(DICT_TF_POS_DATA_DIR		\
+					+ DICT_TF_WIDTH_DATA_DIR)
 /** Zero relative shift position of the PAGE_COMPRESSION field */
-#define DICT_TF_POS_PAGE_COMPRESSION	(DICT_TF_POS_DATA_DIR	\
-		                        + DICT_TF_WIDTH_DATA_DIR)
+#define DICT_TF_POS_PAGE_COMPRESSION	(DICT_TF_POS_SHARED_SPACE	\
+					+ DICT_TF_WIDTH_SHARED_SPACE)
 /** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
 #define DICT_TF_POS_PAGE_COMPRESSION_LEVEL	(DICT_TF_POS_PAGE_COMPRESSION	\
 					+ DICT_TF_WIDTH_PAGE_COMPRESSION)
@@ -183,12 +206,12 @@ DEFAULT=0, ON = 1, OFF = 2
 					+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
 /** Zero relative shift position of the PAGE_ENCRYPTION field */
 #define DICT_TF_POS_PAGE_ENCRYPTION	(DICT_TF_POS_ATOMIC_WRITES	\
-		                        + DICT_TF_WIDTH_ATOMIC_WRITES)
+					+ DICT_TF_WIDTH_ATOMIC_WRITES)
 /** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */
 #define DICT_TF_POS_PAGE_ENCRYPTION_KEY	(DICT_TF_POS_PAGE_ENCRYPTION	\
-		                        + DICT_TF_WIDTH_PAGE_ENCRYPTION)
+					+ DICT_TF_WIDTH_PAGE_ENCRYPTION)
 #define DICT_TF_POS_UNUSED		(DICT_TF_POS_PAGE_ENCRYPTION_KEY     \
-		                        + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
+					+ DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
 
 /** Bit mask of the COMPACT field */
 #define DICT_TF_MASK_COMPACT				\
@@ -206,6 +229,10 @@ DEFAULT=0, ON = 1, OFF = 2
 #define DICT_TF_MASK_DATA_DIR				\
 		((~(~0U << DICT_TF_WIDTH_DATA_DIR))	\
 		<< DICT_TF_POS_DATA_DIR)
+/** Bit mask of the SHARED_SPACE field */
+#define DICT_TF_MASK_SHARED_SPACE			\
+		((~(~0U << DICT_TF_WIDTH_SHARED_SPACE))	\
+		<< DICT_TF_POS_SHARED_SPACE)
 /** Bit mask of the PAGE_COMPRESSION field */
 #define DICT_TF_MASK_PAGE_COMPRESSION			\
 		((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION)) \
@@ -239,10 +266,14 @@ DEFAULT=0, ON = 1, OFF = 2
 #define DICT_TF_HAS_ATOMIC_BLOBS(flags)			\
 		((flags & DICT_TF_MASK_ATOMIC_BLOBS)	\
 		>> DICT_TF_POS_ATOMIC_BLOBS)
-/** Return the value of the ATOMIC_BLOBS field */
+/** Return the value of the DATA_DIR field */
 #define DICT_TF_HAS_DATA_DIR(flags)			\
 		((flags & DICT_TF_MASK_DATA_DIR)	\
 		>> DICT_TF_POS_DATA_DIR)
+/** Return the value of the SHARED_SPACE field */
+#define DICT_TF_HAS_SHARED_SPACE(flags)			\
+		((flags & DICT_TF_MASK_SHARED_SPACE)	\
+		>> DICT_TF_POS_SHARED_SPACE)
 /** Return the value of the PAGE_COMPRESSION field */
 #define DICT_TF_GET_PAGE_COMPRESSION(flags)	       \
 		((flags & DICT_TF_MASK_PAGE_COMPRESSION) \
@@ -278,21 +309,26 @@ ROW_FORMAT=REDUNDANT.  InnoDB engines do not check these flags
 for unknown bits in order to protect backward incompatibility. */
 /* @{ */
 /** Total number of bits in table->flags2. */
-#define DICT_TF2_BITS			7
-#define DICT_TF2_BIT_MASK		~(~0U << DICT_TF2_BITS)
+#define DICT_TF2_BITS			9
+#define DICT_TF2_UNUSED_BIT_MASK	(~0U << DICT_TF2_BITS)
+#define DICT_TF2_BIT_MASK		~DICT_TF2_UNUSED_BIT_MASK
 
 /** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
 #define DICT_TF2_TEMPORARY		1
+
 /** The table has an internal defined DOC ID column */
 #define DICT_TF2_FTS_HAS_DOC_ID		2
+
 /** The table has an FTS index */
 #define DICT_TF2_FTS			4
+
 /** Need to add Doc ID column for FTS index build.
 This is a transient bit for index build */
 #define DICT_TF2_FTS_ADD_DOC_ID		8
+
 /** This bit is used during table creation to indicate that it will
 use its own tablespace instead of the system tablespace. */
-#define DICT_TF2_USE_TABLESPACE		16
+#define DICT_TF2_USE_FILE_PER_TABLE	16
 
 /** Set when we discard/detach the tablespace */
 #define DICT_TF2_DISCARDED		32
@@ -300,15 +336,19 @@ use its own tablespace instead of the system tablespace. */
 /** This bit is set if all aux table names (both common tables and
 index tables) of a FTS table are in HEX format. */
 #define DICT_TF2_FTS_AUX_HEX_NAME	64
+
+/** Encryption table bit. */
+#define DICT_TF2_ENCRYPTION		256
+
 /* @} */
 
-#define DICT_TF2_FLAG_SET(table, flag)				\
+#define DICT_TF2_FLAG_SET(table, flag)		\
 	(table->flags2 |= (flag))
 
-#define DICT_TF2_FLAG_IS_SET(table, flag)			\
+#define DICT_TF2_FLAG_IS_SET(table, flag)	\
 	(table->flags2 & (flag))
 
-#define DICT_TF2_FLAG_UNSET(table, flag)			\
+#define DICT_TF2_FLAG_UNSET(table, flag)	\
 	(table->flags2 &= ~(flag))
 
 /** Tables could be chained together with Foreign key constraint. When
@@ -329,15 +369,17 @@ before proceeds. */
 
 /**********************************************************************//**
 Creates a table memory object.
-@return	own: table object */
-UNIV_INTERN
+@return own: table object */
 dict_table_t*
 dict_mem_table_create(
 /*==================*/
 	const char*	name,		/*!< in: table name */
 	ulint		space,		/*!< in: space where the clustered index
 					of the table is placed */
-	ulint		n_cols,		/*!< in: number of columns */
+	ulint		n_cols,		/*!< in: total number of columns
+					including virtual and non-virtual
+					columns */
+	ulint		n_v_cols,	/*!< in: number of virtual columns */
 	ulint		flags,		/*!< in: table flags */
 	ulint		flags2);	/*!< in: table flags2 */
 /**********************************************************************//**
@@ -350,14 +392,12 @@ dict_mem_table_is_system(
 	char	*name);		/*!< in: table name */
 /****************************************************************//**
 Free a table memory object. */
-UNIV_INTERN
 void
 dict_mem_table_free(
 /*================*/
 	dict_table_t*	table);		/*!< in: table */
 /**********************************************************************//**
 Adds a column definition to a table. */
-UNIV_INTERN
 void
 dict_mem_table_add_col(
 /*===================*/
@@ -368,21 +408,53 @@ dict_mem_table_add_col(
 	ulint		prtype,	/*!< in: precise type */
 	ulint		len)	/*!< in: precision */
 	MY_ATTRIBUTE((nonnull(1)));
+/** Adds a virtual column definition to a table.
+@param[in,out]	table		table
+@param[in]	heap		temporary memory heap, or NULL. It is
+				used to store name when we have not finished
+				adding all columns. When all columns are
+				added, the whole name will be copied to memory
+				from table->heap
+@param[in]	name		column name
+@param[in]	mtype		main datatype
+@param[in]	prtype		precise type
+@param[in]	len		length
+@param[in]	pos		position in a table
+@param[in]	num_base	number of base columns
+@return the virtual column definition */
+dict_v_col_t*
+dict_mem_table_add_v_col(
+	dict_table_t*	table,
+	mem_heap_t*	heap,
+	const char*	name,
+	ulint		mtype,
+	ulint		prtype,
+	ulint		len,
+	ulint		pos,
+	ulint		num_base);
+
+/** Adds a stored column definition to a table.
+@param[in]	table		table
+@param[in]	num_base	number of base columns. */
+void
+dict_mem_table_add_s_col(
+	dict_table_t*	table,
+	ulint		num_base);
+
 /**********************************************************************//**
 Renames a column of a table in the data dictionary cache. */
-UNIV_INTERN
 void
 dict_mem_table_col_rename(
 /*======================*/
 	dict_table_t*	table,	/*!< in/out: table */
-	unsigned	nth_col,/*!< in: column index */
+	ulint		nth_col,/*!< in: column index */
 	const char*	from,	/*!< in: old column name */
-	const char*	to)	/*!< in: new column name */
-	MY_ATTRIBUTE((nonnull));
+	const char*	to,	/*!< in: new column name */
+	bool		is_virtual);
+				/*!< in: if this is a virtual column */
 /**********************************************************************//**
 This function populates a dict_col_t memory structure with
 supplied information. */
-UNIV_INTERN
 void
 dict_mem_fill_column_struct(
 /*========================*/
@@ -411,8 +483,7 @@ dict_mem_fill_index_struct(
 	ulint		n_fields);	/*!< in: number of fields */
 /**********************************************************************//**
 Creates an index memory object.
-@return	own: index object */
-UNIV_INTERN
+@return own: index object */
 dict_index_t*
 dict_mem_index_create(
 /*==================*/
@@ -428,7 +499,6 @@ dict_mem_index_create(
 Adds a field definition to an index. NOTE: does not take a copy
 of the column name if the field is a column. The memory occupied
 by the column name may be released only after publishing the index. */
-UNIV_INTERN
 void
 dict_mem_index_add_field(
 /*=====================*/
@@ -439,15 +509,13 @@ dict_mem_index_add_field(
 					INDEX (textcol(25)) */
 /**********************************************************************//**
 Frees an index memory object. */
-UNIV_INTERN
 void
 dict_mem_index_free(
 /*================*/
 	dict_index_t*	index);	/*!< in: index */
 /**********************************************************************//**
 Creates and initializes a foreign constraint memory object.
-@return	own: foreign constraint struct */
-UNIV_INTERN
+@return own: foreign constraint struct */
 dict_foreign_t*
 dict_mem_foreign_create(void);
 /*=========================*/
@@ -457,7 +525,6 @@ Sets the foreign_table_name_lookup pointer based on the value of
 lower_case_table_names.  If that is 0 or 1, foreign_table_name_lookup
 will point to foreign_table_name.  If 2, then another string is
 allocated from the heap and set to lower case. */
-UNIV_INTERN
 void
 dict_mem_foreign_table_name_lookup_set(
 /*===================================*/
@@ -469,13 +536,33 @@ Sets the referenced_table_name_lookup pointer based on the value of
 lower_case_table_names.  If that is 0 or 1, referenced_table_name_lookup
 will point to referenced_table_name.  If 2, then another string is
 allocated from the heap and set to lower case. */
-UNIV_INTERN
 void
 dict_mem_referenced_table_name_lookup_set(
 /*======================================*/
 	dict_foreign_t*	foreign,	/*!< in/out: foreign struct */
 	ibool		do_alloc);	/*!< in: is an alloc needed */
 
+/** Fills the dependent virtual columns in a set.
+A virtual column is dependent when
+1) FK can be present on base column of virtual columns
+2) FK can be present on column which is a part of virtual index
+@param[in,out] foreign foreign key information. */
+void
+dict_mem_foreign_fill_vcol_set(
+       dict_foreign_t*	foreign);
+
+/** Fill virtual columns set in each fk constraint present in the table.
+@param[in,out] table   innodb table object. */
+void
+dict_mem_table_fill_foreign_vcol_set(
+        dict_table_t*	table);
+
+/** Free the vcol_set from all foreign key constraint on the table.
+@param[in,out] table   innodb table object. */
+void
+dict_mem_table_free_foreign_vcol_set(
+	dict_table_t*	table);
+
 /** Create a temporary tablename like "#sql-ibtid-inc where
   tid = the Table ID
   inc = a randomly initialized number that is incremented for each file
@@ -488,7 +575,6 @@ reasonably unique temporary file name.
 @param[in]	dbtab	Table name in the form database/table name
 @param[in]	id	Table id
 @return A unique temporary tablename suitable for InnoDB use */
-UNIV_INTERN
 char*
 dict_mem_create_temporary_tablename(
 	mem_heap_t*	heap,
@@ -496,10 +582,59 @@ dict_mem_create_temporary_tablename(
 	table_id_t	id);
 
 /** Initialize dict memory variables */
-
 void
 dict_mem_init(void);
 
+/** SQL identifier name wrapper for pretty-printing; stores the pointer only */
+class id_name_t
+{
+public:
+	/** Default constructor; the name pointer is value-initialized (NULL) */
+	id_name_t()
+		: m_name()
+	{}
+	/** Constructor; the pointer is stored as-is, the string is not copied
+	@param[in]	name	identifier to assign */
+	explicit id_name_t(
+		const char*	name)
+		: m_name(name)
+	{}
+
+	/** Assignment operator; the pointer is stored, the string is not copied
+	@param[in]	name	identifier to assign */
+	id_name_t& operator=(
+		const char*	name)
+	{
+		m_name = name;
+		return(*this);
+	}
+
+	/** Implicit conversion to a C string
+	@return the name */
+	operator const char*() const
+	{
+		return(m_name);
+	}
+
+	/** Function-call operator: explicit accessor for the C string
+	@return the name */
+	const char* operator()() const
+	{
+		return(m_name);
+	}
+
+private:
+	/** The name in internal representation */
+	const char*	m_name;
+};
+
+/** Table name wrapper for pretty-printing */
+struct table_name_t
+{
+	/** The name in internal representation */
+	char*	m_name;
+};
+
 /** Data structure for a column in a table */
 struct dict_col_t{
 	/*----------------------*/
@@ -545,6 +680,68 @@ struct dict_col_t{
 					3072 for Barracuda table */
 };
 
+/** Index information put in a list of virtual column structure. Index
+id and virtual column position in the index will be logged.
+There can be multiple entries for a given index, with a different position. */
+struct dict_v_idx_t {
+	/** active index on the column */
+	dict_index_t*	index;
+
+	/** position in this index */
+	ulint		nth_field;
+};
+
+/** Index list to put in dict_v_col_t */
+typedef	std::list<dict_v_idx_t, ut_allocator<dict_v_idx_t> >	dict_v_idx_list;
+
+/** Data structure for a virtual column in a table */
+struct dict_v_col_t{
+	/** column structure */
+	dict_col_t		m_col;
+
+	/** array of base column ptr */
+	dict_col_t**		base_col;
+
+	/** number of base column */
+	ulint			num_base;
+
+	/** column pos in table */
+	ulint			v_pos;
+
+	/** Virtual index list, and column position in the index,
+	the allocated memory is not from table->heap, nor it is
+	tracked by dict_sys->size */
+	dict_v_idx_list*	v_indexes;
+
+};
+
+/** Data structure for newly added virtual column in a table */
+struct dict_add_v_col_t{
+	/** number of new virtual column */
+	ulint			n_v_col;
+
+	/** column structures */
+	const dict_v_col_t*	v_col;
+
+	/** new col names */
+	const char**		v_col_name;
+};
+
+/** Data structure for a stored column in a table. */
+struct dict_s_col_t {
+	/** Stored column ptr */
+	dict_col_t*	m_col;
+	/** array of base col ptr */
+	dict_col_t**	base_col;
+	/** number of base columns */
+	ulint		num_base;
+	/** column pos in table */
+	ulint		s_pos;
+};
+
+/** list to put stored column for create_table_info_t */
+typedef std::list<dict_s_col_t, ut_allocator<dict_s_col_t> >	dict_s_col_list;
+
 /** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
 is the maximum indexed column length (or indexed prefix length) in
 ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
@@ -575,6 +772,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
 
 /** Defines the maximum fixed length column size */
 #define DICT_MAX_FIXED_COL_LEN		DICT_ANTELOPE_MAX_INDEX_COL_LEN
+
 #ifdef WITH_WSREP
 #define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500
 #endif /* WITH_WSREP */
@@ -582,7 +780,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
 /** Data structure for a field in an index */
 struct dict_field_t{
 	dict_col_t*	col;		/*!< pointer to the table column */
-	const char*	name;		/*!< name of the column */
+	id_name_t	name;		/*!< name of the column */
 	unsigned	prefix_len:12;	/*!< 0 or the length of the column
 					prefix in bytes in a MySQL index of
 					type, e.g., INDEX (textcol(25));
@@ -634,12 +832,11 @@ extern ulong	zip_failure_threshold_pct;
 compression failures */
 extern ulong	zip_pad_max;
 
-/** Data structure to hold information about how much space in
+/** Data structure to hold information about how much space in
 an uncompressed page should be left as padding to avoid compression
 failures. This estimate is based on a self-adapting heuristic. */
 struct zip_pad_info_t {
-	os_fast_mutex_t*
-			mutex;	/*!< mutex protecting the info */
+	SysMutex*	mutex;	/*!< mutex protecting the info */
 	ulint		pad;	/*!< number of bytes used as pad */
 	ulint		success;/*!< successful compression ops during
 				current round */
@@ -656,22 +853,35 @@ struct zip_pad_info_t {
 a certain index.*/
 #define STAT_DEFRAG_DATA_SIZE_N_SAMPLE	10
 
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
+
+/* Estimated number of offsets in records (based on columns)
+to start with. */
+#define OFFS_IN_REC_NORMAL_SIZE		100
+
 /** Data structure for an index.  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_index_create(). */
 struct dict_index_t{
 	index_id_t	id;	/*!< id of the index */
 	mem_heap_t*	heap;	/*!< memory heap */
-	const char*	name;	/*!< index name */
+	id_name_t	name;	/*!< index name */
 	const char*	table_name;/*!< table name */
 	dict_table_t*	table;	/*!< back pointer to table */
 #ifndef UNIV_HOTBACKUP
 	unsigned	space:32;
 				/*!< space where the index tree is placed */
 	unsigned	page:32;/*!< index tree root page number */
+	unsigned	merge_threshold:6;
+				/*!< In the pessimistic delete, if the page
+				data size drops below this limit in percent,
+				merging it to a neighbor is tried */
+# define DICT_INDEX_MERGE_THRESHOLD_DEFAULT 50
 #endif /* !UNIV_HOTBACKUP */
 	unsigned	type:DICT_IT_BITS;
 				/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
-				DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
+				DICT_IBUF, DICT_CORRUPT) */
 #define MAX_KEY_LENGTH_BITS 12
 	unsigned	trx_id_offset:MAX_KEY_LENGTH_BITS;
 				/*!< position of the trx id column
@@ -685,6 +895,15 @@ struct dict_index_t{
 				/*!< number of columns the user defined to
 				be in the index: in the internal
 				representation we add more columns */
+	unsigned	nulls_equal:1;
+				/*!< if true, SQL NULL == SQL NULL */
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ 	unsigned	disable_ahi:1;
+				/*!< whether to disable the
+				adaptive hash index.
+				Maybe this could be disabled for
+				temporary tables? */
+#endif
 	unsigned	n_uniq:10;/*!< number of fields from the beginning
 				which are enough to determine an index
 				entry uniquely */
@@ -703,7 +922,24 @@ struct dict_index_t{
 				by dict_operation_lock and
 				dict_sys->mutex. Other changes are
 				protected by index->lock. */
+	unsigned	uncommitted:1;
+				/*!< a flag that is set for secondary indexes
+				that have not been committed to the
+				data dictionary yet */
+
+#ifdef UNIV_DEBUG
+	uint32_t	magic_n;/*!< magic number */
+/** Value of dict_index_t::magic_n */
+# define DICT_INDEX_MAGIC_N	76789786
+#endif
 	dict_field_t*	fields;	/*!< array of field descriptions */
+	st_mysql_ftparser*
+			parser;	/*!< fulltext parser plugin */
+	bool		is_ngram;
+				/*!< true if it's ngram parser */
+	bool		has_new_v_col;
+				/*!< whether it has a newly added virtual
+				column in ALTER */
 #ifndef UNIV_HOTBACKUP
 	UT_LIST_NODE_T(dict_index_t)
 			indexes;/*!< list of indexes of the table */
@@ -764,26 +1000,35 @@ struct dict_index_t{
 				/* in which slot the next sample should be
 				saved. */
 	/* @} */
-	rw_lock_t	lock;	/*!< read-write lock protecting the
-				upper levels of the index tree */
+	rtr_ssn_t	rtr_ssn;/*!< Node sequence number for RTree */
+	rtr_info_track_t*
+			rtr_track;/*!< tracking all R-Tree search cursors */
 	trx_id_t	trx_id; /*!< id of the transaction that created this
 				index, or 0 if the index existed
 				when InnoDB was started up */
 	zip_pad_info_t	zip_pad;/*!< Information about state of
 				compression failures and successes */
+	rw_lock_t	lock;	/*!< read-write lock protecting the
+				upper levels of the index tree */
+
+	/** Determine if the index has been committed to the
+	data dictionary.
+	@return whether the index definition has been committed */
+	bool is_committed() const
+	{
+		ut_ad(!uncommitted || !(type & DICT_CLUSTERED));
+		return(UNIV_LIKELY(!uncommitted));
+	}
+
+	/** Flag an index committed or uncommitted.
+	@param[in]	committed	whether the index is committed */
+	void set_committed(bool committed)
+	{
+		ut_ad(!to_be_dropped);
+		ut_ad(committed || !(type & DICT_CLUSTERED));
+		uncommitted = !committed;
+	}
 #endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_BLOB_DEBUG
-	ib_mutex_t		blobs_mutex;
-				/*!< mutex protecting blobs */
-	ib_rbt_t*	blobs;	/*!< map of (page_no,heap_no,field_no)
-				to first_blob_page_no; protected by
-				blobs_mutex; @see btr_blob_dbg_t */
-#endif /* UNIV_BLOB_DEBUG */
-#ifdef UNIV_DEBUG
-	ulint		magic_n;/*!< magic number */
-/** Value of dict_index_t::magic_n */
-# define DICT_INDEX_MAGIC_N	76789786
-#endif
 };
 
 /** The status of online index creation */
@@ -806,6 +1051,11 @@ enum online_index_status {
 	ONLINE_INDEX_ABORTED_DROPPED
 };
 
+/** Set to store the virtual columns which are affected by Foreign
+key constraint. */
+typedef std::set<dict_v_col_t*, std::less<dict_v_col_t*>,
+		ut_allocator<dict_v_col_t*> >		dict_vcol_set;
+
 /** Data structure for a foreign key constraint; an example:
 FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D).  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
@@ -841,6 +1091,9 @@ struct dict_foreign_t{
 					does not generate new indexes
 					implicitly */
 	dict_index_t*	referenced_index;/*!< referenced index */
+
+	dict_vcol_set*	v_cols;		/*!< set of virtual columns affected
+					by foreign key constraint. */
 };
 
 std::ostream&
@@ -889,6 +1142,24 @@ struct dict_foreign_with_index {
 	const dict_index_t*	m_index;
 };
 
+#ifdef WITH_WSREP
+/** A predicate function object to find a foreign key with the given index as
+the foreign index. operator() returns true if foreign_index matches m_index. */
+struct dict_foreign_with_foreign_index {
+
+	dict_foreign_with_foreign_index(const dict_index_t*	index)
+	: m_index(index)
+	{}
+
+	bool operator()(const dict_foreign_t*	foreign) const
+	{
+		return(foreign->foreign_index == m_index);
+	}
+
+	const dict_index_t*	m_index;
+};
+#endif
+
 /* A function object to check if the foreign constraint is between different
 tables.  Returns true if foreign key constraint is between different tables,
 false otherwise. */
@@ -926,7 +1197,10 @@ struct dict_foreign_matches_id {
 	const char*	m_id;
 };
 
-typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set;
+typedef std::set<
+	dict_foreign_t*,
+	dict_foreign_compare,
+	ut_allocator<dict_foreign_t*> >	dict_foreign_set;
 
 std::ostream&
 operator<< (std::ostream& out, const dict_foreign_set& fk_set);
@@ -970,6 +1244,10 @@ dict_foreign_free(
 /*==============*/
 	dict_foreign_t*	foreign)	/*!< in, own: foreign key struct */
 {
+	if (foreign->v_cols != NULL) {
+		UT_DELETE(foreign->v_cols);
+	}
+
 	mem_heap_free(foreign->heap);
 }
 
@@ -1004,6 +1282,67 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */
 #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32	/*!< ON UPDATE NO ACTION */
 /* @} */
 
+/** Display an identifier.
+@param[in,out]	s	output stream
+@param[in]	id_name	SQL identifier (other than table name)
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		s,
+	const id_name_t&	id_name);
+
+/** Display a table name.
+@param[in,out]	s		output stream
+@param[in]	table_name	table name
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		s,
+	const table_name_t&	table_name);
+
+/** List of locks that different transactions have acquired on a table. This
+list has a list node that is embedded in a nested union/structure. We have to
+generate a specific template for it. */
+
+typedef ut_list_base<lock_t, ut_list_node<lock_t> lock_table_t::*>
+	table_lock_list_t;
+
+/** mysql template structure defined in row0mysql.cc */
+struct mysql_row_templ_t;
+
+/** Template describing virtual columns and their base columns. NOTE: the
+constructor initializes only vtempl and mysql_table_query_id. */
+struct dict_vcol_templ_t {
+	/** number of regular columns */
+	ulint			n_col;
+
+	/** number of virtual columns */
+	ulint			n_v_col;
+
+	/** array of templates for virtual col and their base columns */
+	mysql_row_templ_t**	vtempl;
+
+	/** table's database name */
+	std::string		db_name;
+
+	/** table name */
+	std::string		tb_name;
+
+	/** MySQL record length */
+	ulint			rec_len;
+
+	/** default column value if any */
+	byte*			default_rec;
+
+	/** cached MySQL TABLE object */
+	TABLE*			mysql_table;
+
+	/** when mysql_table was cached */
+	uint64_t		mysql_table_query_id;
+
+	dict_vcol_templ_t() : vtempl(0), mysql_table_query_id(-1) {}
+};
+
 /* This flag is for sync SQL DDL and memcached DML.
 if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on
 the table, DML from memcached will be blocked. */
@@ -1023,62 +1362,145 @@ typedef enum {
 
 /** Data structure for a database table.  Most fields will be
 initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_t{
+struct dict_table_t {
 
+	/** Get reference count.
+	@return current value of n_ref_count */
+	inline ulint get_ref_count() const;
+
+	/** Acquire the table handle. */
+	inline void acquire();
 
-	table_id_t	id;	/*!< id of the table */
-	mem_heap_t*	heap;	/*!< memory heap */
-	char*		name;	/*!< table name */
 	void*		thd;		/*!< thd */
 	bool		page_0_read; /*!< true if page 0 has
 				     been already read */
 	fil_space_crypt_t *crypt_data; /*!< crypt data if present */
-	const char*	dir_path_of_temp_table;/*!< NULL or the directory path
-				where a TEMPORARY table that was explicitly
-				created by a user should be placed if
-				innodb_file_per_table is defined in my.cnf;
-				in Unix this is usually /tmp/..., in Windows
-				temp\... */
-	char*		data_dir_path; /*!< NULL or the directory path
-				specified by DATA DIRECTORY */
-	unsigned	space:32;
-				/*!< space where the clustered index of the
-				table is placed */
-	unsigned	flags:DICT_TF_BITS;	/*!< DICT_TF_... */
-	unsigned	flags2:DICT_TF2_BITS;	/*!< DICT_TF2_... */
-	unsigned	ibd_file_missing:1;
-				/*!< TRUE if this is in a single-table
-				tablespace and the .ibd file is missing; then
-				we must return in ha_innodb.cc an error if the
-				user tries to query such an orphaned table */
-	unsigned	cached:1;/*!< TRUE if the table object has been added
-				to the dictionary cache */
-	unsigned	to_be_dropped:1;
-				/*!< TRUE if the table is to be dropped, but
-				not yet actually dropped (could in the bk
-				drop list); It is turned on at the beginning
-				of row_drop_table_for_mysql() and turned off
-				just before we start to update system tables
-				for the drop. It is protected by
-				dict_operation_lock */
-	unsigned	n_def:10;/*!< number of columns defined so far */
-	unsigned	n_cols:10;/*!< number of columns */
-	unsigned	can_be_evicted:1;
-				/*!< TRUE if it's not an InnoDB system table
-				or a table that has no FK relationships */
-	unsigned	corrupted:1;
-				/*!< TRUE if table is corrupted */
-	unsigned	drop_aborted:1;
-				/*!< TRUE if some indexes should be dropped
-				after ONLINE_INDEX_ABORTED
-				or ONLINE_INDEX_ABORTED_DROPPED */
-	dict_col_t*	cols;	/*!< array of column descriptions */
-	const char*	col_names;
-				/*!< Column names packed in a character string
-				"name1\0name2\0...nameN\0".  Until
-				the string contains n_cols, it will be
-				allocated from a temporary heap.  The final
-				string will be allocated from table->heap. */
+
+	/** Release the table handle. */
+	inline void release();
+
+	/** Id of the table. */
+	table_id_t				id;
+
+	/** Memory heap. If you allocate from this heap after the table has
+	been created then be sure to account the allocation into
+	dict_sys->size. When closing the table we do something like
+	dict_sys->size -= mem_heap_get_size(table->heap) and if that is going
+	to become negative then we would assert. Something like this should do:
+	old_size = mem_heap_get_size()
+	mem_heap_alloc()
+	new_size = mem_heap_get_size()
+	dict_sys->size += new_size - old_size. */
+	mem_heap_t*				heap;
+
+	/** Table name. */
+	table_name_t				name;
+
+	/** NULL or the directory path where a TEMPORARY table that was
+	explicitly created by a user should be placed if innodb_file_per_table
+	is defined in my.cnf. In Unix this is usually "/tmp/...",
+	in Windows "temp\...". */
+	const char*				dir_path_of_temp_table;
+
+	/** NULL or the directory path specified by DATA DIRECTORY. */
+	char*					data_dir_path;
+
+	/** NULL or the tablespace name that this table is assigned to,
+	specified by the TABLESPACE option.*/
+	id_name_t				tablespace;
+
+	/** Space where the clustered index of the table is placed. */
+	uint32_t				space;
+
+	/** Stores information about:
+	1 row format (redundant or compact),
+	2 compressed page size (zip shift size),
+	3 whether using atomic blobs,
+	4 whether the table has been created with the option DATA DIRECTORY.
+	Use DICT_TF_GET_COMPACT(), DICT_TF_GET_ZIP_SSIZE(),
+	DICT_TF_HAS_ATOMIC_BLOBS() and DICT_TF_HAS_DATA_DIR() to parse this
+	flag. */
+	unsigned				flags:DICT_TF_BITS;
+
+	/** Stores information about:
+	1 whether the table has been created using CREATE TEMPORARY TABLE,
+	2 whether the table has an internally defined DOC ID column,
+	3 whether the table has a FTS index,
+	4 whether DOC ID column need to be added to the FTS index,
+	5 whether the table is being created its own tablespace,
+	6 whether the table has been DISCARDed,
+	7 whether the aux FTS tables names are in hex.
+	8 whether the table is an intrinsic table.
+	9 whether the table has encryption setting.
+	Use DICT_TF2_FLAG_IS_SET() to parse this flag. */
+	unsigned				flags2:DICT_TF2_BITS;
+
+	/** TRUE if this is in a single-table tablespace and the .ibd file is
+	missing. Then we must return in ha_innodb.cc an error if the user
+	tries to query such an orphaned table. */
+	unsigned				ibd_file_missing:1;
+
+	/** TRUE if the table object has been added to the dictionary cache. */
+	unsigned				cached:1;
+
+	/** TRUE if the table is to be dropped, but not yet actually dropped
+	(could be in the background drop list). It is turned on at the beginning
+	of row_drop_table_for_mysql() and turned off just before we start to
+	update system tables for the drop. It is protected by
+	dict_operation_lock. */
+	unsigned				to_be_dropped:1;
+
+	/** Number of non-virtual columns defined so far. */
+	unsigned				n_def:10;
+
+	/** Number of non-virtual columns. */
+	unsigned				n_cols:10;
+
+	/** Number of total columns (including virtual and non-virtual). */
+	unsigned				n_t_cols:10;
+
+	/** Number of total columns defined so far. */
+	unsigned                                n_t_def:10;
+
+	/** Number of virtual columns defined so far. */
+	unsigned                                n_v_def:10;
+
+	/** Number of virtual columns. */
+	unsigned                                n_v_cols:10;
+
+	/** TRUE if it's not an InnoDB system table or a table that has no FK
+	relationships. */
+	unsigned				can_be_evicted:1;
+
+	/** TRUE if table is corrupted. */
+	unsigned				corrupted:1;
+
+	/** TRUE if some indexes should be dropped after ONLINE_INDEX_ABORTED
+	or ONLINE_INDEX_ABORTED_DROPPED. */
+	unsigned				drop_aborted:1;
+
+	/** Array of column descriptions. */
+	dict_col_t*				cols;
+
+	/** Array of virtual column descriptions. */
+	dict_v_col_t*				v_cols;
+
+	/** List of stored column descriptions. It is used only for foreign key
+	check during create table and copy alter operations.
+	During copy alter, s_cols list is filled during create table operation
+	and needs to be preserved until the rename table operation. That is
+	the reason s_cols is a part of dict_table_t */
+	dict_s_col_list*			s_cols;
+
+	/** Column names packed in a character string
+	"name1\0name2\0...nameN\0". Until the string contains n_cols, it will
+	be allocated from a temporary heap. The final string will be allocated
+	from table->heap. */
+	const char*				col_names;
+
+	/** Virtual column names */
+	const char*				v_col_names;
+
 	bool		is_system_db;
 				/*!< True if the table belongs to a system
 				database (mysql, information_schema or
@@ -1088,276 +1510,295 @@ struct dict_table_t{
 				dictionary information and
 				MySQL FRM information mismatch. */
 #ifndef UNIV_HOTBACKUP
-	hash_node_t	name_hash; /*!< hash chain node */
-	hash_node_t	id_hash; /*!< hash chain node */
-	UT_LIST_BASE_NODE_T(dict_index_t)
-			indexes; /*!< list of indexes of the table */
-
-	dict_foreign_set	foreign_set;
-				/*!< set of foreign key constraints
-				in the table; these refer to columns
-				in other tables */
-
-	dict_foreign_set	referenced_set;
-				/*!< list of foreign key constraints
-				which refer to this table */
-
-	UT_LIST_NODE_T(dict_table_t)
-			table_LRU; /*!< node of the LRU list of tables */
-	unsigned	fk_max_recusive_level:8;
-				/*!< maximum recursive level we support when
-				loading tables chained together with FK
-				constraints. If exceeds this level, we will
-				stop loading child table into memory along with
-				its parent table */
-	ulint		n_foreign_key_checks_running;
-				/*!< count of how many foreign key check
-				operations are currently being performed
-				on the table: we cannot drop the table while
-				there are foreign key checks running on
-				it! */
-	trx_id_t	def_trx_id;
-				/*!< transaction id that last touched
-				the table definition, either when
-				loading the definition or CREATE
-				TABLE, or ALTER TABLE (prepare,
-				commit, and rollback phases) */
-	trx_id_t	query_cache_inv_trx_id;
-				/*!< transactions whose trx id is
-				smaller than this number are not
-				allowed to store to the MySQL query
-				cache or retrieve from it; when a trx
-				with undo logs commits, it sets this
-				to the value of the trx id counter for
-				the tables it had an IX lock on */
+	/** Hash chain node. */
+	hash_node_t				name_hash;
+
+	/** Hash chain node. */
+	hash_node_t				id_hash;
+
+	/** The FTS_DOC_ID_INDEX, or NULL if no fulltext indexes exist */
+	dict_index_t*				fts_doc_id_index;
+
+	/** List of indexes of the table. */
+	UT_LIST_BASE_NODE_T(dict_index_t)	indexes;
+
+	/** List of foreign key constraints in the table. These refer to
+	columns in other tables. */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)	foreign_list;
+
+	/** List of foreign key constraints which refer to this table. */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)	referenced_list;
+
+	/** Node of the LRU list of tables. */
+	UT_LIST_NODE_T(dict_table_t)		table_LRU;
+
+	/** Maximum recursive level we support when loading tables chained
+	together with FK constraints. If exceeds this level, we will stop
+	loading child table into memory along with its parent table. */
+	unsigned				fk_max_recusive_level:8;
+
+	/** Count of how many foreign key check operations are currently being
+	performed on the table. We cannot drop the table while there are
+	foreign key checks running on it. */
+	ulint					n_foreign_key_checks_running;
+
+	/** Transactions whose view low limit is greater than this number are
+	not allowed to store to the MySQL query cache or retrieve from it.
+	When a trx with undo logs commits, it sets this to the value of the
+	current time. */
+	trx_id_t				query_cache_inv_id;
+
+	/** Transaction id that last touched the table definition. Either when
+	loading the definition or CREATE TABLE, or ALTER TABLE (prepare,
+	commit, and rollback phases). */
+	trx_id_t				def_trx_id;
+
+	/** Set of foreign key constraints in the table. These refer to
+	columns in other tables. */
+	dict_foreign_set			foreign_set;
+
+	/** Set of foreign key constraints which refer to this table. */
+	dict_foreign_set			referenced_set;
+
 #ifdef UNIV_DEBUG
-	/*----------------------*/
-	ibool		does_not_fit_in_memory;
-				/*!< this field is used to specify in
-				simulations tables which are so big
-				that disk should be accessed: disk
-				access is simulated by putting the
-				thread to sleep for a while; NOTE that
-				this flag is not stored to the data
-				dictionary on disk, and the database
-				will forget about value TRUE if it has
-				to reload the table definition from
-				disk */
+	/** This field is used to specify in simulations tables which are so
+	big that disk should be accessed. Disk access is simulated by putting
+	the thread to sleep for a while. NOTE that this flag is not stored to
+	the data dictionary on disk, and the database will forget about value
+	TRUE if it has to reload the table definition from disk. */
+	ibool					does_not_fit_in_memory;
 #endif /* UNIV_DEBUG */
-	/*----------------------*/
-	unsigned	big_rows:1;
-				/*!< flag: TRUE if the maximum length of
-				a single row exceeds BIG_ROW_SIZE;
-				initialized in dict_table_add_to_cache() */
-				/** Statistics for query optimization */
-				/* @{ */
-
-	volatile os_once::state_t	stats_latch_created;
-				/*!< Creation state of 'stats_latch'. */
-
-	rw_lock_t*	stats_latch; /*!< this latch protects:
-				dict_table_t::stat_initialized
-				dict_table_t::stat_n_rows (*)
-				dict_table_t::stat_clustered_index_size
-				dict_table_t::stat_sum_of_other_index_sizes
-				dict_table_t::stat_modified_counter (*)
-				dict_table_t::indexes*::stat_n_diff_key_vals[]
-				dict_table_t::indexes*::stat_index_size
-				dict_table_t::indexes*::stat_n_leaf_pages
-				(*) those are not always protected for
-				performance reasons */
-	unsigned	stat_initialized:1; /*!< TRUE if statistics have
-				been calculated the first time
-				after database startup or table creation */
-#define DICT_TABLE_IN_USED      -1
-	lint		memcached_sync_count;
-				/*!< count of how many handles are opened
-				to this table from memcached; DDL on the
-				table is NOT allowed until this count
-				goes to zero. If it's -1, means there's DDL
-		                on the table, DML from memcached will be
-				blocked. */
-	ib_time_t	stats_last_recalc;
-				/*!< Timestamp of last recalc of the stats */
-	ib_uint32_t	stat_persistent;
-				/*!< The two bits below are set in the
-				::stat_persistent member and have the following
-				meaning:
-				1. _ON=0, _OFF=0, no explicit persistent stats
-				setting for this table, the value of the global
-				srv_stats_persistent is used to determine
-				whether the table has persistent stats enabled
-				or not
-				2. _ON=0, _OFF=1, persistent stats are
-				explicitly disabled for this table, regardless
-				of the value of the global srv_stats_persistent
-				3. _ON=1, _OFF=0, persistent stats are
-				explicitly enabled for this table, regardless
-				of the value of the global srv_stats_persistent
-				4. _ON=1, _OFF=1, not allowed, we assert if
-				this ever happens. */
-#define DICT_STATS_PERSISTENT_ON	(1 << 1)
-#define DICT_STATS_PERSISTENT_OFF	(1 << 2)
-	ib_uint32_t	stats_auto_recalc;
-				/*!< The two bits below are set in the
-				::stats_auto_recalc member and have
-				the following meaning:
-				1. _ON=0, _OFF=0, no explicit auto recalc
-				setting for this table, the value of the global
-				srv_stats_persistent_auto_recalc is used to
-				determine whether the table has auto recalc
-				enabled or not
-				2. _ON=0, _OFF=1, auto recalc is explicitly
-				disabled for this table, regardless of the
-				value of the global
-				srv_stats_persistent_auto_recalc
-				3. _ON=1, _OFF=0, auto recalc is explicitly
-				enabled for this table, regardless of the
-				value of the global
-				srv_stats_persistent_auto_recalc
-				4. _ON=1, _OFF=1, not allowed, we assert if
-				this ever happens. */
-#define DICT_STATS_AUTO_RECALC_ON	(1 << 1)
-#define DICT_STATS_AUTO_RECALC_OFF	(1 << 2)
-	ulint		stats_sample_pages;
-				/*!< the number of pages to sample for this
-				table during persistent stats estimation;
-				if this is 0, then the value of the global
-				srv_stats_persistent_sample_pages will be
-				used instead. */
-	ib_uint64_t	stat_n_rows;
-				/*!< approximate number of rows in the table;
-				we periodically calculate new estimates */
-	ulint		stat_clustered_index_size;
-				/*!< approximate clustered index size in
-				database pages */
-	ulint		stat_sum_of_other_index_sizes;
-				/*!< other indexes in database pages */
-	ib_uint64_t	stat_modified_counter;
-				/*!< when a row is inserted, updated,
-				or deleted,
-				we add 1 to this number; we calculate new
-				estimates for the stat_... values for the
-				table and the indexes when about 1 / 16 of
-				table has been modified;
-				also when the estimate operation is
-				called for MySQL SHOW TABLE STATUS; the
-				counter is reset to zero at statistics
-				calculation; this counter is not protected by
-				any latch, because this is only used for
-				heuristics */
-
-#define BG_STAT_IN_PROGRESS	((byte)(1 << 0))
-				/*!< BG_STAT_IN_PROGRESS is set in
-				stats_bg_flag when the background
-				stats code is working on this table. The DROP
-				TABLE code waits for this to be cleared
-				before proceeding. */
-#define BG_STAT_SHOULD_QUIT	((byte)(1 << 1))
-				/*!< BG_STAT_SHOULD_QUIT is set in
-				stats_bg_flag when DROP TABLE starts
-				waiting on BG_STAT_IN_PROGRESS to be cleared,
-				the background stats thread will detect this
-				and will eventually quit sooner */
-#define BG_SCRUB_IN_PROGRESS	((byte)(1 << 2))
+
+	/** TRUE if the maximum length of a single row exceeds BIG_ROW_SIZE.
+	Initialized in dict_table_add_to_cache(). */
+	unsigned				big_rows:1;
+
+	/** Statistics for query optimization. @{ */
+
+	/** Creation state of 'stats_latch'. */
+	volatile os_once::state_t		stats_latch_created;
+
+	/** This latch protects:
+	dict_table_t::stat_initialized,
+	dict_table_t::stat_n_rows (*),
+	dict_table_t::stat_clustered_index_size,
+	dict_table_t::stat_sum_of_other_index_sizes,
+	dict_table_t::stat_modified_counter (*),
+	dict_table_t::indexes*::stat_n_diff_key_vals[],
+	dict_table_t::indexes*::stat_index_size,
+	dict_table_t::indexes*::stat_n_leaf_pages.
+	(*) Those are not always protected for
+	performance reasons. */
+	rw_lock_t*				stats_latch;
+
+	/** TRUE if statistics have been calculated the first time after
+	database startup or table creation. */
+	unsigned				stat_initialized:1;
+
+	/** Timestamp of last recalc of the stats. */
+	ib_time_t				stats_last_recalc;
+
+	/** The two bits below are set in the 'stat_persistent' member. They
+	have the following meaning:
+	1. _ON=0, _OFF=0, no explicit persistent stats setting for this table,
+	the value of the global srv_stats_persistent is used to determine
+	whether the table has persistent stats enabled or not
+	2. _ON=0, _OFF=1, persistent stats are explicitly disabled for this
+	table, regardless of the value of the global srv_stats_persistent
+	3. _ON=1, _OFF=0, persistent stats are explicitly enabled for this
+	table, regardless of the value of the global srv_stats_persistent
+	4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */
+	#define DICT_STATS_PERSISTENT_ON	(1 << 1)
+	#define DICT_STATS_PERSISTENT_OFF	(1 << 2)
+
+	/** Indicates whether the table uses persistent stats or not. See
+	DICT_STATS_PERSISTENT_ON and DICT_STATS_PERSISTENT_OFF. */
+	ib_uint32_t				stat_persistent;
+
+	/** The two bits below are set in the 'stats_auto_recalc' member. They
+	have the following meaning:
+	1. _ON=0, _OFF=0, no explicit auto recalc setting for this table, the
+	value of the global srv_stats_persistent_auto_recalc is used to
+	determine whether the table has auto recalc enabled or not
+	2. _ON=0, _OFF=1, auto recalc is explicitly disabled for this table,
+	regardless of the value of the global srv_stats_persistent_auto_recalc
+	3. _ON=1, _OFF=0, auto recalc is explicitly enabled for this table,
+	regardless of the value of the global srv_stats_persistent_auto_recalc
+	4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */
+	#define DICT_STATS_AUTO_RECALC_ON	(1 << 1)
+	#define DICT_STATS_AUTO_RECALC_OFF	(1 << 2)
+
+	/** Indicates whether the table uses automatic recalc for persistent
+	stats or not. See DICT_STATS_AUTO_RECALC_ON and
+	DICT_STATS_AUTO_RECALC_OFF. */
+	ib_uint32_t				stats_auto_recalc;
+
+	/** The number of pages to sample for this table during persistent
+	stats estimation. If this is 0, then the value of the global
+	srv_stats_persistent_sample_pages will be used instead. */
+	ulint					stats_sample_pages;
+
+	/** Approximate number of rows in the table. We periodically calculate
+	new estimates. */
+	ib_uint64_t				stat_n_rows;
+
+	/** Approximate clustered index size in database pages. */
+	ulint					stat_clustered_index_size;
+
+	/** Approximate size of other indexes in database pages. */
+	ulint					stat_sum_of_other_index_sizes;
+
+	/** How many rows are modified since last stats recalc. When a row is
+	inserted, updated, or deleted, we add 1 to this number; we calculate
+	new estimates for the table and the indexes if the table has changed
+	too much, see row_update_statistics_if_needed(). The counter is reset
+	to zero at statistics calculation. This counter is not protected by
+	any latch, because this is only used for heuristics. */
+	ib_uint64_t				stat_modified_counter;
+
+	/** Background stats thread is not working on this table. */
+	#define BG_STAT_NONE			0
+
+	/** Set in 'stats_bg_flag' when the background stats code is working
+	on this table. The DROP TABLE code waits for this to be cleared before
+	proceeding. */
+	#define BG_STAT_IN_PROGRESS		(1 << 0)
+
+	/** Set in 'stats_bg_flag' when DROP TABLE starts waiting on
+	BG_STAT_IN_PROGRESS to be cleared. The background stats thread will
+	detect this and will eventually quit sooner. */
+	#define BG_STAT_SHOULD_QUIT		(1 << 1)
+
+	/** The state of the background stats thread wrt this table.
+	See BG_STAT_NONE, BG_STAT_IN_PROGRESS and BG_STAT_SHOULD_QUIT.
+	Writes are covered by dict_sys->mutex. Dirty reads are possible. */
+
+	#define BG_SCRUB_IN_PROGRESS	((byte)(1 << 2))
 				/*!< BG_SCRUB_IN_PROGRESS is set in
 				stats_bg_flag when the background
 				scrub code is working on this table. The DROP
 				TABLE code waits for this to be cleared
 				before proceeding. */
 
-#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
+	#define BG_STAT_SHOULD_QUIT		(1 << 1)
+
+	#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
+
+
+	/** The state of the background stats thread wrt this table.
+	See BG_STAT_NONE, BG_STAT_IN_PROGRESS and BG_STAT_SHOULD_QUIT.
+	Writes are covered by dict_sys->mutex. Dirty reads are possible. */
+	byte					stats_bg_flag;
 
-	byte 		stats_bg_flag;
-				/*!< see BG_STAT_* above.
-				Writes are covered by dict_sys->mutex.
-				Dirty reads are possible. */
 	bool		stats_error_printed;
 				/*!< Has persistent stats error beein
 				already printed for this table ? */
-				/* @} */
-	/*----------------------*/
-				/**!< The following fields are used by the
-				AUTOINC code.  The actual collection of
-				tables locked during AUTOINC read/write is
-				kept in trx_t. In order to quickly determine
-				whether a transaction has locked the AUTOINC
-				lock we keep a pointer to the transaction
-				here in the autoinc_trx variable. This is to
-				avoid acquiring the lock_sys_t::mutex and
-				scanning the vector in trx_t.
-
-				When an AUTOINC lock has to wait, the
-				corresponding lock instance is created on
-				the trx lock heap rather than use the
-				pre-allocated instance in autoinc_lock below.*/
-				/* @{ */
-	lock_t*		autoinc_lock;
-				/*!< a buffer for an AUTOINC lock
-				for this table: we allocate the memory here
-				so that individual transactions can get it
-				and release it without a need to allocate
-				space from the lock heap of the trx:
-				otherwise the lock heap would grow rapidly
-				if we do a large insert from a select */
-	ib_mutex_t*	autoinc_mutex;
-				/*!< mutex protecting the autoincrement
-				counter */
+	/* @} */
+
+	/** AUTOINC related members. @{ */
+
+	/* The actual collection of tables locked during AUTOINC read/write is
+	kept in trx_t. In order to quickly determine whether a transaction has
+	locked the AUTOINC lock we keep a pointer to the transaction here in
+	the 'autoinc_trx' member. This is to avoid acquiring the
+	lock_sys_t::mutex and scanning the vector in trx_t.
+	When an AUTOINC lock has to wait, the corresponding lock instance is
+	created on the trx lock heap rather than use the pre-allocated instance
+	in autoinc_lock below. */
+
+	/** A buffer for an AUTOINC lock for this table. We allocate the
+	memory here so that individual transactions can get it and release it
+	without a need to allocate space from the lock heap of the trx:
+	otherwise the lock heap would grow rapidly if we do a large insert
+	from a select. */
+	lock_t*					autoinc_lock;
 
 	/** Creation state of autoinc_mutex member */
-	volatile os_once::state_t
-			autoinc_mutex_created;
-
-	ib_uint64_t	autoinc;/*!< autoinc counter value to give to the
-				next inserted row */
-	ulong		n_waiting_or_granted_auto_inc_locks;
-				/*!< This counter is used to track the number
-				of granted and pending autoinc locks on this
-				table. This value is set after acquiring the
-				lock_sys_t::mutex but we peek the contents to
-				determine whether other transactions have
-				acquired the AUTOINC lock or not. Of course
-				only one transaction can be granted the
-				lock but there can be multiple waiters. */
-	const trx_t*	autoinc_trx;
-				/*!< The transaction that currently holds the
-				the AUTOINC lock on this table.
-				Protected by lock_sys->mutex. */
-	fts_t*		fts;	/* FTS specific state variables */
-				/* @} */
-	/*----------------------*/
+	volatile os_once::state_t		autoinc_mutex_created;
 
-	ib_quiesce_t	 quiesce;/*!< Quiescing states, protected by the
-				dict_index_t::lock. ie. we can only change
-				the state if we acquire all the latches
-				(dict_index_t::lock) in X mode of this table's
-				indexes. */
+	/** Mutex protecting the autoincrement counter. */
+	ib_mutex_t*				autoinc_mutex;
+
+	/** Autoinc counter value to give to the next inserted row. */
+	ib_uint64_t				autoinc;
+
+	/** This counter is used to track the number of granted and pending
+	autoinc locks on this table. This value is set after acquiring the
+	lock_sys_t::mutex but we peek the contents to determine whether other
+	transactions have acquired the AUTOINC lock or not. Of course only one
+	transaction can be granted the lock but there can be multiple
+	waiters. */
+	ulong					n_waiting_or_granted_auto_inc_locks;
+
+	/** The transaction that currently holds the AUTOINC lock on this
+	table. Protected by lock_sys->mutex. */
+	const trx_t*				autoinc_trx;
+
+	/* @} */
+
+	/** Count of how many handles are opened to this table from memcached.
+	DDL on the table is NOT allowed until this count goes to zero. If
+	it is -1, then there's DDL on the table, DML from memcached will be
+	blocked. */
+	lint					memcached_sync_count;
+
+	/** FTS specific state variables. */
+	fts_t*					fts;
+
+	/** Quiescing states, protected by the dict_index_t::lock. ie. we can
+	only change the state if we acquire all the latches (dict_index_t::lock)
+	in X mode of this table's indexes. */
+	ib_quiesce_t				quiesce;
+
+	/** Count of the number of record locks on this table. We use this to
+	determine whether we can evict the table from the dictionary cache.
+	It is protected by lock_sys->mutex. */
+	ulint					n_rec_locks;
+
+#ifndef UNIV_DEBUG
+private:
+#endif
+	/** Count of how many handles are opened to this table. Dropping of the
+	table is NOT allowed until this count gets to zero. MySQL does NOT
+	itself check the number of open handles at DROP. */
+	ulint					n_ref_count;
+
+public:
+	/** List of locks on the table. Protected by lock_sys->mutex. */
+	table_lock_list_t			locks;
+
+	/** Timestamp of the last modification of this table. */
+	time_t					update_time;
 
-	/*----------------------*/
-	ulint		n_rec_locks;
-				/*!< Count of the number of record locks on
-				this table. We use this to determine whether
-				we can evict the table from the dictionary
-				cache. It is protected by lock_sys->mutex. */
-	ulint		n_ref_count;
-				/*!< count of how many handles are opened
-				to this table; dropping of the table is
-				NOT allowed until this count gets to zero;
-				MySQL does NOT itself check the number of
-				open handles at drop */
-	UT_LIST_BASE_NODE_T(lock_t)
-			locks;	/*!< list of locks on the table; protected
-				by lock_sys->mutex */
 #endif /* !UNIV_HOTBACKUP */
-	ibool		is_encrypted;
+
+	bool					is_encrypted;
 
 #ifdef UNIV_DEBUG
-	ulint		magic_n;/*!< magic number */
-/** Value of dict_table_t::magic_n */
-# define DICT_TABLE_MAGIC_N	76333786
+	/** Value of 'magic_n'. */
+	#define DICT_TABLE_MAGIC_N		76333786
+
+	/** Magic number. */
+	ulint					magic_n;
 #endif /* UNIV_DEBUG */
+	/** mysql_row_templ_t for base columns used to compute the virtual
+	columns */
+	dict_vcol_templ_t*			vc_templ;
+
+	/** encryption key, it's only for export/import */
+	byte*					encryption_key;
+
+	/** encryption iv, it's only for export/import */
+	byte*					encryption_iv;
 };
 
+/*******************************************************************//**
+Initialise the table lock list. */
+void
+lock_table_lock_list_init(
+/*======================*/
+	table_lock_list_t*	locks);		/*!< List to initialise */
+
 /** A function object to add the foreign key constraint to the referenced set
 of the referenced table, if it exists in the dictionary cache. */
 struct dict_foreign_add_to_referenced_table {
@@ -1383,24 +1824,10 @@ dict_table_autoinc_destroy(
 	if (table->autoinc_mutex_created == os_once::DONE
 	    && table->autoinc_mutex != NULL) {
 		mutex_free(table->autoinc_mutex);
-		delete table->autoinc_mutex;
+		UT_DELETE(table->autoinc_mutex);
 	}
 }
 
-/** Allocate and init the autoinc latch of a given table.
-This function must not be called concurrently on the same table object.
-@param[in,out]	table_void	table whose autoinc latch to create */
-void
-dict_table_autoinc_alloc(
-	void*	table_void);
-
-/** Allocate and init the zip_pad_mutex of a given index.
-This function must not be called concurrently on the same index object.
-@param[in,out]	index_void	index whose zip_pad_mutex to create */
-void
-dict_index_zip_pad_alloc(
-	void*	index_void);
-
 /** Request for lazy creation of the autoinc latch of a given table.
 This function is only called from either single threaded environment
 or from a thread that has not shared the table object with other threads.
@@ -1410,13 +1837,8 @@ void
 dict_table_autoinc_create_lazy(
 	dict_table_t*	table)
 {
-#ifdef HAVE_ATOMIC_BUILTINS
 	table->autoinc_mutex = NULL;
 	table->autoinc_mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
-	dict_table_autoinc_alloc(table);
-	table->autoinc_mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
 }
 
 /** Request a lazy creation of dict_index_t::zip_pad::mutex.
@@ -1428,13 +1850,8 @@ void
 dict_index_zip_pad_mutex_create_lazy(
 	dict_index_t*	index)
 {
-#ifdef HAVE_ATOMIC_BUILTINS
 	index->zip_pad.mutex = NULL;
 	index->zip_pad.mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
-	dict_index_zip_pad_alloc(index);
-	index->zip_pad.mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
 }
 
 /** Destroy the zip_pad_mutex of the given index.
@@ -1448,8 +1865,8 @@ dict_index_zip_pad_mutex_destroy(
 {
 	if (index->zip_pad.mutex_created == os_once::DONE
 	    && index->zip_pad.mutex != NULL) {
-		os_fast_mutex_free(index->zip_pad.mutex);
-		delete index->zip_pad.mutex;
+		mutex_free(index->zip_pad.mutex);
+		UT_DELETE(index->zip_pad.mutex);
 	}
 }
 
@@ -1460,7 +1877,7 @@ void
 dict_index_zip_pad_unlock(
 	dict_index_t*	index)
 {
-	os_fast_mutex_unlock(index->zip_pad.mutex);
+	mutex_exit(index->zip_pad.mutex);
 }
 
 #ifdef UNIV_DEBUG
@@ -1476,8 +1893,36 @@ dict_table_autoinc_own(
 }
 #endif /* UNIV_DEBUG */
 
+/** Classify how a column is used by indexes: by no spatial index, by
+spatial indexes only, or by both spatial and regular indexes.
+@param[in]	col	column to check
+@return SPATIAL_NONE, SPATIAL_ONLY or SPATIAL_MIXED */
+inline
+spatial_status_t
+dict_col_get_spatial_status(
+	const dict_col_t*	col)
+{
+	/* Column is not a part of any index. */
+	if (!col->ord_part) {
+		return(SPATIAL_NONE);
+	}
+
+	/* Non-geometry columns are never reported as spatial. */
+	if (!DATA_GEOMETRY_MTYPE(col->mtype)) {
+		return(SPATIAL_NONE);
+	}
+
+	/* Any regular index on a geometry column should have a prefix,
+	so a non-zero max_prefix indicates a regular index as well. */
+	if (col->max_prefix != 0) {
+		return(SPATIAL_MIXED);
+	}
+
+	return(SPATIAL_ONLY);
+}
+
 #ifndef UNIV_NONINL
 #include "dict0mem.ic"
 #endif
 
-#endif
+#endif /* dict0mem_h */
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index 38d51f61789..a50fb615a09 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -63,11 +63,17 @@ dict_mem_fill_index_struct(
 #ifndef UNIV_HOTBACKUP
 	index->space = (unsigned int) space;
 	index->page = FIL_NULL;
+	index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
 #endif /* !UNIV_HOTBACKUP */
 	index->table_name = table_name;
 	index->n_fields = (unsigned int) n_fields;
 	/* The '1 +' above prevents allocation
 	of an empty mem block */
+	index->nulls_equal = false;
+#ifdef MYSQL_INDEX_DISABLE_AHI
+	index->disable_ahi = false;
+#endif
+
 #ifdef UNIV_DEBUG
 	index->magic_n = DICT_INDEX_MAGIC_N;
 #endif /* UNIV_DEBUG */
diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h
index e034662aba0..35548faeb93 100644
--- a/storage/innobase/include/dict0priv.h
+++ b/storage/innobase/include/dict0priv.h
@@ -26,10 +26,12 @@ Created  Fri 2 Jul 2010 13:30:38 EST - Sunny Bains
 #ifndef dict0priv_h
 #define dict0priv_h
 
+#include "univ.i"
+
 /**********************************************************************//**
 Gets a table; loads it to the dictionary cache if necessary. A low-level
 function. Note: Not to be called from outside dict0*c functions.
-@return	table, NULL if not found */
+@return table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_low(
@@ -38,7 +40,7 @@ dict_table_get_low(
 
 /**********************************************************************//**
 Checks if a table is in the dictionary cache.
-@return	table, NULL if not found */
+@return table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_check_if_in_cache_low(
@@ -47,7 +49,7 @@ dict_table_check_if_in_cache_low(
 
 /**********************************************************************//**
 Returns a table object based on table id.
-@return	table, NULL if does not exist */
+@return table, NULL if does not exist */
 UNIV_INLINE
 dict_table_t*
 dict_table_open_on_id_low(
diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic
index 983218af78a..fd10c566be6 100644
--- a/storage/innobase/include/dict0priv.ic
+++ b/storage/innobase/include/dict0priv.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,7 +31,7 @@ Created  Wed 13 Oct 2010 16:10:14 EST Sunny Bains
 /**********************************************************************//**
 Gets a table; loads it to the dictionary cache if necessary. A low-level
 function.
-@return	table, NULL if not found */
+@return table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_low(
@@ -41,24 +41,22 @@ dict_table_get_low(
 	dict_table_t*	table;
 
 	ut_ad(table_name);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(mutex_own(&dict_sys->mutex));
 
 	table = dict_table_check_if_in_cache_low(table_name);
 
 	if (table && table->corrupted) {
-		fprintf(stderr, "InnoDB: table");
-		ut_print_name(stderr, NULL, TRUE, table->name);
+		ib::error	error;
+		error << "Table " << table->name << " is corrupted";
 		if (srv_load_corrupted) {
-			fputs(" is corrupted, but"
-			      " innodb_force_load_corrupted is set\n", stderr);
+			error << ", but innodb_force_load_corrupted is set";
 		} else {
-			fputs(" is corrupted\n", stderr);
 			return(NULL);
 		}
 	}
 
 	if (table == NULL) {
-		table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
+		table = dict_load_table(table_name, true, DICT_ERR_IGNORE_NONE);
 	}
 
 	ut_ad(!table || table->cached);
@@ -68,7 +66,7 @@ dict_table_get_low(
 
 /**********************************************************************//**
 Returns a table object based on table id.
-@return	table, NULL if does not exist */
+@return table, NULL if does not exist */
 UNIV_INLINE
 dict_table_t*
 dict_table_open_on_id_low(
@@ -81,7 +79,7 @@ dict_table_open_on_id_low(
 	dict_table_t*	table;
 	ulint		fold;
 
-	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(mutex_own(&dict_sys->mutex));
 
 	/* Look for the table name in the hash table */
 	fold = ut_fold_ull(table_id);
@@ -102,7 +100,7 @@ dict_table_open_on_id_low(
 
 /**********************************************************************//**
 Checks if a table is in the dictionary cache.
-@return	table, NULL if not found */
+@return table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_check_if_in_cache_low(
@@ -112,15 +110,19 @@ dict_table_check_if_in_cache_low(
 	dict_table_t*	table;
 	ulint		table_fold;
 
+	DBUG_ENTER("dict_table_check_if_in_cache_low");
+	DBUG_PRINT("dict_table_check_if_in_cache_low",
+		   ("table: '%s'", table_name));
+
 	ut_ad(table_name);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(mutex_own(&dict_sys->mutex));
 
 	/* Look for the table name in the hash table */
 	table_fold = ut_fold_string(table_name);
 
 	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
 		    dict_table_t*, table, ut_ad(table->cached),
-		    !strcmp(table->name, table_name));
-	return(table);
+		    !strcmp(table->name.m_name, table_name));
+	DBUG_RETURN(table);
 }
 #endif /*! UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h
index 72501bf9429..8941b399f7d 100644
--- a/storage/innobase/include/dict0stats.h
+++ b/storage/innobase/include/dict0stats.h
@@ -28,7 +28,6 @@ Created Jan 06, 2010 Vasil Dimov
 
 #include "univ.i"
 
-#include "db0err.h"
 #include "dict0types.h"
 #include "trx0types.h"
 
@@ -60,7 +59,6 @@ is relatively quick and is used to calculate transient statistics that
 are not saved on disk.
 This was the only way to calculate statistics before the
 Persistent Statistics feature was introduced. */
-UNIV_INTERN
 void
 dict_stats_update_transient(
 /*========================*/
@@ -133,7 +131,6 @@ dict_stats_deinit(
 Calculates new estimates for table and index statistics. The statistics
 are used in query optimization.
 @return DB_* error code or DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 dict_stats_update(
 /*==============*/
@@ -148,7 +145,6 @@ Removes the information for a particular index's stats from the persistent
 storage if it exists and if there is data stored for this index.
 This function creates its own trx and commits it.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 dict_stats_drop_index(
 /*==================*/
@@ -163,7 +159,6 @@ Removes the statistics for a table and all of its indexes from the
 persistent storage if it exists and if there is data stored for the table.
 This function creates its own transaction and commits it.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 dict_stats_drop_table(
 /*==================*/
@@ -174,7 +169,6 @@ dict_stats_drop_table(
 
 /*********************************************************************//**
 Fetches or calculates new estimates for index statistics. */
-UNIV_INTERN
 void
 dict_stats_update_for_index(
 /*========================*/
@@ -185,7 +179,6 @@ dict_stats_update_for_index(
 Renames a table in InnoDB persistent stats storage.
 This function creates its own transaction and commits it.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 dict_stats_rename_table(
 /*====================*/
@@ -194,7 +187,19 @@ dict_stats_rename_table(
 	char*		errstr,		/*!< out: error string if != DB_SUCCESS
 					is returned */
 	size_t		errstr_sz);	/*!< in: errstr size */
-
+/*********************************************************************//**
+Renames an index in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
+if the persistent stats do not exist. */
+dberr_t
+dict_stats_rename_index(
+/*====================*/
+	const dict_table_t*	table,		/*!< in: table whose index
+						is renamed */
+	const char*		old_index_name,	/*!< in: old index name */
+	const char*		new_index_name)	/*!< in: new index name */
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Save defragmentation result.
 @return DB_SUCCESS or error code */
@@ -228,8 +233,48 @@ dict_stats_empty_defrag_stats(
 	dict_index_t* index);	/*!< in: index to clear defragmentation stats */
 
 
+/*********************************************************************//**
+Renames an index in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
+if the persistent stats do not exist. */
+dberr_t
+dict_stats_rename_index(
+/*====================*/
+	const dict_table_t*	table,		/*!< in: table whose index
+						is renamed */
+	const char*		old_index_name,	/*!< in: old index name */
+	const char*		new_index_name)	/*!< in: new index name */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Save an individual index's statistic into the persistent statistics
+storage.
+@param[in]	index			index to be updated
+@param[in]	last_update		timestamp of the stat
+@param[in]	stat_name		name of the stat
+@param[in]	stat_value		value of the stat
+@param[in]	sample_size		n pages sampled or NULL
+@param[in]	stat_description	description of the stat
+@param[in,out]	trx			in case of NULL the function will
+allocate and free the trx object. If it is not NULL then it will be
+rolled back only in the case of error, but not freed.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_index_stat(
+	dict_index_t*	index,
+	lint		last_update,
+	const char*	stat_name,
+	ib_uint64_t	stat_value,
+	ib_uint64_t*	sample_size,
+	const char*	stat_description,
+	trx_t*		trx);
+
 #ifndef UNIV_NONINL
 #include "dict0stats.ic"
 #endif
 
+#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
+void test_dict_stats_all();
+#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
+
 #endif /* dict0stats_h */
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
index ec9a9065470..61c88773912 100644
--- a/storage/innobase/include/dict0stats.ic
+++ b/storage/innobase/include/dict0stats.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,10 +23,9 @@ Code used for calculating and manipulating table statistics.
 Created Jan 23, 2012 Vasil Dimov
 *******************************************************/
 
-#include "univ.i"
-#include "dict0dict.h" /* dict_table_stats_lock() */
-#include "dict0types.h" /* dict_table_t */
-#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+#include "dict0dict.h"
+#include "dict0types.h"
+#include "srv0srv.h"
 
 /*********************************************************************//**
 Set the persistent statistics flag for a given table. This is set only
@@ -185,7 +184,7 @@ dict_stats_deinit(
 {
 	ut_ad(mutex_own(&dict_sys->mutex));
 
-	ut_a(table->n_ref_count == 0);
+	ut_a(table->get_ref_count() == 0);
 
 	dict_table_stats_lock(table, RW_X_LATCH);
 
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index 34dc4657829..50c2591332e 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -28,20 +28,28 @@ Created Apr 26, 2012 Vasil Dimov
 
 #include "univ.i"
 
-#include "dict0types.h" /* dict_table_t, table_id_t */
-#include "os0sync.h" /* os_event_t */
-#include "os0thread.h" /* DECLARE_THREAD */
+#include "dict0types.h"
+#include "os0event.h"
+#include "os0thread.h"
 
 /** Event to wake up the stats thread */
 extern os_event_t	dict_stats_event;
 
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t	dict_stats_recalc_pool_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+
+#ifdef UNIV_DEBUG
+/** Value of MySQL global used to disable dict_stats thread. */
+extern my_bool		innodb_dict_stats_disabled_debug;
+#endif /* UNIV_DEBUG */
+
 /*****************************************************************//**
 Add a table to the recalc pool, which is processed by the
 background stats gathering thread. Only the table id is added to the
 list, so the table can be closed after being enqueued and it will be
 opened when needed. If the table does not exist later (has been DROPped),
 then it will be removed from the pool and skipped. */
-UNIV_INTERN
 void
 dict_stats_recalc_pool_add(
 /*=======================*/
@@ -50,37 +58,14 @@ dict_stats_recalc_pool_add(
 /*****************************************************************//**
 Delete a given table from the auto recalc pool.
 dict_stats_recalc_pool_del() */
-UNIV_INTERN
 void
 dict_stats_recalc_pool_del(
 /*=======================*/
 	const dict_table_t*	table);	/*!< in: table to remove */
 
-/*****************************************************************//**
-Add an index in a table to the defrag pool, which is processed by the
-background stats gathering thread. Only the table id and index id are
-added to the list, so the table can be closed after being enqueued and
-it will be opened when needed. If the table or index does not exist later
-(has been DROPped), then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_add(
-/*=======================*/
-	const dict_index_t*	index);	/*!< in: table to add */
-
-/*****************************************************************//**
-Delete a given index from the auto defrag pool. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_del(
-/*=======================*/
-	const dict_table_t*	table,	/*!<in: if given, remove
-					all entries for the table */
-	const dict_index_t*	index);	/*!< in: index to remove */
-
 /** Yield the data dictionary latch when waiting
 for the background thread to stop accessing a table.
-@param trx	transaction holding the data dictionary locks */
+@param trx transaction holding the data dictionary locks */
 #define DICT_STATS_BG_YIELD(trx)	do {	\
 	row_mysql_unlock_data_dictionary(trx);	\
 	os_thread_sleep(250000);		\
@@ -107,7 +92,6 @@ The background stats thread is guaranteed not to start using the specified
 table after this function returns and before the caller unlocks the data
 dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
 under dict_sys->mutex. */
-UNIV_INTERN
 void
 dict_stats_wait_bg_to_stop_using_table(
 /*===================================*/
@@ -117,7 +101,6 @@ dict_stats_wait_bg_to_stop_using_table(
 /*****************************************************************//**
 Initialize global variables needed for the operation of dict_stats_thread().
 Must be called before dict_stats_thread() is started. */
-UNIV_INTERN
 void
 dict_stats_thread_init();
 /*====================*/
@@ -125,23 +108,41 @@ dict_stats_thread_init();
 /*****************************************************************//**
 Free resources allocated by dict_stats_thread_init(), must be called
 after dict_stats_thread() has exited. */
-UNIV_INTERN
 void
 dict_stats_thread_deinit();
 /*======================*/
 
+#ifdef UNIV_DEBUG
+/** Disables dict stats thread. It's used by:
+	SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
+@param[in]	thd		thread handle
+@param[in]	var		pointer to system variable
+@param[out]	var_ptr		where the formal string goes
+@param[in]	save		immediate result from check function */
+void
+dict_stats_disabled_debug_update(
+	THD*				thd,
+	struct st_mysql_sys_var*	var,
+	void*				var_ptr,
+	const void*			save);
+#endif /* UNIV_DEBUG */
+
 /*****************************************************************//**
 This is the thread for background stats gathering. It pops tables, from
 the auto recalc list and proceeds them, eventually recalculating their
 statistics.
 @return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
 os_thread_ret_t
 DECLARE_THREAD(dict_stats_thread)(
 /*==============================*/
 	void*	arg);	/*!< in: a dummy parameter
 			required by os_thread_create */
 
+/** Shutdown the dict stats thread. */
+void
+dict_stats_shutdown();
+
 # ifndef UNIV_NONINL
 #  include "dict0stats_bg.ic"
 # endif
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 35430e8ea62..ae002dd9487 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,15 +27,19 @@ Created 1/8/1996 Heikki Tuuri
 #ifndef dict0types_h
 #define dict0types_h
 
+#include <ut0mutex.h>
+
 struct dict_sys_t;
 struct dict_col_t;
 struct dict_field_t;
 struct dict_index_t;
 struct dict_table_t;
 struct dict_foreign_t;
+struct dict_v_col_t;
 
 struct ind_node_t;
 struct tab_node_t;
+struct dict_add_v_col_t;
 
 /* Space id and page no where the dictionary header resides */
 #define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
@@ -83,10 +87,14 @@ typedef enum {
 	ATOMIC_WRITES_OFF = 2
 } atomic_writes_t;
 
+#ifndef UNIV_INNOCHECKSUM
+typedef ib_mutex_t DictSysMutex;
+#endif /* !UNIV_INNOCHECKSUM */
+
 /** Prefix for tmp tables, adopted from sql/table.h */
-#define tmp_file_prefix		"#sql"
-#define tmp_file_prefix_length	4
-#define TEMP_FILE_PREFIX_INNODB	"#sql-ib"
+#define TEMP_FILE_PREFIX		"#sql"
+#define TEMP_FILE_PREFIX_LENGTH		4
+#define TEMP_FILE_PREFIX_INNODB		"#sql-ib"
 
 #define TEMP_TABLE_PREFIX                "#sql"
 #define TEMP_TABLE_PATH_PREFIX           "/" TEMP_TABLE_PREFIX
@@ -96,4 +104,31 @@ typedef enum {
 extern uint		ibuf_debug;
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
+/** Shift for spatial status */
+#define SPATIAL_STATUS_SHIFT	12
+
+/** Mask to encode/decode spatial status. */
+#define SPATIAL_STATUS_MASK	(3 << SPATIAL_STATUS_SHIFT)
+
+#if SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN
+# error SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN
+#endif
+
+/** whether a col is used in spatial index or regular index
+Note: the spatial status is part of persistent undo log,
+so we should not modify the values in MySQL 5.7 */
+enum spatial_status_t {
+	/** Unknown status (undo format in 5.7.9) */
+	SPATIAL_UNKNOWN = 0,
+
+	/** Not used in gis index. */
+	SPATIAL_NONE	= 1,
+
+	/** Used in both spatial index and regular index. */
+	SPATIAL_MIXED	= 2,
+
+	/** Only used in spatial index. */
+	SPATIAL_ONLY	= 3
+};
+
 #endif
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
new file mode 100644
index 00000000000..3126c8e4683
--- /dev/null
+++ b/storage/innobase/include/dyn0buf.h
@@ -0,0 +1,505 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0buf.h
+The dynamically allocated buffer implementation
+
+Created 2013-03-16 Sunny Bains
+*******************************************************/
+
+#ifndef dyn0buf_h
+#define dyn0buf_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+#include "dyn0types.h"
+
+/** Class that manages dynamic buffers. It uses a UT_LIST of
+dyn_buf_t::block_t instances. We don't use STL containers in
+order to avoid the overhead of heap calls. Using a custom memory
+allocator doesn't solve the problem either because we have to get
+the memory from somewhere. We can't use the block_t::m_data as the
+backend for the custom allocator because we would like the data in
+the blocks to be contiguous. */
+template <size_t SIZE = DYN_ARRAY_DATA_SIZE>
+class dyn_buf_t {
+public:
+
+	class block_t;
+
+	typedef UT_LIST_NODE_T(block_t) block_node_t;
+	typedef UT_LIST_BASE_NODE_T(block_t) block_list_t;
+
+	class block_t {
+	public:
+
+		block_t()
+		{
+			ut_ad(MAX_DATA_SIZE <= (2 << 15));
+			init();
+		}
+
+		~block_t() { }
+
+		/**
+		Gets the number of used bytes in a block.
+		@return	number of bytes used */
+		ulint used() const
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(static_cast<ulint>(m_used & ~DYN_BLOCK_FULL_FLAG));
+		}
+
+		/**
+		Gets pointer to the start of data.
+		@return	pointer to data */
+		byte* start()
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(m_data);
+		}
+
+		/**
+		@return start of data - non const version */
+		byte* begin()
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(m_data);
+		}
+
+		/**
+		@return end of used data - non const version */
+		byte* end()
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(begin() + m_used);
+		}
+
+		/**
+		@return start of data - const version */
+		const byte* begin() const
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(m_data);
+		}
+
+		/**
+		@return end of used data - const version */
+		const byte* end() const
+			MY_ATTRIBUTE((warn_unused_result))
+		{
+			return(begin() + m_used);
+		}
+
+	private:
+		/**
+		@return pointer to start of reserved space */
+		template <typename Type>
+		Type push(ib_uint32_t size)
+		{
+			Type	ptr = reinterpret_cast<Type>(end());
+
+			m_used += size;
+			ut_ad(m_used <= static_cast<ib_uint32_t>(MAX_DATA_SIZE));
+
+			return(ptr);
+		}
+
+		/**
+		Close the block: set the used size so that the data ends at ptr. */
+		void close(const byte* ptr)
+		{
+			/* Check that it is within bounds */
+			ut_ad(ptr >= begin());
+			ut_ad(ptr <= begin() + m_buf_end);
+
+			/* We have done the boundary check above */
+			m_used = static_cast<ib_uint32_t>(ptr - begin());
+
+			ut_ad(m_used <= MAX_DATA_SIZE);
+			ut_d(m_buf_end = 0);
+		}
+
+		/**
+		Initialise the block */
+		void init()
+		{
+			m_used = 0;
+			ut_d(m_buf_end = 0);
+			ut_d(m_magic_n = DYN_BLOCK_MAGIC_N);
+		}
+	private:
+#ifdef UNIV_DEBUG
+		/** If opened then this is the buffer end offset, else 0 */
+		ulint		m_buf_end;
+
+		/** Magic number (DYN_BLOCK_MAGIC_N) */
+		ulint		m_magic_n;
+#endif /* UNIV_DEBUG */
+
+		/** SIZE - sizeof(m_node) + sizeof(m_used) */
+		enum {
+			MAX_DATA_SIZE = SIZE
+				      - sizeof(block_node_t)
+				      + sizeof(ib_uint32_t)
+		};
+
+		/** Storage */
+		byte		m_data[MAX_DATA_SIZE];
+
+		/** Doubly linked list node. */
+		block_node_t	m_node;
+
+		/** number of data bytes used in this block;
+		DYN_BLOCK_FULL_FLAG is set when the block becomes full */
+		ib_uint32_t	m_used;
+
+		friend class dyn_buf_t;
+	};
+
+	enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE};
+
+	/** Default constructor */
+	dyn_buf_t()
+		:
+		m_heap(),
+		m_size()
+	{
+		UT_LIST_INIT(m_list, &block_t::m_node);
+		push_back(&m_first_block);
+	}
+
+	/** Destructor */
+	~dyn_buf_t()
+	{
+		erase();
+	}
+
+	/** Reset the buffer vector */
+	void erase()
+	{
+		if (m_heap != NULL) {
+			mem_heap_free(m_heap);
+			m_heap = NULL;
+
+			/* Initialise the list and add the first block. */
+			UT_LIST_INIT(m_list, &block_t::m_node);
+			push_back(&m_first_block);
+		} else {
+			m_first_block.init();
+			ut_ad(UT_LIST_GET_LEN(m_list) == 1);
+		}
+
+		m_size = 0;
+	}
+
+	/**
+	Makes room on top and returns a pointer to a buffer in it. After
+	copying the elements, the caller must close the buffer using close().
+	@param size	in bytes of the buffer; MUST be <= MAX_DATA_SIZE!
+	@return	pointer to the buffer */
+	byte* open(ulint size)
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		ut_ad(size > 0);
+		ut_ad(size <= MAX_DATA_SIZE);
+
+		block_t*	block;
+
+		block = has_space(size) ? back() : add_block();
+
+		ut_ad(block->m_used <= MAX_DATA_SIZE);
+		ut_d(block->m_buf_end = block->m_used + size);
+
+		return(block->end());
+	}
+
+	/**
+	Closes the buffer returned by open.
+	@param ptr	end of used space */
+	void close(const byte* ptr)
+	{
+		ut_ad(UT_LIST_GET_LEN(m_list) > 0);
+		block_t*	block = back();
+
+		m_size -= block->used();
+
+		block->close(ptr);
+
+		m_size += block->used();
+	}
+
+	/**
+	Makes room on top and returns a pointer to the added element.
+	The caller must copy the element to the pointer returned.
+	@param size	in bytes of the element
+	@return	pointer to the element */
+	template <typename Type>
+	Type push(ib_uint32_t size)
+	{
+		ut_ad(size > 0);
+		ut_ad(size <= MAX_DATA_SIZE);
+
+		block_t*	block;
+
+		block = has_space(size) ? back() : add_block();
+
+		m_size += size;
+
+		/* See ISO C++03 14.2/4 for why "template" is required. */
+
+		return(block->template push<Type>(size));
+	}
+
+	/**
+	Pushes n bytes.
+	@param ptr	string to write
+	@param len	string length */
+	void push(const byte* ptr, ib_uint32_t len)
+	{
+		while (len > 0) {
+			ib_uint32_t	n_copied;
+
+			if (len >= MAX_DATA_SIZE) {
+				n_copied = MAX_DATA_SIZE;
+			} else {
+				n_copied = len;
+			}
+
+			::memmove(push<byte*>(n_copied), ptr, n_copied);
+
+			ptr += n_copied;
+			len -= n_copied;
+		}
+	}
+
+	/**
+	Returns a pointer to an element in the buffer. const version.
+	@param pos	position of element in bytes from start
+	@return	pointer to element */
+	template <typename Type>
+	const Type at(ulint pos) const
+	{
+		block_t*	block = const_cast<block_t*>(
+			const_cast<dyn_buf_t*>(this)->find(pos));
+
+		return(reinterpret_cast<Type>(block->begin() + pos));
+	}
+
+	/**
+	Returns a pointer to an element in the buffer. non const version.
+	@param pos	position of element in bytes from start
+	@return	pointer to element */
+	template <typename Type>
+	Type at(ulint pos)
+	{
+		block_t*	block = const_cast<block_t*>(find(pos));
+
+		return(reinterpret_cast<Type>(block->begin() + pos));
+	}
+
+	/**
+	Returns the size of the total stored data.
+	@return	data size in bytes */
+	ulint size() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+#ifdef UNIV_DEBUG
+		ulint	total_size = 0;
+
+		for (const block_t* block = UT_LIST_GET_FIRST(m_list);
+		     block != NULL;
+		     block = UT_LIST_GET_NEXT(m_node, block)) {
+
+			total_size += block->used();
+		}
+
+		ut_ad(total_size == m_size);
+#endif /* UNIV_DEBUG */
+		return(m_size);
+	}
+
+	/**
+	Iterate over each block and call the functor.
+	@return	false if iteration was terminated. */
+	template <typename Functor>
+	bool for_each_block(Functor& functor) const
+	{
+		for (const block_t* block = UT_LIST_GET_FIRST(m_list);
+		     block != NULL;
+		     block = UT_LIST_GET_NEXT(m_node, block)) {
+
+			if (!functor(block)) {
+				return(false);
+			}
+		}
+
+		return(true);
+	}
+
+	/**
+	Iterate over all the blocks in reverse and call the iterator
+	@return	false if iteration was terminated. */
+	template <typename Functor>
+	bool for_each_block_in_reverse(Functor& functor) const
+	{
+		for (block_t* block = UT_LIST_GET_LAST(m_list);
+		     block != NULL;
+		     block = UT_LIST_GET_PREV(m_node, block)) {
+
+			if (!functor(block)) {
+				return(false);
+			}
+		}
+
+		return(true);
+	}
+
+	/**
+	@return the first block */
+	block_t* front()
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		ut_ad(UT_LIST_GET_LEN(m_list) > 0);
+		return(UT_LIST_GET_FIRST(m_list));
+	}
+
+	/**
+	@return true if no heap block was allocated (only m_first_block is in use) */
+	bool is_small() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(m_heap == NULL);
+	}
+
+private:
+	// Disable copying
+	dyn_buf_t(const dyn_buf_t&);
+	dyn_buf_t& operator=(const dyn_buf_t&);
+
+	/**
+	Initialise the block and add it to the end of the list */
+	void push_back(block_t* block)
+	{
+		block->init();
+
+		UT_LIST_ADD_LAST(m_list, block);
+	}
+
+	/** @return the last block in the list */
+	block_t* back()
+	{
+		return(UT_LIST_GET_LAST(m_list));
+	}
+
+	/**
+	@return true if request can be fulfilled */
+	bool has_space(ulint size) const
+	{
+		return(back()->m_used + size <= MAX_DATA_SIZE);
+	}
+
+	/**
+	@return true if request can be fulfilled */
+	bool has_space(ulint size)
+	{
+		return(back()->m_used + size <= MAX_DATA_SIZE);
+	}
+
+	/** Find the block that contains the pos.
+	@param pos	absolute offset, it is updated to make it relative
+			to the block
+	@return the block containing the pos. */
+	block_t* find(ulint& pos)
+	{
+		block_t*	block;
+
+		ut_ad(UT_LIST_GET_LEN(m_list) > 0);
+
+		for (block = UT_LIST_GET_FIRST(m_list);
+		     block != NULL;
+		     block = UT_LIST_GET_NEXT(m_node, block)) {
+
+			if (pos < block->used()) {
+				break;
+			}
+
+			pos -= block->used();
+		}
+
+		ut_ad(block != NULL);
+		ut_ad(block->used() >= pos);
+
+		return(block);
+	}
+
+	/**
+	Allocate and add a new block to m_list */
+	block_t* add_block()
+	{
+		block_t*	block;
+
+		if (m_heap == NULL) {
+			m_heap = mem_heap_create(sizeof(*block));
+		}
+
+		block = reinterpret_cast<block_t*>(
+			mem_heap_alloc(m_heap, sizeof(*block)));
+
+		push_back(block);
+
+		return(block);
+	}
+
+private:
+	/** Heap to use for memory allocation */
+	mem_heap_t*		m_heap;
+
+	/** Allocated blocks */
+	block_list_t		m_list;
+
+	/** Total size used by all blocks */
+	ulint			m_size;
+
+	/** The default block, should always be the first element. This
+	is for backwards compatibility and to avoid an extra heap allocation
+	for small REDO log records */
+	block_t			m_first_block;
+};
+
+typedef dyn_buf_t<DYN_ARRAY_DATA_SIZE> mtr_buf_t;
+
+/** mtr_buf_t copier */
+struct mtr_buf_copy_t {
+	/** The copied buffer */
+	mtr_buf_t	m_buf;
+
+	/** Append a copy of the used data of a block to m_buf.
+	@return whether the appending should continue (always true here) */
+	bool operator()(const mtr_buf_t::block_t* block)
+	{
+		byte*	buf = m_buf.open(block->used());
+		memcpy(buf, block->begin(), block->used());
+		m_buf.close(buf + block->used());
+		return(true);
+	}
+};
+
+#endif /* dyn0buf_h */
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
deleted file mode 100644
index 1bd10b6bf58..00000000000
--- a/storage/innobase/include/dyn0dyn.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.h
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-/** A block in a dynamically allocated array */
-struct dyn_block_t;
-/** Dynamically allocated array */
-typedef dyn_block_t		dyn_array_t;
-
-/** This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define	DYN_ARRAY_DATA_SIZE	512
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return	initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
-	dyn_array_t*	arr)	/*!< in/out memory buffer of
-				size sizeof(dyn_array_t) */
-	MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
-	dyn_array_t*	arr)	/*!< in,own: dyn array */
-	MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return	pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
-	dyn_array_t*	arr,	/*!< in: dynamic array */
-	ulint		size)	/*!< in: size in bytes of the buffer; MUST be
-				smaller than DYN_ARRAY_DATA_SIZE! */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
-	dyn_array_t*	arr,	/*!< in: dynamic array */
-	const byte*	ptr)	/*!< in: end of used space */
-	MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned.
-@return	pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
-	dyn_array_t*	arr,	/*!< in/out: dynamic array */
-	ulint		size)	/*!< in: size in bytes of the element */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return	pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
-	const dyn_array_t*	arr,	/*!< in: dyn array */
-	ulint			pos)	/*!< in: position of element
-					in bytes from array start */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return	data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
-	const dyn_array_t*	arr)	/*!< in: dyn array */
-	MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/************************************************************//**
-Gets the first block in a dyn array.
-@param arr	dyn array
-@return		first block */
-#define dyn_array_get_first_block(arr) (arr)
-/************************************************************//**
-Gets the last block in a dyn array.
-@param arr	dyn array
-@return		last block */
-#define dyn_array_get_last_block(arr)				\
-	((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr))
-/********************************************************************//**
-Gets the next block in a dyn array.
-@param arr	dyn array
-@param block	dyn array block
-@return		pointer to next, NULL if end of list */
-#define dyn_array_get_next_block(arr, block)			\
-	((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL)
-/********************************************************************//**
-Gets the previous block in a dyn array.
-@param arr	dyn array
-@param block	dyn array block
-@return		pointer to previous, NULL if end of list */
-#define dyn_array_get_prev_block(arr, block)			\
-	((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL)
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return	number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
-	const dyn_block_t*	block)	/*!< in: dyn array block */
-	MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return	pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
-	const dyn_block_t*	block)	/*!< in: dyn array block */
-	MY_ATTRIBUTE((nonnull, warn_unused_result, pure));
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
-	dyn_array_t*	arr,	/*!< in/out: dyn array */
-	const byte*	str,	/*!< in: string to write */
-	ulint		len)	/*!< in: string length */
-	MY_ATTRIBUTE((nonnull));
-
-/*#################################################################*/
-
-/** @brief A block in a dynamically allocated array.
-NOTE! Do not access the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_t{
-	mem_heap_t*	heap;	/*!< in the first block this is != NULL
-				if dynamic allocation has been needed */
-	ulint		used;	/*!< number of data bytes used in this block;
-				DYN_BLOCK_FULL_FLAG is set when the block
-				becomes full */
-	byte		data[DYN_ARRAY_DATA_SIZE];
-				/*!< storage for array elements */
-	UT_LIST_BASE_NODE_T(dyn_block_t) base;
-				/*!< linear list of dyn blocks: this node is
-				used only in the first block */
-	UT_LIST_NODE_T(dyn_block_t) list;
-				/*!< linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
-	ulint		buf_end;/*!< only in the debug version: if dyn
-				array is opened, this is the buffer
-				end offset, else this is 0 */
-	ulint		magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
deleted file mode 100644
index f18f2e6dff9..00000000000
--- a/storage/innobase/include/dyn0dyn.ic
+++ /dev/null
@@ -1,306 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.ic
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-/** Value of dyn_block_t::magic_n */
-#define DYN_BLOCK_MAGIC_N	375767
-/** Flag for dyn_block_t::used that indicates a full block */
-#define DYN_BLOCK_FULL_FLAG	0x1000000UL
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return	created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
-	dyn_array_t*	arr)	/*!< in/out: dyn array */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return	number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
-	const dyn_block_t*	block)	/*!< in: dyn array block */
-{
-	ut_ad(block);
-
-	return((block->used) & ~DYN_BLOCK_FULL_FLAG);
-}
-
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return	pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
-	const dyn_block_t*	block)	/*!< in: dyn array block */
-{
-	ut_ad(block);
-
-	return(const_cast<byte*>(block->data));
-}
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return	initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
-	dyn_array_t*	arr)	/*!< in/out: memory buffer of
-				size sizeof(dyn_array_t) */
-{
-	ut_ad(arr);
-#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
-# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
-#endif
-
-	arr->heap = NULL;
-	arr->used = 0;
-
-	ut_d(arr->buf_end = 0);
-	ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N);
-
-	return(arr);
-}
-
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
-	dyn_array_t*	arr)	/*!< in: dyn array */
-{
-	if (arr->heap != NULL) {
-		mem_heap_free(arr->heap);
-	}
-
-	ut_d(arr->magic_n = 0);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned.
-@return	pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
-	dyn_array_t*	arr,	/*!< in/out: dynamic array */
-	ulint		size)	/*!< in: size in bytes of the element */
-{
-	dyn_block_t*	block;
-	ulint		used;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
-	ut_ad(size);
-
-	block = arr;
-
-	if (block->used + size > DYN_ARRAY_DATA_SIZE) {
-		/* Get the last array block */
-
-		block = dyn_array_get_last_block(arr);
-
-		if (block->used + size > DYN_ARRAY_DATA_SIZE) {
-			block = dyn_array_add_block(arr);
-		}
-	}
-
-	used = block->used;
-
-	block->used = used + size;
-	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
-	return(block->data + used);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return	pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
-	dyn_array_t*	arr,	/*!< in: dynamic array */
-	ulint		size)	/*!< in: size in bytes of the buffer; MUST be
-				smaller than DYN_ARRAY_DATA_SIZE! */
-{
-	dyn_block_t*	block;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
-	ut_ad(size);
-
-	block = arr;
-
-	if (block->used + size > DYN_ARRAY_DATA_SIZE) {
-		/* Get the last array block */
-
-		block = dyn_array_get_last_block(arr);
-
-		if (block->used + size > DYN_ARRAY_DATA_SIZE) {
-			block = dyn_array_add_block(arr);
-			ut_a(size <= DYN_ARRAY_DATA_SIZE);
-		}
-	}
-
-	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-	ut_ad(arr->buf_end == 0);
-	ut_d(arr->buf_end = block->used + size);
-
-	return(block->data + block->used);
-}
-
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
-	dyn_array_t*	arr,	/*!< in/out: dynamic array */
-	const byte*	ptr)	/*!< in: end of used space */
-{
-	dyn_block_t*	block;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	block = dyn_array_get_last_block(arr);
-
-	ut_ad(arr->buf_end + block->data >= ptr);
-
-	block->used = ptr - block->data;
-
-	ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
-	ut_d(arr->buf_end = 0);
-}
-
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return	pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
-	const dyn_array_t*	arr,	/*!< in: dyn array */
-	ulint			pos)	/*!< in: position of element
-					in bytes from array start */
-{
-	const dyn_block_t*	block;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	/* Get the first array block */
-	block = dyn_array_get_first_block(arr);
-
-	if (arr->heap != NULL) {
-		for (;;) {
-			ulint	used = dyn_block_get_used(block);
-
-			if (pos < used) {
-				break;
-			}
-
-			pos -= used;
-			block = UT_LIST_GET_NEXT(list, block);
-			ut_ad(block);
-		}
-	}
-
-	ut_ad(block);
-	ut_ad(dyn_block_get_used(block) >= pos);
-
-	return(const_cast<byte*>(block->data) + pos);
-}
-
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return	data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
-	const dyn_array_t*	arr)	/*!< in: dyn array */
-{
-	const dyn_block_t*	block;
-	ulint			sum	= 0;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	if (arr->heap == NULL) {
-
-		return(arr->used);
-	}
-
-	/* Get the first array block */
-	block = dyn_array_get_first_block(arr);
-
-	while (block != NULL) {
-		sum += dyn_block_get_used(block);
-		block = dyn_array_get_next_block(arr, block);
-	}
-
-	return(sum);
-}
-
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
-	dyn_array_t*	arr,	/*!< in/out: dyn array */
-	const byte*	str,	/*!< in: string to write */
-	ulint		len)	/*!< in: string length */
-{
-	ulint	n_copied;
-
-	while (len > 0) {
-		if (len > DYN_ARRAY_DATA_SIZE) {
-			n_copied = DYN_ARRAY_DATA_SIZE;
-		} else {
-			n_copied = len;
-		}
-
-		memcpy(dyn_array_push(arr, n_copied), str, n_copied);
-
-		str += n_copied;
-		len -= n_copied;
-	}
-}
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/dyn0types.h
index f4bafb8ba63..058a22f46e1 100644
--- a/storage/innobase/include/mem0pool.ic
+++ b/storage/innobase/include/dyn0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -16,9 +16,24 @@ this program; if not, write to the Free Software Foundation, Inc.,
 
 *****************************************************************************/
 
-/********************************************************************//**
-@file include/mem0pool.ic
-The lowest-level memory management
+/**************************************************//**
+@file include/dyn0types.h
+The dynamically allocated buffer types and constants
 
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
+Created 2013-03-16 Sunny Bains
+*******************************************************/
+
+#ifndef dyn0types_h
+#define dyn0types_h
+
+/** Value of dyn_buf_t::block_t::m_magic_n */
+#define DYN_BLOCK_MAGIC_N	375767
+
+/** This is the initial 'payload' size of a dynamic array;
+this must be > MLOG_BUF_MARGIN + 30! */
+#define	DYN_ARRAY_DATA_SIZE	512
+
+/** Flag for dyn_buf_t::block_t::m_used that indicates a full block */
+#define DYN_BLOCK_FULL_FLAG	0x1000000UL
+
+#endif /* dyn0types_h */
diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h
index e3b1e6c16b6..f0e5b4006b6 100644
--- a/storage/innobase/include/eval0eval.h
+++ b/storage/innobase/include/eval0eval.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,7 +36,6 @@ Created 12/29/1997 Heikki Tuuri
 Free the buffer from global dynamic memory for a value of a que_node,
 if it has been allocated in the above function. The freeing for pushed
 column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
 void
 eval_node_free_val_buf(
 /*===================*/
@@ -65,7 +64,7 @@ eval_node_set_int_val(
 	lint		val);	/*!< in: value to set */
 /*****************************************************************//**
 Gets an integer value from an expression node.
-@return	integer value */
+@return integer value */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
@@ -91,7 +90,7 @@ eval_node_copy_val(
 	que_node_t*	node2);	/*!< in: node to copy from */
 /*****************************************************************//**
 Gets a iboolean value from a query node.
-@return	iboolean value */
+@return iboolean value */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
@@ -99,8 +98,7 @@ eval_node_get_ibool_val(
 	que_node_t*	node);	/*!< in: query graph node */
 /*****************************************************************//**
 Evaluates a comparison node.
-@return	the result of the comparison */
-UNIV_INTERN
+@return the result of the comparison */
 ibool
 eval_cmp(
 /*=====*/
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic
index e4b1dd08017..2f759301c03 100644
--- a/storage/innobase/include/eval0eval.ic
+++ b/storage/innobase/include/eval0eval.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,6 @@ Created 12/29/1997 Heikki Tuuri
 
 /*****************************************************************//**
 Evaluates a function node. */
-UNIV_INTERN
 void
 eval_func(
 /*======*/
@@ -41,8 +40,7 @@ NOTE that this memory must be explicitly freed when the query graph is
 freed. If the node already has allocated buffer, that buffer is freed
 here. NOTE that this is the only function where dynamic memory should be
 allocated for a query node val field.
-@return	pointer to allocated buffer */
-UNIV_INTERN
+@return pointer to allocated buffer */
 byte*
 eval_node_alloc_val_buf(
 /*====================*/
@@ -54,7 +52,7 @@ eval_node_alloc_val_buf(
 
 /*****************************************************************//**
 Allocates a new buffer if needed.
-@return	pointer to buffer */
+@return pointer to buffer */
 UNIV_INLINE
 byte*
 eval_node_ensure_val_buf(
@@ -145,7 +143,7 @@ eval_node_set_int_val(
 
 /*****************************************************************//**
 Gets an integer non-SQL null value from an expression node.
-@return	integer value */
+@return integer value */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
@@ -165,7 +163,7 @@ eval_node_get_int_val(
 
 /*****************************************************************//**
 Gets a iboolean value from a query node.
-@return	iboolean value */
+@return iboolean value */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
index 7755fb10343..6705c2c7b64 100644
--- a/storage/innobase/include/eval0proc.h
+++ b/storage/innobase/include/eval0proc.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1998, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,7 @@ Created 1/20/1998 Heikki Tuuri
 
 /**********************************************************************//**
 Performs an execution step of a procedure node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 proc_step(
@@ -41,39 +41,35 @@ proc_step(
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of an if-statement node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 if_step(
 /*====*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of a while-statement node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 while_step(
 /*=======*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of a for-loop node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 for_step(
 /*=====*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of an assignment statement node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 assign_step(
 /*========*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of a procedure call node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 proc_eval_step(
@@ -81,16 +77,14 @@ proc_eval_step(
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of an exit statement node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 exit_step(
 /*======*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of a return-statement node.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 return_step(
 /*========*/
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
index 81418bae2c9..cda3fd7b874 100644
--- a/storage/innobase/include/eval0proc.ic
+++ b/storage/innobase/include/eval0proc.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1998, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 1/20/1998 Heikki Tuuri
 
 /**********************************************************************//**
 Performs an execution step of a procedure node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 proc_step(
@@ -64,7 +64,7 @@ proc_step(
 
 /**********************************************************************//**
 Performs an execution step of a procedure call node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 proc_eval_step(
diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h
index 8bb0ce65a6b..eb6eaa229b5 100644
--- a/storage/innobase/include/fil0crypt.h
+++ b/storage/innobase/include/fil0crypt.h
@@ -78,10 +78,6 @@ struct key_struct
 /** is encryption enabled */
 extern ulong	srv_encrypt_tables;
 
-#ifdef UNIV_PFS_MUTEX
-extern mysql_pfs_key_t fil_crypt_data_mutex_key;
-#endif
-
 /** Mutex helper for crypt_data->scheme
 @param[in, out]	schme	encryption scheme
 @param[in]	exit	should we exit or enter mutex ? */
@@ -132,8 +128,7 @@ struct fil_space_crypt_struct : st_encryption_scheme
 		key_found = new_min_key_version;
 		key_id = new_key_id;
 		my_random_bytes(iv, sizeof(iv));
-		mutex_create(fil_crypt_data_mutex_key,
-			&mutex, SYNC_NO_ORDER_CHECK);
+		mutex_create(LATCH_ID_FIL_CRYPT_DATA_MUTEX, &mutex);
 		locker = crypt_data_scheme_locker;
 		type = new_type;
 
@@ -240,7 +235,8 @@ fil_space_crypt_t *
 fil_space_create_crypt_data(
 /*========================*/
 	fil_encryption_t	encrypt_mode,	/*!< in: encryption mode */
-	uint			key_id);	/*!< in: encryption key id */
+	uint			key_id)		/*!< in: encryption key id */
+	__attribute__((warn_unused_result));
 
 /*********************************************************************
 Destroy crypt data */
@@ -323,7 +319,8 @@ UNIV_INTERN
 bool
 fil_space_check_encryption_read(
 /*============================*/
-	ulint space);          /*!< in: tablespace id */
+	ulint	space)	/*!< in: tablespace id */
+	__attribute__((warn_unused_result));
 
 /******************************************************************
 Decrypt a page
@@ -334,10 +331,11 @@ fil_space_decrypt(
 /*==============*/
 	fil_space_crypt_t*	crypt_data,	/*!< in: crypt data */
 	byte*			tmp_frame,	/*!< in: temporary buffer */
-	ulint			page_size,	/*!< in: page size */
+	const page_size_t&	page_size,	/*!< in: page size */
 	byte*			src_frame,	/*!< in:out: page buffer */
-	dberr_t*		err);		/*!< in: out: DB_SUCCESS or
+	dberr_t*		err)		/*!< in: out: DB_SUCCESS or
 						error code */
+	__attribute__((warn_unused_result));
 
 /*********************************************************************
 Encrypt buffer page
@@ -351,8 +349,9 @@ fil_space_encrypt(
 	ulint	offset,		/*!< in: page no */
 	lsn_t	lsn,		/*!< in: page lsn */
 	byte*	src_frame,	/*!< in: page frame */
-	ulint	size,		/*!< in: size of data to encrypt */
-	byte*	dst_frame);	/*!< in: where to encrypt to */
+	const page_size_t&	page_size,	/*!< in: page size */
+	byte*	dst_frame)	/*!< in: where to encrypt to */
+	__attribute__((warn_unused_result));
 
 /*********************************************************************
 Decrypt buffer page
@@ -362,10 +361,10 @@ UNIV_INTERN
 byte*
 fil_space_decrypt(
 /*==============*/
-	ulint	space,		/*!< in: tablespace id */
-	byte*	src_frame,	/*!< in: page frame */
-	ulint	page_size,	/*!< in: size of data to encrypt */
-	byte*	dst_frame)	/*!< in: where to decrypt to */
+	ulint			space,		/*!< in: tablespace id */
+	byte*			src_frame,	/*!< in: page frame */
+	const page_size_t&	page_size,	/*!< in: page size */
+	byte*			dst_frame)	/*!< in: where to decrypt to */
 	__attribute__((warn_unused_result));
 
 /*********************************************************************
@@ -377,8 +376,9 @@ UNIV_INTERN
 bool
 fil_space_verify_crypt_checksum(
 /*============================*/
-	const byte* src_frame,/*!< in: page frame */
-	ulint zip_size);      /*!< in: size of data to encrypt */
+	const byte*		src_frame,/*!< in: page frame */
+	const page_size_t&	page_size)	/*!< in: page size */
+	__attribute__((warn_unused_result));
 
 /*********************************************************************
 Init threads for key rotation */
@@ -521,9 +521,9 @@ fil_encrypt_buf(
 	ulint		offset,		/*!< in: Page offset */
 	lsn_t		lsn,		/*!< in: lsn */
 	byte*		src_frame,	/*!< in: Source page to be encrypted */
-	ulint		zip_size,	/*!< in: compressed size if
-					row_format compressed */
-	byte*		dst_frame);	/*!< in: outbut buffer */
+	const page_size_t&	page_size,	/*!< in: page size */
+	byte*		dst_frame)	/*!< in: output buffer */
+	__attribute__((warn_unused_result));
 
 /******************************************************************
 Calculate post encryption checksum
@@ -533,8 +533,9 @@ UNIV_INTERN
 ulint
 fil_crypt_calculate_checksum(
 /*=========================*/
-	ulint	zip_size,	/*!< in: zip_size or 0 */
-	byte*	dst_frame);	/*!< in: page where to calculate */
+	const page_size_t&	page_size,	/*!< in: page size */
+	byte*	dst_frame)	/*!< in: page where to calculate */
+	__attribute__((warn_unused_result));
 
 #ifndef UNIV_NONINL
 #include "fil0crypt.ic"
diff --git a/storage/innobase/include/fil0crypt.ic b/storage/innobase/include/fil0crypt.ic
index 5fafa6cd3f0..65ca4def85f 100644
--- a/storage/innobase/include/fil0crypt.ic
+++ b/storage/innobase/include/fil0crypt.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 2015, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -17,7 +17,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
 *****************************************************************************/
 
 /**************************************************//**
-@file include/fil0fil.h
+@file include/fil0crypt.ic
 The low-level file system encryption support functions
 
 Created 04/01/2015 Jan Lindström
@@ -66,3 +66,54 @@ fil_page_encryption_status(
 	}
 	return 0;
 }
+
+/*******************************************************************//**
+Describe the tablespace encryption mode stored in crypt_data.
+@param[in]	crypt_data	crypt data; must not be NULL (checked by ut_ad)
+@return string literal describing crypt_data->encryption */
+UNIV_INLINE
+const char *
+fil_crypt_get_mode(
+/*===============*/
+	const fil_space_crypt_t* crypt_data)
+{
+	ut_ad(crypt_data != NULL);
+
+	switch (crypt_data->encryption) {
+	case FIL_SPACE_ENCRYPTION_DEFAULT:
+		return("Default tablespace encryption mode");
+	case FIL_SPACE_ENCRYPTION_ON:
+		return("Tablespace encrypted");
+	case FIL_SPACE_ENCRYPTION_OFF:
+		return("Tablespace not encrypted");
+	default:
+		/* None of the modes handled above. */
+		ut_error;
+	}
+
+	/* Fallback result in case ut_error above returns. */
+	return("NULL");
+}
+
+/*******************************************************************//**
+Describe the encryption scheme stored in crypt_data.
+@param[in]	crypt_data	crypt data; must not be NULL (checked by ut_ad)
+@return string literal describing crypt_data->type */
+UNIV_INLINE
+const char *
+fil_crypt_get_type(
+	const fil_space_crypt_t* crypt_data)
+{
+	ut_ad(crypt_data != NULL);
+
+	switch (crypt_data->type) {
+	case CRYPT_SCHEME_UNENCRYPTED:
+		return("scheme unencrypted");
+	case CRYPT_SCHEME_1:
+		return("scheme encrypted");
+	default:
+		ut_error;	/* none of the schemes handled above */
+	}
+
+	return("NULL");	/* fallback in case ut_error above returns */
+}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index ae8224d77bb..65f73448c6e 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2016, Oracle and/or its affiliates.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2013, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -30,28 +30,396 @@ Created 10/25/1995 Heikki Tuuri
 
 #ifndef UNIV_INNOCHECKSUM
 
+#include "log0recv.h"
 #include "dict0types.h"
-#include "ut0byte.h"
-#include "os0file.h"
-#include "hash0hash.h"
+#include "page0size.h"
 #ifndef UNIV_HOTBACKUP
-#include "sync0rw.h"
 #include "ibuf0types.h"
+#else
 #include "log0log.h"
+#include "os0file.h"
+#include "m_string.h"
 #endif /* !UNIV_HOTBACKUP */
 
 #include <list>
+#include <vector>
+
+#ifdef UNIV_HOTBACKUP
+#include <cstring>
+/** Determine if a file is intermediate / temporary. Such files are created
+during reorganize partition, rename tables, add / drop columns etc.
+@param[in]	filepath	absolute / relative path or simply a file name
+@retval	true	if it is an intermediate file
+@retval	false	if it is a normal file */
+inline
+bool
+is_intermediate_file(const std::string& filepath)
+{
+	/* Name prefixes of intermediate files. A static array of literals
+	avoids building a std::vector of std::string on every call. */
+	static const char* const	prefixes[]
+		= {"#sql-", "#sql2-", "#tmp#", "#ren#"};
+
+	std::string file_name = filepath;
+
+	/* Extract the file name from a relative or absolute path. */
+	const std::size_t pos = file_name.rfind(OS_PATH_SEPARATOR);
+	if (pos != std::string::npos) {
+		file_name = file_name.substr(pos + 1);
+	}
+
+	/* Lower-case the name. tolower() must be given a value representable
+	as unsigned char; a negative plain char is undefined behavior. */
+	std::transform(file_name.begin(), file_name.end(), file_name.begin(),
+		       [](unsigned char c) {
+			       return(static_cast<char>(::tolower(c))); });
+
+	if (file_name[0] != '#') {
+		/* Without a leading '#', only a name that contains
+		"#tmp#.ibd" counts as intermediate. */
+		return(file_name.rfind("#tmp#.ibd") != std::string::npos);
+	}
+
+	for (const char* prefix : prefixes) {
+		if (0 == std::strncmp(file_name.c_str(), prefix,
+				      std::strlen(prefix))) {
+			return(true);
+		}
+	}
+
+	return(false);
+}
+#endif /* UNIV_HOTBACKUP */
+
+extern const char general_space_name[];
 
 // Forward declaration
 struct trx_t;
-struct fil_space_t;
+class page_id_t;
+class truncate_t;
+struct btr_create_t;
+
+/* structure containing encryption specification */
+typedef struct fil_space_crypt_struct fil_space_crypt_t;
+
+typedef std::list<char*, ut_allocator<char*> >	space_name_list_t;
+
+/** File types */
+enum fil_type_t {
+	/** temporary tablespace (temporary undo log or tables) */
+	FIL_TYPE_TEMPORARY,
+	/** a tablespace that is being imported (no logging until finished) */
+	FIL_TYPE_IMPORT,
+	/** persistent tablespace (for system, undo log or tables) */
+	FIL_TYPE_TABLESPACE,
+	/** redo log covering changes to files of FIL_TYPE_TABLESPACE */
+	FIL_TYPE_LOG
+};
+
+/** Tell whether a file type is one of the three data (non-log) types.
+@param[in]	type	file type to classify
+@return true if type is one of FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT
+or FIL_TYPE_TABLESPACE; false otherwise */
+inline
+bool
+fil_type_is_data(
+	fil_type_t	type)
+{
+	const bool	is_persistent = (type == FIL_TYPE_TABLESPACE);
+	const bool	is_temp_or_import = (type == FIL_TYPE_TEMPORARY
+					     || type == FIL_TYPE_IMPORT);
+	return(is_temp_or_import || is_persistent);
+}
+
+struct fil_node_t;
+
+/** Tablespace or log data space */
+struct fil_space_t {
+	char*		name;	/*!< Tablespace name */
+	ulint		id;	/*!< space id */
+	lsn_t		max_lsn;
+				/*!< LSN of the most recent
+				fil_names_write_if_was_clean().
+				Reset to 0 by fil_names_clear().
+				Protected by log_sys->mutex.
+				If and only if this is nonzero, the
+				tablespace will be in named_spaces. */
+	bool		stop_ios;/*!< true if we want to rename the
+				.ibd file of tablespace and want to
+				stop temporarily posting of new i/o
+				requests on the file */
+	bool		stop_new_ops;
+				/*!< we set this true when we start
+				deleting a single-table tablespace.
+				When this is set following new ops
+				are not allowed:
+				* read IO request
+				* ibuf merge
+				* file flush
+				Note that we can still possibly have
+				new write operations because we don't
+				check this flag when doing flush
+				batches. */
+	bool		is_being_truncated;
+				/*!< this is set to true when we prepare to
+				truncate a single-table tablespace and its
+				.ibd file */
+#ifdef UNIV_DEBUG
+	ulint		redo_skipped_count;
+				/*!< reference count for operations who want
+				to skip redo log in the file space in order
+				to make fsp_space_modify_check pass. */
+#endif
+	fil_type_t	purpose;/*!< purpose */
+	UT_LIST_BASE_NODE_T(fil_node_t) chain;
+				/*!< base node for the file chain */
+	ulint		size;	/*!< tablespace file size in pages;
+				0 if not known yet */
+	ulint		size_in_header;
+				/*!< FSP_SIZE in the tablespace header;
+				0 if not known yet */
+	ulint		free_len;
+				/*!< length of the FSP_FREE list */
+	ulint		free_limit;
+				/*!< contents of FSP_FREE_LIMIT */
+	ulint		flags;	/*!< tablespace flags; see
+				fsp_flags_is_valid(),
+				page_size_t(ulint) (constructor) */
+	ulint		n_reserved_extents;
+				/*!< number of reserved free extents for
+				ongoing operations like B-tree page split */
+	ulint		n_pending_flushes; /*!< this is positive when flushing
+				the tablespace to disk; dropping of the
+				tablespace is forbidden if this is positive */
+	ulint		n_pending_ops;/*!< this is positive when we
+				have pending operations against this
+				tablespace. The pending operations can
+				be ibuf merges or lock validation code
+				trying to read a block.
+				Dropping of the tablespace is forbidden
+				if this is positive.
+				Protected by fil_system->mutex. */
+	hash_node_t	hash;	/*!< hash chain node */
+	hash_node_t	name_hash;/*!< hash chain node in the name_hash table */
+#ifndef UNIV_HOTBACKUP
+	rw_lock_t	latch;	/*!< latch protecting the file space storage
+				allocation */
+#endif /* !UNIV_HOTBACKUP */
+	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
+				/*!< list of spaces with at least one unflushed
+				file we have written to */
+	UT_LIST_NODE_T(fil_space_t) named_spaces;
+				/*!< list of spaces for which MLOG_FILE_NAME
+				records have been issued */
+	bool		is_in_unflushed_spaces;
+				/*!< true if this space is currently in
+				unflushed_spaces */
+	UT_LIST_NODE_T(fil_space_t) space_list;
+				/*!< list of all spaces */
+
+	/** Compression algorithm */
+	Compression::Type	compression_type;
+
+	/** Encryption algorithm */
+	Encryption::Type	encryption_type;
+
+	/** Encrypt key */
+	byte			encryption_key[ENCRYPTION_KEY_LEN];
+
+	/** Encrypt key length*/
+	ulint			encryption_klen;
 
-typedef std::list<const char*> space_name_list_t;
+	/** Encrypt initial vector */
+	byte			encryption_iv[ENCRYPTION_KEY_LEN];
+
+	/** MariaDB encryption data */
+        fil_space_crypt_t* crypt_data;
+
+	/** tablespace crypt data has been read */
+	bool		page_0_crypt_read;
+
+	/** Space file block size */
+	ulint		file_block_size;
+
+	/** True if we have already printed compression failure */
+	bool		printed_compression_failure;
+
+	/** True if page 0 of tablespace is read */
+	bool		read_page0;
+
+	/** Release the reserved free extents.
+	@param[in]	n_reserved	number of reserved extents */
+	void release_free_extents(ulint n_reserved);
+
+	ulint		magic_n;/*!< FIL_SPACE_MAGIC_N */
+};
+
+/** Value of fil_space_t::magic_n */
+#define	FIL_SPACE_MAGIC_N	89472
+
+/** File node of a tablespace or the log data space */
+struct fil_node_t {
+	/** tablespace containing this file */
+	fil_space_t*	space;
+	/** file name; protected by fil_system->mutex and log_sys->mutex. */
+	char*		name;
+	/** whether this file is open */
+	bool		is_open;
+	/** file handle (valid if is_open) */
+	os_file_t	handle;
+	/** event that groups and serializes calls to fsync */
+	os_event_t	sync_event;
+	/** whether the file actually is a raw device or disk partition */
+	bool		is_raw_disk;
+	/** size of the file in database pages (0 if not known yet);
+	the possible last incomplete megabyte may be ignored
+	if space->id == 0 */
+	ulint		size;
+	/** initial size of the file in database pages;
+	FIL_IBD_FILE_INITIAL_SIZE by default */
+	ulint		init_size;
+	/** maximum size of the file in database pages (0 if unlimited) */
+	ulint		max_size;
+	/** count of pending i/o's; is_open must be true if nonzero */
+	ulint		n_pending;
+	/** count of pending flushes; is_open must be true if nonzero */
+	ulint		n_pending_flushes;
+	/** whether the file is currently being extended */
+	bool		being_extended;
+	/** number of writes to the file since the system was started */
+	int64_t		modification_counter;
+	/** the modification_counter of the latest flush to disk */
+	int64_t		flush_counter;
+	/** link to other files in this tablespace */
+	UT_LIST_NODE_T(fil_node_t) chain;
+	/** link to the fil_system->LRU list (keeping track of open files) */
+	UT_LIST_NODE_T(fil_node_t) LRU;
+
+	/** whether the file system of this file supports PUNCH HOLE */
+	bool		punch_hole;
+
+	/** block size to use for punching holes */
+	ulint		block_size;
+
+	/** whether atomic write is enabled for this file */
+	bool		atomic_write;
+
+	/** FIL_NODE_MAGIC_N */
+	ulint		magic_n;
+};
+
+/** Value of fil_node_t::magic_n */
+#define	FIL_NODE_MAGIC_N	89389
+
+/** Common InnoDB file extensions */
+enum ib_extention {
+	NO_EXT = 0,
+	IBD = 1,
+	ISL = 2,
+	CFG = 3,
+	CFP = 4
+};
+extern const char* dot_ext[];
+#define DOT_IBD dot_ext[IBD]
+#define DOT_ISL dot_ext[ISL]
+#define DOT_CFG dot_ext[CFG]
+#define DOT_CPF dot_ext[CFP]
+
+/** Wrapper for a path to a directory.
+This folder may or may not yet exist.  Since not all directory paths
+end in "/", we should only use this for a directory path or a filepath
+that has a ".ibd" extension. */
+class Folder
+{
+public:
+	/** Default constructor: no folder is wrapped and both lengths are 0 */
+	Folder() : m_folder(NULL), m_folder_len(0), m_abs_len(0) {}
+
+	/** Constructor
+	@param[in]	path	pathname (not necessarily NUL-terminated)
+	@param[in]	len	length of the path, in bytes */
+	Folder(const char* path, size_t len);
+
+	/** Assignment operator
+	@param[in]	path	folder string provided */
+	class Folder& operator=(const char* path);
+
+	/** Destructor; releases the wrapped folder string with ut_free() */
+	~Folder()
+	{
+		ut_free(m_folder);
+	}
+
+	/** Implicit type conversion
+	@return the wrapped folder string */
+	operator const char*() const
+	{
+		return(m_folder);
+	}
+
+	/** Explicit type conversion
+	@return the wrapped folder string */
+	const char* operator()() const
+	{
+		return(m_folder);
+	}
+
+	/** Get the length of the wrapped folder string.
+	@return the length of m_folder */
+	size_t len() const
+	{
+		return(m_folder_len);
+	}
+
+	/** Determine if two folders are equal
+	@param[in]	other	folder to compare to
+	@return whether the folders are equal */
+	bool operator==(const Folder& other) const;
+
+	/** Determine if the left folder is the same or an ancestor of
+	(contains) the right folder.
+	@param[in]	other	folder to compare to
+	@return whether this is the same or an ancestor of the other folder. */
+	bool operator>=(const Folder& other) const;
+
+	/** Determine if the left folder is an ancestor of (contains)
+	the right folder.
+	@param[in]	other	folder to compare to
+	@return whether this is an ancestor of the other folder */
+	bool operator>(const Folder& other) const;
+
+	/** Determine if the directory referenced by m_folder exists.
+	@return whether the directory exists */
+	bool exists();
+
+private:
+	/** Build the basic folder name from the path and length provided
+	@param[in]	path	pathname (not necessarily NUL-terminated)
+	@param[in]	len	length of the path, in bytes */
+	void	make_path(const char* path, size_t len);
+
+	/** Resolve a relative path in m_folder to an absolute path
+	in m_abs_path setting m_abs_len. */
+	void	make_abs_path();
+
+	/** The wrapped folder string */
+	char*	m_folder;
+
+	/** Length of m_folder */
+	size_t	m_folder_len;
+
+	/** A full absolute path to the same file. */
+	char	m_abs_path[FN_REFLEN + 2];
+
+	/** Length of m_abs_path to the deepest folder */
+	size_t	m_abs_len;
+};
 
 /** When mysqld is run, the default directory "." is the mysqld datadir,
 but in the MySQL Embedded Server Library and mysqlbackup it is not the default
 directory, and we must set the base file path explicitly */
 extern const char*	fil_path_to_mysql_datadir;
+extern Folder   	folder_mysql_datadir;
 
 /** Initial size of a single-table tablespace in pages */
 #define FIL_IBD_FILE_INITIAL_SIZE	4
@@ -66,17 +434,15 @@ of the address is FIL_NULL, the address is considered undefined. */
 typedef	byte	fil_faddr_t;	/*!< 'type' definition in C: an address
 				stored in a file page is a string of bytes */
 
-#endif /* !UNIV_INNOCHECKSUM */
-
 #define FIL_ADDR_PAGE	0	/* first in address is the page offset */
 #define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
-
+#endif /* !UNIV_INNOCHECKSUM */
 #define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
 
 #ifndef UNIV_INNOCHECKSUM
 
 /** File space address */
-struct fil_addr_t{
+struct fil_addr_t {
 	ulint	page;		/*!< page number within a space */
 	ulint	boffset;	/*!< byte offset within the page */
 };
@@ -135,11 +501,34 @@ extern fil_addr_t	fil_addr_null;
 					used to encrypt the page + 32-bit checksum
 					or 64 bits of zero if no encryption
 					*/
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /*!< starting from 4.1.x this
-					contains the space id of the page */
+/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at
+FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */
+
+/** Control information version format (u8) */
+static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
+
+/** Compression algorithm (u8) */
+static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1;
+
+/** Original page type (u16) */
+static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1;
+
+/** Original data size in bytes (u16)*/
+static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2;
+
+/** Size after compression (u16) */
+static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2;
+
+/** This overloads FIL_PAGE_FILE_FLUSH_LSN for RTREE Split Sequence Number */
+#define	FIL_RTREE_SPLIT_SEQ_NUM	FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+
+/** starting from 4.1.x this contains the space id of the page */
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34
+
 #define FIL_PAGE_SPACE_ID  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
 
-#define FIL_PAGE_DATA		38	/*!< start of the data on the page */
+#define FIL_PAGE_DATA		38U	/*!< start of the data on the page */
+
 /* Following are used when page compression is used */
 #define FIL_PAGE_COMPRESSED_SIZE 2      /*!< Number of bytes used to store
 					actual payload data size on
@@ -161,6 +550,7 @@ extern fil_addr_t	fil_addr_null;
 						 then encrypted */
 #define FIL_PAGE_PAGE_COMPRESSED 34354  /*!< page compressed page */
 #define FIL_PAGE_INDEX		17855	/*!< B-tree node */
+#define FIL_PAGE_RTREE		17854	/*!< R-tree node */
 #define FIL_PAGE_UNDO_LOG	2	/*!< Undo log page */
 #define FIL_PAGE_INODE		3	/*!< Index node */
 #define FIL_PAGE_IBUF_FREE_LIST	4	/*!< Insert buffer free list */
@@ -174,20 +564,30 @@ extern fil_addr_t	fil_addr_null;
 #define FIL_PAGE_TYPE_BLOB	10	/*!< Uncompressed BLOB page */
 #define FIL_PAGE_TYPE_ZBLOB	11	/*!< First compressed BLOB page */
 #define FIL_PAGE_TYPE_ZBLOB2	12	/*!< Subsequent compressed BLOB page */
-#define FIL_PAGE_TYPE_COMPRESSED	13	/*!< Compressed page */
-#define FIL_PAGE_TYPE_LAST	FIL_PAGE_TYPE_COMPRESSED
+#define FIL_PAGE_TYPE_UNKNOWN	13	/*!< In old tablespaces, garbage
+					in FIL_PAGE_TYPE is replaced with this
+					value when flushing pages. */
+#define FIL_PAGE_COMPRESSED	14	/*!< Compressed page */
+#define FIL_PAGE_ENCRYPTED	15	/*!< Encrypted page */
+#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16
+					/*!< Compressed and Encrypted page */
+#define FIL_PAGE_ENCRYPTED_RTREE 17	/*!< Encrypted R-tree page */
+
+/** Used by i_s.cc to index into the text description. */
+#define FIL_PAGE_TYPE_LAST	FIL_PAGE_TYPE_UNKNOWN
 					/*!< Last page type */
 /* @} */
 
-/** Space types @{ */
-#define FIL_TABLESPACE		501	/*!< tablespace */
-#define FIL_LOG			502	/*!< redo log */
-/* @} */
+/** Check if page_type (evaluated twice!) is an index (B-tree or R-tree) type */
+#define fil_page_type_is_index(page_type)                          \
+        ((page_type) == FIL_PAGE_INDEX || (page_type) == FIL_PAGE_RTREE)
 
-#ifndef UNIV_INNOCHECKSUM
+/** Check whether the page is an index page (either regular B-tree index or
+R-tree index) */
+#define fil_page_index_page_check(page)                         \
+        fil_page_type_is_index(fil_page_get_type(page))
 
-/* structure containing encryption specification */
-typedef struct fil_space_crypt_struct fil_space_crypt_t;
+#ifndef UNIV_INNOCHECKSUM
 
 /** The number of fsyncs done to the log */
 extern ulint	fil_n_log_flushes;
@@ -200,159 +600,26 @@ extern ulint	fil_n_pending_tablespace_flushes;
 /** Number of files currently open */
 extern ulint	fil_n_file_opened;
 
-struct fsp_open_info {
-	ibool		success;	/*!< Has the tablespace been opened? */
-	const char*	check_msg;	/*!< fil_check_first_page() message */
-	ibool		valid;		/*!< Is the tablespace valid? */
-	os_file_t	file;		/*!< File handle */
-	char*		filepath;	/*!< File path to open */
-	lsn_t		lsn;		/*!< Flushed LSN from header page */
-	ulint		id;		/*!< Space ID */
-	ulint		flags;		/*!< Tablespace flags */
-	ulint		encryption_error; /*!< if an encryption error occurs */
-#ifdef UNIV_LOG_ARCHIVE
-	ulint		arch_log_no;	/*!< latest archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
-	fil_space_crypt_t* crypt_data;	/*!< crypt data */
-	dict_table_t*	table;		/*!< table */
-};
-
-struct fil_space_t;
-
-/** File node of a tablespace or the log data space */
-struct fil_node_t {
-	fil_space_t*	space;	/*!< backpointer to the space where this node
-				belongs */
-	char*		name;	/*!< path to the file */
-	ibool		open;	/*!< TRUE if file open */
-	os_file_t	handle;	/*!< OS handle to the file, if file open */
-	os_event_t	sync_event;/*!< Condition event to group and
-				serialize calls to fsync */
-	ibool		is_raw_disk;/*!< TRUE if the 'file' is actually a raw
-				device or a raw disk partition */
-	ulint		size;	/*!< size of the file in database pages, 0 if
-				not known yet; the possible last incomplete
-				megabyte may be ignored if space == 0 */
-	ulint		n_pending;
-				/*!< count of pending i/o's on this file;
-				closing of the file is not allowed if
-				this is > 0 */
-	ulint		n_pending_flushes;
-				/*!< count of pending flushes on this file;
-				closing of the file is not allowed if
-				this is > 0 */
-	ibool		being_extended;
-				/*!< TRUE if the node is currently
-				being extended. */
-	ib_int64_t	modification_counter;/*!< when we write to the file we
-				increment this by one */
-	ib_int64_t	flush_counter;/*!< up to what
-				modification_counter value we have
-				flushed the modifications to disk */
-	ulint		file_block_size;/*!< file system block size */
-	UT_LIST_NODE_T(fil_node_t) chain;
-				/*!< link field for the file chain */
-	UT_LIST_NODE_T(fil_node_t) LRU;
-				/*!< link field for the LRU list */
-	ulint		magic_n;/*!< FIL_NODE_MAGIC_N */
-};
-
-/** Value of fil_node_t::magic_n */
-#define	FIL_NODE_MAGIC_N	89389
-
-/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_t {
-	char*		name;	/*!< space name = the path to the first file in
-				it */
-	ulint		id;	/*!< space id */
-	ib_int64_t	tablespace_version;
-				/*!< in DISCARD/IMPORT this timestamp
-				is used to check if we should ignore
-				an insert buffer merge request for a
-				page because it actually was for the
-				previous incarnation of the space */
-	ibool		mark;	/*!< this is set to TRUE at database startup if
-				the space corresponds to a table in the InnoDB
-				data dictionary; so we can print a warning of
-				orphaned tablespaces */
-	ibool		stop_ios;/*!< TRUE if we want to rename the
-				.ibd file of tablespace and want to
-				stop temporarily posting of new i/o
-				requests on the file */
-	ibool		stop_new_ops;
-				/*!< we set this TRUE when we start
-				deleting a single-table tablespace.
-				When this is set following new ops
-				are not allowed:
-				* read IO request
-				* ibuf merge
-				* file flush
-				Note that we can still possibly have
-				new write operations because we don't
-				check this flag when doing flush
-				batches. */
-	ulint		purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
-				FIL_ARCH_LOG */
-	UT_LIST_BASE_NODE_T(fil_node_t) chain;
-				/*!< base node for the file chain */
-	ulint		size;	/*!< space size in pages; 0 if a single-table
-				tablespace whose size we do not know yet;
-				last incomplete megabytes in data files may be
-				ignored if space == 0 */
-	ulint		flags;	/*!< tablespace flags; see
-				fsp_flags_is_valid(),
-				fsp_flags_get_zip_size() */
-	ulint		n_reserved_extents;
-				/*!< number of reserved free extents for
-				ongoing operations like B-tree page split */
-	ulint		n_pending_flushes; /*!< this is positive when flushing
-				the tablespace to disk; dropping of the
-				tablespace is forbidden if this is positive */
-	ulint		n_pending_ops;/*!< this is positive when we
-				have pending operations against this
-				tablespace. The pending operations can
-				be ibuf merges or lock validation code
-				trying to read a block.
-				Dropping of the tablespace is forbidden
-				if this is positive */
-	hash_node_t	hash;	/*!< hash chain node */
-	hash_node_t	name_hash;/*!< hash chain the name_hash table */
-#ifndef UNIV_HOTBACKUP
-	rw_lock_t	latch;	/*!< latch protecting the file space storage
-				allocation */
-#endif /* !UNIV_HOTBACKUP */
-	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
-				/*!< list of spaces with at least one unflushed
-				file we have written to */
-	bool		is_in_unflushed_spaces;
-				/*!< true if this space is currently in
-				unflushed_spaces */
-	bool		printed_compression_failure;
-				/*!< true if we have already printed
-				compression failure */
-	fil_space_crypt_t* crypt_data;
-				/*!< tablespace crypt data or NULL */
-	bool		page_0_crypt_read;
-				/*!< tablespace crypt data has been
-				read */
-	ulint		file_block_size;
-				/*!< file system block size */
-
-	UT_LIST_NODE_T(fil_space_t) space_list;
-				/*!< list of all spaces */
-
-	ulint		magic_n;/*!< FIL_SPACE_MAGIC_N */
-};
-
-/** Value of fil_space_t::magic_n */
-#define	FIL_SPACE_MAGIC_N	89472
+/** Look up a tablespace.
+The caller should hold an InnoDB table lock or a MDL that prevents
+the tablespace from being dropped during the operation,
+or the caller should be in single-threaded crash recovery mode
+(no user connections that could drop tablespaces).
+If this is not the case, fil_space_acquire() and fil_space_release()
+should be used instead.
+@param[in]	id	tablespace ID
+@return tablespace, or NULL if not found */
+fil_space_t*
+fil_space_get(
+	ulint	id)
+	MY_ATTRIBUTE((warn_unused_result));
 
 /** The tablespace memory cache; also the totality of logs (the log
 data space) is stored here; below we talk about tablespaces, but also
 the ib_logfiles form a 'space' and it is handled here */
 struct fil_system_t {
 #ifndef UNIV_HOTBACKUP
-	ib_mutex_t		mutex;		/*!< The mutex protecting the cache */
+	ib_mutex_t	mutex;		/*!< The mutex protecting the cache */
 #endif /* !UNIV_HOTBACKUP */
 	hash_table_t*	spaces;		/*!< The hash table of spaces in the
 					system; they are hashed on the space
@@ -379,7 +646,7 @@ struct fil_system_t {
 	ulint		n_open;		/*!< number of files currently open */
 	ulint		max_n_open;	/*!< n_open is not allowed to exceed
 					this */
-	ib_int64_t	modification_counter;/*!< when we write to a file we
+	int64_t		modification_counter;/*!< when we write to a file we
 					increment this by one */
 	ulint		max_assigned_id;/*!< maximum space id in the existing
 					tables, or assigned during the time
@@ -387,7 +654,7 @@ struct fil_system_t {
 					startup we scan the data dictionary
 					and set here the maximum of the
 					space id's of the tables there */
-	ib_int64_t	tablespace_version;
+	int64_t		tablespace_version;
 					/*!< a counter which is incremented for
 					every space object memory creation;
 					every space mem object gets a
@@ -397,6 +664,12 @@ struct fil_system_t {
 					request */
 	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
 					/*!< list of all file spaces */
+	UT_LIST_BASE_NODE_T(fil_space_t) named_spaces;
+					/*!< list of all file spaces
+					for which a MLOG_FILE_NAME
+					record has been written since
+					the latest redo log checkpoint.
+					Protected only by log_sys->mutex. */
 	ibool		space_id_reuse_warned;
 					/* !< TRUE if fil_space_create()
 					has issued a warning about
@@ -407,104 +680,119 @@ struct fil_system_t {
 initialized. */
 extern fil_system_t*	fil_system;
 
+#include "fil0crypt.h"
+
 #ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
-	ulint	id);	/*!< in: space id */
-/*******************************************************************//**
-Returns the latch of a file space.
-@return	latch protecting storage allocation */
-UNIV_INTERN
+/** Returns the latch of a file space.
+@param[in]	id	space id
+@param[out]	flags	tablespace flags
+@return latch protecting storage allocation */
 rw_lock_t*
 fil_space_get_latch(
-/*================*/
-	ulint	id,	/*!< in: space id */
-	ulint*	zip_size);/*!< out: compressed page size, or
-			0 for uncompressed tablespaces */
-/*******************************************************************//**
-Returns the type of a file space.
-@return	FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
+	ulint	id,
+	ulint*	flags);
+
+/** Gets the type of a file space.
+@param[in]	id	tablespace identifier
+@return file type */
+fil_type_t
 fil_space_get_type(
-/*===============*/
-	ulint	id);	/*!< in: space id */
+	ulint	id);
+
+/** Note that a tablespace has been imported.
+It is initially marked as FIL_TYPE_IMPORT so that no logging is
+done during the import process when the space ID is stamped to each page.
+Now we change it to FIL_TYPE_TABLESPACE to start redo and undo logging.
+NOTE: temporary tablespaces are never imported.
+@param[in]	id	tablespace identifier */
+void
+fil_space_set_imported(
+	ulint	id);
 
+# ifdef UNIV_DEBUG
+/** Determine if a tablespace is temporary.
+@param[in]	id	tablespace identifier
+@return whether it is a temporary tablespace */
+bool
+fsp_is_temporary(ulint id)
+MY_ATTRIBUTE((warn_unused_result, pure));
+# endif /* UNIV_DEBUG */
 #endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed.
-@return pointer to the file name, or NULL on error */
-UNIV_INTERN
+
+/** Append a file to the chain of files of a space.
+@param[in]	name		file name of a file that is not open
+@param[in]	size		file size in entire database blocks
+@param[in,out]	space		tablespace from fil_space_create()
+@param[in]	is_raw		whether this is a raw device or partition
+@param[in]	atomic_write	true if atomic write enabled
+@param[in]	max_pages	maximum number of pages in file,
+ULINT_MAX means the file size is unlimited.
+@return pointer to the file name
+@retval NULL if error */
 char*
 fil_node_create(
-/*============*/
-	const char*	name,	/*!< in: file name (file must be closed) */
-	ulint		size,	/*!< in: file size in database blocks, rounded
-				downwards to an integer */
-	ulint		id,	/*!< in: space id where to append */
-	ibool		is_raw)	/*!< in: TRUE if a raw device or
-				a raw disk partition */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
-void
-fil_space_truncate_start(
-/*=====================*/
-	ulint	id,		/*!< in: space id */
-	ulint	trunc_len);	/*!< in: truncate by this much; it is an error
-				if this does not equal to the combined size of
-				some initial files in the space */
-#endif /* UNIV_LOG_ARCHIVE */
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table.
-If there is an error, prints an error message to the .err log.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
+	const char*	name,
+	ulint		size,
+	fil_space_t*	space,
+	bool		is_raw,
+	bool		atomic_write,
+	ulint		max_pages = ULINT_MAX)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Create a space memory object and put it to the fil_system hash table.
+The tablespace name is independent from the tablespace file-name.
+Error messages are issued to the server log.
+@param[in]	name	tablespace name
+@param[in]	id	tablespace identifier
+@param[in]	flags	tablespace flags
+@param[in]	purpose	tablespace purpose
+@return pointer to created tablespace, to be filled in with fil_node_create()
+@retval NULL on failure (such as when the same tablespace exists) */
+fil_space_t*
 fil_space_create(
-/*=============*/
-	const char*	name,	/*!< in: space name */
-	ulint		id,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size, or
-				0 for uncompressed tablespaces */
-	ulint		purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+	const char*	name,
+	ulint		id,
+	ulint		flags,
+	fil_type_t	purpose,	/*!< in: purpose, e.g. FIL_TYPE_TABLESPACE */
 	fil_space_crypt_t* crypt_data, /*!< in: crypt data */
-	bool		create_table); /*!< in: true if create table */
+	bool		create_table)  /*!< in: true if create table */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*******************************************************************//**
 Assigns a new space id for a new single-table tablespace. This works simply by
 incrementing the global counter. If 4 billion id's is not enough, we may need
 to recycle id's.
-@return	TRUE if assigned, FALSE if not */
-UNIV_INTERN
-ibool
+@return true if assigned, false if not */
+bool
 fil_assign_new_space_id(
 /*====================*/
 	ulint*	space_id);	/*!< in/out: space id */
-/*******************************************************************//**
-Returns the path from the first fil_node_t found for the space ID sent.
+
+/** Frees a space object from the tablespace memory cache.
+Closes the files in the chain but does not delete them.
+There must not be any pending i/o's or flushes on the files.
+@param[in]	id		tablespace identifier
+@param[in]	x_latched	whether the caller holds X-mode space->latch
+@return true if success */
+bool
+fil_space_free(
+	ulint		id,
+	bool		x_latched);
+
+/** Returns the path from the first fil_node_t found with this space ID.
 The caller is responsible for freeing the memory allocated here for the
 value returned.
-@return	a copy of fil_node_t::path, NULL if space is zero or not found. */
-UNIV_INTERN
+@param[in]	id	Tablespace ID
+@return own: A copy of fil_node_t::path, NULL if space ID is zero
+or not found. */
 char*
 fil_space_get_first_path(
-/*=====================*/
-	ulint	id);	/*!< in: space id */
+	ulint		id);
+
 /*******************************************************************//**
 Returns the size of the space in pages. The tablespace must be cached in the
 memory cache.
-@return	space size, 0 if space not found */
-UNIV_INTERN
+@return space size, 0 if space not found */
 ulint
 fil_space_get_size(
 /*===============*/
@@ -512,34 +800,44 @@ fil_space_get_size(
 /*******************************************************************//**
 Returns the flags of the space. The tablespace must be cached
 in the memory cache.
-@return	flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
+@return flags, ULINT_UNDEFINED if space not found */
 ulint
 fil_space_get_flags(
 /*================*/
 	ulint	id);	/*!< in: space id */
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return	compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
-	ulint	id);	/*!< in: space id */
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return	TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
-	ulint	id,	/*!< in: space id */
-	ulint	page_no);/*!< in: page number */
+
+/** Check if a tablespace is marked for truncate.
+@param[in]	id	space id
+@return true if tablespace is marked for truncate. */
+bool
+fil_space_is_being_truncated(
+	ulint id);
+
+/** Open each fil_node_t of a named fil_space_t if not already open.
+@param[in]	name	Tablespace name
+@return true if all file nodes are opened. */
+bool
+fil_space_open(
+	const char*	name);
+
+/** Close each fil_node_t of a named fil_space_t if open.
+@param[in]	name	Tablespace name */
+void
+fil_space_close(
+	const char*	name);
+
+/** Returns the page size of the space and whether it is compressed or not.
+The tablespace must be cached in the memory cache.
+@param[in]	id	space id
+@param[out]	found	true if tablespace was found
+@return page size */
+const page_size_t
+fil_space_get_page_size(
+	ulint	id,
+	bool*	found);
+
 /****************************************************************//**
 Initializes the tablespace memory cache. */
-UNIV_INTERN
 void
 fil_init(
 /*=====*/
@@ -547,7 +845,6 @@ fil_init(
 	ulint	max_n_open);	/*!< in: max number of open files */
 /*******************************************************************//**
 Initializes the tablespace memory cache. */
-UNIV_INTERN
 void
 fil_close(void);
 /*===========*/
@@ -557,21 +854,18 @@ database server shutdown. This should be called at a server startup after the
 space objects for the log and the system tablespace have been created. The
 purpose of this operation is to make sure we never run out of file descriptors
 if we need to read from the insert buffer or to write to the log. */
-UNIV_INTERN
 void
 fil_open_log_and_system_tablespace_files(void);
 /*==========================================*/
 /*******************************************************************//**
 Closes all open files. There must not be any pending i/o's or not flushed
 modifications in the files. */
-UNIV_INTERN
 void
 fil_close_all_files(void);
 /*=====================*/
 /*******************************************************************//**
 Closes the redo log files. There must not be any pending i/o's or not
 flushed modifications in the files. */
-UNIV_INTERN
 void
 fil_close_log_files(
 /*================*/
@@ -579,112 +873,198 @@ fil_close_log_files(
 /*******************************************************************//**
 Sets the max tablespace id counter if the given number is bigger than the
 previous value. */
-UNIV_INTERN
 void
 fil_set_max_space_id_if_bigger(
 /*===========================*/
 	ulint	max_id);/*!< in: maximum known id */
 #ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace.
-@return	DB_SUCCESS or error number */
-UNIV_INTERN
+
+/** Write the flushed LSN to the page header of the first page in the
+system tablespace.
+@param[in]	lsn	flushed LSN
+@return DB_SUCCESS or error number */
 dberr_t
-fil_write_flushed_lsn_to_data_files(
-/*================================*/
-	lsn_t	lsn,		/*!< in: lsn to write */
-	ulint	arch_log_no);	/*!< in: latest archived log file number */
-/*******************************************************************//**
-Reads the flushed lsn, arch no, and tablespace flag fields from a data
-file at database startup.
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-UNIV_INTERN
-const char*
-fil_read_first_page(
-/*================*/
-	os_file_t	data_file,		/*!< in: open data file */
-	ibool		one_read_already,	/*!< in: TRUE if min and max
-						parameters below already
-						contain sensible data */
-	ulint*		flags,			/*!< out: tablespace flags */
-	ulint*		space_id,		/*!< out: tablespace ID */
-#ifdef UNIV_LOG_ARCHIVE
-	ulint*		min_arch_log_no,	/*!< out: min of archived
-						log numbers in data files */
-	ulint*		max_arch_log_no,	/*!< out: max of archived
-						log numbers in data files */
-#endif /* UNIV_LOG_ARCHIVE */
-	lsn_t*		min_flushed_lsn,	/*!< out: min of flushed
-						lsn values in data files */
-	lsn_t*		max_flushed_lsn,	/*!< out: max of flushed
-						lsn values in data files */
-	fil_space_crypt_t** crypt_data)		/*!< out: crypt data */
-
-	__attribute__((warn_unused_result));
-/*******************************************************************//**
-Increments the count of pending operation, if space is not being deleted.
-@return	TRUE if being deleted, and operation should be skipped */
-UNIV_INTERN
-ibool
-fil_inc_pending_ops(
-/*================*/
-	ulint	id,		/*!< in: space id */
-	ibool	print_err);	/*!< in: need to print error or not */
-/*******************************************************************//**
-Decrements the count of pending operations. */
-UNIV_INTERN
+fil_write_flushed_lsn(
+	lsn_t	lsn)
+MY_ATTRIBUTE((warn_unused_result));
+
+/** Acquire a tablespace when it could be dropped concurrently.
+Used by background threads that do not necessarily hold proper locks
+for concurrency control.
+@param[in]	id	tablespace ID
+@return the tablespace, or NULL if missing or being deleted */
+fil_space_t*
+fil_space_acquire(
+	ulint	id)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Acquire a tablespace that may not exist.
+Used by background threads that do not necessarily hold proper locks
+for concurrency control.
+@param[in]	id	tablespace ID
+@return the tablespace, or NULL if missing or being deleted */
+fil_space_t*
+fil_space_acquire_silent(
+	ulint	id)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Release a tablespace acquired with fil_space_acquire().
+@param[in,out]	space	tablespace to release  */
 void
-fil_decr_pending_ops(
-/*=================*/
-	ulint	id);	/*!< in: space id */
+fil_space_release(
+	fil_space_t*	space);
+
+/** Wrapper with reference-counting for a fil_space_t. */
+class FilSpace
+{
+public:
+	/** Default constructor: Use this when reference counting
+	is done outside this wrapper. */
+	FilSpace() : m_space(NULL) {}
+
+	/** Constructor: Look up the tablespace and increment the
+	reference count if found.
+	@param[in]	space_id	tablespace ID */
+	explicit FilSpace(ulint space_id)
+		: m_space(fil_space_acquire(space_id)) {}
+
+	/** Assignment operator: This assumes that fil_space_acquire()
+	has already been done for the fil_space_t. The caller must
+	assign NULL if it calls fil_space_release().
+	@param[in]	space	tablespace to assign */
+	class FilSpace& operator=(
+		fil_space_t*	space)
+	{
+		/* fil_space_acquire() must have been invoked. */
+		ut_ad(space == NULL || space->n_pending_ops > 0);
+		m_space = space;
+		return(*this);
+	}
+
+	/** Destructor - Decrement the reference count if a fil_space_t
+	is still assigned. */
+	~FilSpace()
+	{
+		if (m_space != NULL) {
+			fil_space_release(m_space);
+		}
+	}
+
+	/** Implicit type conversion
+	@return the wrapped object */
+	operator const fil_space_t*() const
+	{
+		return(m_space);
+	}
+
+	/** Explicit type conversion
+	@return the wrapped object */
+	const fil_space_t* operator()() const
+	{
+		return(m_space);
+	}
+
+private:
+	/** The wrapped pointer */
+	fil_space_t*	m_space;
+};
+
 #endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
-the datadir that we should use in replaying the file operations.
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
-	byte*	ptr,		/*!< in: buffer containing the log record body,
-				or an initial segment of it, if the record does
-				not fir completely between ptr and end_ptr */
-	byte*	end_ptr,	/*!< in: buffer end */
-	ulint	type,		/*!< in: the type of this log record */
-	ulint	space_id,	/*!< in: the space id of the tablespace in
-				question, or 0 if the log record should
-				only be parsed but not replayed */
-	ulint	log_flags);	/*!< in: redo log flags
-				(stored in the page number parameter) */
-/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-@return	TRUE if success */
-UNIV_INTERN
+
+/********************************************************//**
+Creates the database directory for a table if it does not exist yet. */
+void
+fil_create_directory_for_tablename(
+/*===============================*/
+	const char*	name);	/*!< in: name in the standard
+				'databasename/tablename' format */
+/********************************************************//**
+Recreates table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_table(
+/*===============*/
+	ulint			space_id,	/*!< in: space id */
+	ulint			format_flags,	/*!< in: page format */
+	ulint			flags,		/*!< in: tablespace flags */
+	const char*		name,		/*!< in: table name */
+	truncate_t&		truncate);	/*!< in/out: The information of
+						TRUNCATE log record */
+/********************************************************//**
+Recreates the tablespace and table indexes by applying
+TRUNCATE log record during recovery.
+@return DB_SUCCESS or error code */
+dberr_t
+fil_recreate_tablespace(
+/*====================*/
+	ulint			space_id,	/*!< in: space id */
+	ulint			format_flags,	/*!< in: page format */
+	ulint			flags,		/*!< in: tablespace flags */
+	const char*		name,		/*!< in: table name */
+	truncate_t&		truncate,	/*!< in/out: The information of
+						TRUNCATE log record */
+	lsn_t			recv_lsn);	/*!< in: the end LSN of
+						the log record */
+/** Replay a file rename operation if possible.
+@param[in]	space_id	tablespace identifier
+@param[in]	first_page_no	first page number in the file
+@param[in]	name		old file name
+@param[in]	new_name	new file name
+@return	whether the operation was successfully applied
+(the name did not exist, or new_name did not exist and
+name was successfully renamed to new_name)  */
+bool
+fil_op_replay_rename(
+	ulint		space_id,
+	ulint		first_page_no,
+	const char*	name,
+	const char*	new_name)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Deletes an IBD tablespace, either general or single-table.
+The tablespace must be cached in the memory cache. This will delete the
+datafile, fil_space_t & fil_node_t entries from the fil_system_t cache.
+@param[in]	id		Tablespace id
+@param[in]	buf_remove	Specify the action to take on the pages
+for this table in the buffer pool.
+@return DB_SUCCESS or error */
 dberr_t
 fil_delete_tablespace(
-/*==================*/
-	ulint		id,		/*!< in: space id */
-	buf_remove_t	buf_remove);	/*!< in: specify the action to take
-					on the tables pages in the buffer
-					pool */
+	ulint		id,
+	buf_remove_t	buf_remove);
+
+/** Truncate the tablespace to needed size.
+@param[in]	space_id	id of tablespace to truncate
+@param[in]	size_in_pages	truncate size.
+@return true if truncate was successful. */
+bool
+fil_truncate_tablespace(
+	ulint		space_id,
+	ulint		size_in_pages);
+
+/*******************************************************************//**
+Prepare for truncating a single-table tablespace. The tablespace
+must be cached in the memory cache.
+1) Check pending operations on a tablespace;
+2) Remove all insert buffer entries for the tablespace;
+@return DB_SUCCESS or error */
+dberr_t
+fil_prepare_for_truncate(
+/*=====================*/
+	ulint	id);			/*!< in: space id */
+/**********************************************************************//**
+Reinitialize the original tablespace header with the same space id
+for single tablespace */
+void
+fil_reinit_space_header(
+/*====================*/
+	ulint		id,	/*!< in: space id */
+	ulint		size);	/*!< in: size in blocks */
 /*******************************************************************//**
 Closes a single-table tablespace. The tablespace must be cached in the
 memory cache. Free all pages used by the tablespace.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 dberr_t
 fil_close_tablespace(
 /*=================*/
@@ -703,9 +1083,8 @@ memory cache. Discarding is like deleting a tablespace, but
  3. When the user does IMPORT TABLESPACE, the tablespace will have the
     same id as it originally had.
 
- 4. Free all the pages in use by the tablespace if rename=TRUE.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+ 4. Free all the pages in use by the tablespace if rename=true.
+@return DB_SUCCESS or error */
 dberr_t
 fil_discard_tablespace(
 /*===================*/
@@ -727,105 +1106,56 @@ fil_rename_tablespace_check(
 	const char*	new_path,
 	bool		is_discarded);
 
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
+/** Rename a single-table tablespace.
+The tablespace must exist in the memory cache.
+@param[in]	id		tablespace identifier
+@param[in]	old_path	old file name
+@param[in]	new_name	new table name in the
+databasename/tablename format
+@param[in]	new_path_in	new file name,
+or NULL if it is located in the normal data directory
+@return true if success */
+bool
 fil_rename_tablespace(
-/*==================*/
-	const char*	old_name_in,	/*!< in: old table name in the
-					standard databasename/tablename
-					format of InnoDB, or NULL if we
-					do the rename based on the space
-					id only */
-	ulint		id,		/*!< in: space id */
-	const char*	new_name,	/*!< in: new table name in the
-					standard databasename/tablename
-					format of InnoDB */
-	const char*	new_path);	/*!< in: new full datafile path
-					if the tablespace is remotely
-					located, or NULL if it is located
-					in the normal data directory. */
+	ulint		id,
+	const char*	old_path,
+	const char*	new_name,
+	const char*	new_path_in);
 
 /*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-@return	own: file name */
-UNIV_INTERN
+Allocates and builds a file name from a path, a table or tablespace name
+and a suffix. The string must be freed by caller with ut_free().
+@param[in] path NULL or the directory path or the full path and filename.
+@param[in] name NULL if path is full, or Table/Tablespace name
+@param[in] suffix NULL or the file extension to use.
+@return own: file name */
 char*
-fil_make_ibd_name(
-/*==============*/
-	const char*	name,		/*!< in: table name or a dir path */
-	bool		is_full_path);	/*!< in: TRUE if it is a dir path */
-/*******************************************************************//**
-Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
-The string must be freed by caller with mem_free().
-@return	own: file name */
-UNIV_INTERN
-char*
-fil_make_isl_name(
-/*==============*/
-	const char*	name);	/*!< in: table name */
-/*******************************************************************//**
-Creates a new InnoDB Symbolic Link (ISL) file.  It is always created
-under the 'datadir' of MySQL. The datadir is the directory of a
-running mysqld program. We can refer to it by simply using the path '.'.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+fil_make_filepath(
+	const char*	path,
+	const char*	name,
+	ib_extention	suffix,
+	bool		strip_name);
+
+/** Creates a new General or Single-Table tablespace
+@param[in]	space_id	Tablespace ID
+@param[in]	name		Tablespace name in dbname/tablename format.
+For general tablespaces, the 'dbname/' part may be missing.
+@param[in]	path		Path and filename of the datafile to create.
+@param[in]	flags		Tablespace flags
+@param[in]	size		Initial size of the tablespace file in pages,
+must be >= FIL_IBD_FILE_INITIAL_SIZE
+@return DB_SUCCESS or error code */
 dberr_t
-fil_create_link_file(
-/*=================*/
-	const char*	tablename,	/*!< in: tablename */
-	const char*	filepath);	/*!< in: pathname of tablespace */
-/*******************************************************************//**
-Deletes an InnoDB Symbolic Link (ISL) file. */
-UNIV_INTERN
-void
-fil_delete_link_file(
-/*==================*/
-	const char*	tablename);	/*!< in: name of table */
-/*******************************************************************//**
-Reads an InnoDB Symbolic Link (ISL) file.
-It is always created under the 'datadir' of MySQL.  The name is of the
-form {databasename}/{tablename}. and the isl file is expected to be in a
-'{databasename}' directory called '{tablename}.isl'. The caller must free
-the memory of the null-terminated path returned if it is not null.
-@return	own: filepath found in link file, NULL if not found. */
-UNIV_INTERN
-char*
-fil_read_link_file(
-/*===============*/
-	const char*	name);		/*!< in: tablespace name */
-
-#include "fil0crypt.h"
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_new_single_table_tablespace(
-/*===================================*/
-	ulint		space_id,	/*!< in: space id */
-	const char*	tablename,	/*!< in: the table name in the usual
-					databasename/tablename format
-					of InnoDB */
-	const char*	dir_path,	/*!< in: NULL or a dir path */
-	ulint		flags,		/*!< in: tablespace flags */
-	ulint		flags2,		/*!< in: table flags2 */
-	ulint		size,		/*!< in: the initial size of the
-					tablespace file in pages,
-					must be >= FIL_IBD_FILE_INITIAL_SIZE */
+fil_ibd_create(
+	ulint		space_id,
+	const char*	name,
+	const char*	path,
+	ulint		flags,
+	ulint		size,
 	fil_encryption_t mode,	/*!< in: encryption mode */
-	ulint		key_id)	/*!< in: encryption key_id */
-	__attribute__((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
+	ulint		key_id) /*!< in: encryption key_id */
+	MY_ATTRIBUTE((warn_unused_result));
+
 /********************************************************************//**
 Tries to open a single-table tablespace and optionally checks the space id is
 right in it. If does not succeed, prints an error message to the .err log. This
@@ -840,80 +1170,85 @@ If the validate boolean is set, we read the first page of the file and
 check that the space id in the file is what we expect. We assume that
 this function runs much faster if no check is made, since accessing the
 file inode probably is much faster (the OS caches them) than accessing
-the first page of the file.  This boolean may be initially FALSE, but if
+the first page of the file.  This boolean may be initially false, but if
 a remote tablespace is found it will be changed to true.
 
 If the fix_dict boolean is set, then it is safe to use an internal SQL
 statement to update the dictionary tables if they are incorrect.
 
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@param[in]	validate	true if we should validate the tablespace
+@param[in]	fix_dict	true if the dictionary is available to be fixed
+@param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
+@param[in]	id		tablespace ID
+@param[in]	flags		tablespace flags
+@param[in]	tablename	tablespace name of the datafile
+If file-per-table, it is the table name in the databasename/tablename format
+@param[in]	path_in		expected filepath, usually read from dictionary
+@return DB_SUCCESS or error code */
 dberr_t
-fil_open_single_table_tablespace(
-/*=============================*/
-	bool		validate,	/*!< in: Do we validate tablespace? */
-	bool		fix_dict,	/*!< in: Can we fix the dictionary? */
-	ulint		id,		/*!< in: space id */
-	ulint		flags,		/*!< in: tablespace flags */
-	const char*	tablename,	/*!< in: table name in the
-					databasename/tablename format */
-	const char*	filepath,	/*!< in: tablespace filepath */
-	dict_table_t*	table)		/*!< in: table */
-	__attribute__((nonnull(5), warn_unused_result));
+fil_ibd_open(
+	bool		validate,
+	bool		fix_dict,
+	fil_type_t	purpose,
+	ulint		id,
+	ulint		flags,
+	const char*	tablename,
+	const char*	path_in,
+	dict_table_t*	table)	/*!< in: table */
+	MY_ATTRIBUTE((warn_unused_result));
 
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return	DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fil_load_single_table_tablespaces(void);
-/*===================================*/
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return	TRUE if does not exist or is being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
-	ulint		id,	/*!< in: space id */
-	ib_int64_t	version);/*!< in: tablespace_version should be this; if
-				you pass -1 as the value of this, then this
-				parameter is ignored */
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return	TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
-	ulint	id);	/*!< in: space id */
+enum fil_load_status {
+	/** The tablespace file(s) were found and valid. */
+	FIL_LOAD_OK,
+	/** The name no longer matches space_id */
+	FIL_LOAD_ID_CHANGED,
+	/** The file(s) were not found */
+	FIL_LOAD_NOT_FOUND,
+	/** The file(s) were not valid */
+	FIL_LOAD_INVALID
+};
+
+/** Open a single-file tablespace and add it to the InnoDB data structures.
+@param[in]	space_id	tablespace ID
+@param[in]	filename	path/to/databasename/tablename.ibd
+@param[out]	space		the tablespace, or NULL on error
+@return status of the operation */
+enum fil_load_status
+fil_ibd_load(
+	ulint		space_id,
+	const char*	filename,
+	fil_space_t*&	space)
+	MY_ATTRIBUTE((warn_unused_result));
+
+
+/***********************************************************************//**
+A fault-tolerant function that tries to read the next file name in the
+directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
+idea is to read as much good data as we can and jump over bad data.
+@return 0 if ok, -1 if error even after the retries, 1 if at the end
+of the directory */
+int
+fil_file_readdir_next_file(
+/*=======================*/
+	dberr_t*	err,	/*!< out: this is set to DB_ERROR if an error
+				was encountered, otherwise not changed */
+	const char*	dirname,/*!< in: directory name or path */
+	os_file_dir_t	dir,	/*!< in: directory stream */
+	os_file_stat_t*	info);	/*!< in/out: buffer where the
+				info is returned */
 #ifndef UNIV_HOTBACKUP
 /*******************************************************************//**
-Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+Returns true if a matching tablespace exists in the InnoDB tablespace memory
 cache. Note that if we have not done a crash recovery at the database startup,
 there may be many tablespaces which are not yet in the memory cache.
-@return	TRUE if a matching tablespace exists in the memory cache */
-UNIV_INTERN
-ibool
+@return true if a matching tablespace exists in the memory cache */
+bool
 fil_space_for_table_exists_in_mem(
 /*==============================*/
 	ulint		id,		/*!< in: space id */
 	const char*	name,		/*!< in: table name in the standard
 					'databasename/tablename' format */
-	ibool		mark_space,	/*!< in: in crash recovery, at database
-					startup we mark all spaces which have
-					an associated table in the InnoDB
-					data dictionary, so that
-					we can print a warning about orphaned
-					tablespaces */
-	ibool		print_error_if_does_not_exist,
+	bool		print_error_if_does_not_exist,
 					/*!< in: print detailed error
 					information to the .err log if a
 					matching tablespace is not found from
@@ -921,39 +1256,30 @@ fil_space_for_table_exists_in_mem(
 	bool		adjust_space,	/*!< in: whether to adjust space id
 					when find table space mismatch */
 	mem_heap_t*	heap,		/*!< in: heap memory */
-	table_id_t	table_id);	/*!< in: table id */
+	table_id_t	table_id,	/*!< in: table id */
+	dict_table_t*	table);		/*!< in: table or NULL */
 #else /* !UNIV_HOTBACKUP */
 /********************************************************************//**
 Extends all tablespaces to the size stored in the space header. During the
 mysqlbackup --apply-log phase we extended the spaces on-demand so that log
 records could be appllied, but that may have left spaces still too small
 compared to the size stored in the space header. */
-UNIV_INTERN
 void
 fil_extend_tablespaces_to_stored_len(void);
 /*======================================*/
 #endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
-	ulint*	actual_size,	/*!< out: size of the space after extension;
-				if we ran out of disk space this may be lower
-				than the desired size */
-	ulint	space_id,	/*!< in: space id */
-	ulint	size_after_extend);/*!< in: desired size in pages after the
-				extension; if the current space size is bigger
-				than this already, the function does nothing */
+/** Try to extend a tablespace if it is smaller than the specified size.
+@param[in,out]	space	tablespace
+@param[in]	size	desired size in pages
+@return whether the tablespace is at least as big as requested */
+bool
+fil_space_extend(
+	fil_space_t*	space,
+	ulint		size);
 /*******************************************************************//**
 Tries to reserve free extents in a file space.
-@return	TRUE if succeed */
-UNIV_INTERN
-ibool
+@return true if succeed */
+bool
 fil_space_reserve_free_extents(
 /*===========================*/
 	ulint	id,		/*!< in: space id */
@@ -961,7 +1287,6 @@ fil_space_reserve_free_extents(
 	ulint	n_to_reserve);	/*!< in: how many one wants to reserve */
 /*******************************************************************//**
 Releases free extents in a file space. */
-UNIV_INTERN
 void
 fil_space_release_free_extents(
 /*===========================*/
@@ -970,56 +1295,51 @@ fil_space_release_free_extents(
 /*******************************************************************//**
 Gets the number of reserved extents. If the database is silent, this number
 should be zero. */
-UNIV_INTERN
 ulint
 fil_space_get_n_reserved_extents(
 /*=============================*/
 	ulint	id);		/*!< in: space id */
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
+
+/** Reads or writes data. This operation could be asynchronous (aio).
+
+@param[in]	type		IO context
+@param[in]	sync		true if synchronous aio is desired
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	byte_offset	remainder of offset in bytes; in aio this
+				must be divisible by the OS block size
+@param[in]	len		how many bytes to read or write; this must
+				not cross a file boundary; in aio this must
+				be a block size multiple
+@param[in,out]	buf		buffer where to store read data or from where
+				to write; in aio this must be appropriately
+				aligned
+@param[in]	message		message for aio handler if non-sync aio
+				used, else ignored
+@param[in,out]	write_size	Actual write size initialized
+				after first successful trim
+				operation for this page and if
+				initialized we do not trim again if
+				actual page size does not decrease.
+
+@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
+if we are trying to do i/o on a tablespace which does not exist */
 dberr_t
 fil_io(
-/*===*/
-	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
-				ORed to OS_FILE_LOG, if a log i/o
-				and ORed to OS_AIO_SIMULATED_WAKE_LATER
-				if simulated aio and we want to post a
-				batch of i/os; NOTE that a simulated batch
-				may introduce hidden chances of deadlocks,
-				because i/os are not actually handled until
-				all have been posted: use with great
-				caution! */
-	bool	sync,		/*!< in: true if synchronous aio is desired */
-	ulint	space_id,	/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	block_offset,	/*!< in: offset in number of blocks */
-	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
-				aio this must be divisible by the OS block
-				size */
-	ulint	len,		/*!< in: how many bytes to read or write; this
-				must not cross a file boundary; in aio this
-				must be a block size multiple */
-	void*	buf,		/*!< in/out: buffer where to store read data
-				or from where to write; in aio this must be
-				appropriately aligned */
-	void*	message,	/*!< in: message for aio handler if non-sync
-				aio used, else ignored */
-	ulint*	write_size)	/*!< in/out: Actual write size initialized
-			       after fist successfull trim
-			       operation for this page and if
-			       initialized we do not trim again if
-			       actual page size does not decrease. */
-	__attribute__((nonnull(8)));
+	const IORequest&	type,
+	bool			sync,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			byte_offset,
+	ulint			len,
+	void*			buf,
+	void*			message,
+	ulint*			write_size);
 /**********************************************************************//**
 Waits for an aio operation to complete. This function is used to write the
 handler for completed requests. The aio array of pending requests is divided
 into segments (see os0file.cc for more info). The thread specifies which
 segment it wants to wait for. */
-UNIV_INTERN
 void
 fil_aio_wait(
 /*=========*/
@@ -1028,82 +1348,135 @@ fil_aio_wait(
 /**********************************************************************//**
 Flushes to disk possible writes cached by the OS. If the space does not exist
 or is being dropped, does not do anything. */
-UNIV_INTERN
 void
 fil_flush(
 /*======*/
 	ulint	space_id);	/*!< in: file space id (this can be a group of
 				log files or a tablespace of the database) */
-/**********************************************************************//**
-Flushes to disk writes in file spaces of the given type possibly cached by
-the OS. */
-UNIV_INTERN
+/** Flush to disk the writes in file spaces of the given type
+possibly cached by the OS.
+@param[in]	purpose	FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
 void
 fil_flush_file_spaces(
-/*==================*/
-	ulint	purpose);	/*!< in: FIL_TABLESPACE, FIL_LOG */
+	fil_type_t	purpose);
 /******************************************************************//**
 Checks the consistency of the tablespace cache.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
+@return true if ok */
+bool
 fil_validate(void);
 /*==============*/
 /********************************************************************//**
-Returns TRUE if file address is undefined.
-@return	TRUE if undefined */
-UNIV_INTERN
-ibool
+Returns true if file address is undefined.
+@return true if undefined */
+bool
 fil_addr_is_null(
 /*=============*/
 	fil_addr_t	addr);	/*!< in: address */
 /********************************************************************//**
 Get the predecessor of a file page.
-@return	FIL_PAGE_PREV */
-UNIV_INTERN
+@return FIL_PAGE_PREV */
 ulint
 fil_page_get_prev(
 /*==============*/
 	const byte*	page);	/*!< in: file page */
 /********************************************************************//**
 Get the successor of a file page.
-@return	FIL_PAGE_NEXT */
-UNIV_INTERN
+@return FIL_PAGE_NEXT */
 ulint
 fil_page_get_next(
 /*==============*/
 	const byte*	page);	/*!< in: file page */
 /*********************************************************************//**
 Sets the file page type. */
-UNIV_INTERN
 void
 fil_page_set_type(
 /*==============*/
 	byte*	page,	/*!< in/out: file page */
 	ulint	type);	/*!< in: type */
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
+/** Reset the page type.
+Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in]	page_id	page number
+@param[in,out]	page	page with invalid FIL_PAGE_TYPE
+@param[in]	type	expected page type
+@param[in,out]	mtr	mini-transaction */
+void
+fil_page_reset_type(
+	const page_id_t&	page_id,
+	byte*			page,
+	ulint			type,
+	mtr_t*			mtr);
+/** Get the file page type.
+@param[in]	page	file page
+@return page type */
+inline
 ulint
 fil_page_get_type(
-/*==============*/
-	const byte*	page);	/*!< in: file page */
+	const byte*	page)
+{
+	return(mach_read_from_2(page + FIL_PAGE_TYPE));
+}
+/** Check (and if needed, reset) the page type.
+Data files created before MySQL 5.1 may contain
+garbage in the FIL_PAGE_TYPE field.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in]	page_id	page number
+@param[in,out]	page	page with possibly invalid FIL_PAGE_TYPE
+@param[in]	type	expected page type
+@param[in,out]	mtr	mini-transaction */
+inline
+void
+fil_page_check_type(
+	const page_id_t&	page_id,
+	byte*			page,
+	ulint			type,
+	mtr_t*			mtr)
+{
+	ulint	page_type	= fil_page_get_type(page);
+
+	if (page_type != type) {
+		fil_page_reset_type(page_id, page, type, mtr);
+	}
+}
+
+/** Check (and if needed, reset) the page type.
+Data files created before MySQL 5.1 may contain
+garbage in the FIL_PAGE_TYPE field.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+@param[in,out]	block	block with possibly invalid FIL_PAGE_TYPE
+@param[in]	type	expected page type
+@param[in,out]	mtr	mini-transaction */
+#define fil_block_check_type(block, type, mtr)				\
+	fil_page_check_type(block->page.id, block->frame, type, mtr)
+
+#ifdef UNIV_DEBUG
+/** Increase redo skipped of a tablespace.
+@param[in]	id	space id */
+void
+fil_space_inc_redo_skipped_count(
+	ulint		id);
+
+/** Decrease redo skipped of a tablespace.
+@param[in]	id	space id */
+void
+fil_space_dec_redo_skipped_count(
+	ulint		id);
 
 /*******************************************************************//**
-Returns TRUE if a single-table tablespace is being deleted.
-@return TRUE if being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_is_being_deleted(
-/*============================*/
+Check whether a single-table tablespace is redo skipped.
+@return true if redo skipped */
+bool
+fil_space_is_redo_skipped(
+/*======================*/
 	ulint		id);	/*!< in: space id */
+#endif
 
 /********************************************************************//**
 Delete the tablespace file and any related files like .cfg.
 This should not be called for temporary tables. */
-UNIV_INTERN
 void
 fil_delete_file(
 /*============*/
@@ -1112,42 +1485,37 @@ fil_delete_file(
 /** Callback functor. */
 struct PageCallback {
 
-	/**
-	Default constructor */
+	/** Default constructor */
 	PageCallback()
 		:
-		m_zip_size(),
-		m_page_size(),
+		m_page_size(0, 0, false),
 		m_filepath() UNIV_NOTHROW {}
 
 	virtual ~PageCallback() UNIV_NOTHROW {}
 
-	/**
-	Called for page 0 in the tablespace file at the start.
-	@param file_size - size of the file in bytes
-	@param block - contents of the first page in the tablespace file
-	@retval DB_SUCCESS or error code.*/
+	/** Called for page 0 in the tablespace file at the start.
+	@param file_size size of the file in bytes
+	@param block contents of the first page in the tablespace file
+	@retval DB_SUCCESS or error code. */
 	virtual dberr_t init(
 		os_offset_t		file_size,
 		const buf_block_t*	block) UNIV_NOTHROW = 0;
 
-	/**
-	Called for every page in the tablespace. If the page was not
+	/** Called for every page in the tablespace. If the page was not
 	updated then its state must be set to BUF_PAGE_NOT_USED. For
 	compressed tables the page descriptor memory will be at offset:
-       		block->frame + UNIV_PAGE_SIZE;
-	@param offset - physical offset within the file
-	@param block - block read from file, note it is not from the buffer pool
+	block->frame + UNIV_PAGE_SIZE;
+	@param offset physical offset within the file
+	@param block block read from file, note it is not from the buffer pool
 	@retval DB_SUCCESS or error code. */
 	virtual dberr_t operator()(
 		os_offset_t 	offset,
 		buf_block_t*	block) UNIV_NOTHROW = 0;
 
-	/**
-	Set the name of the physical file and the file handle that is used
+	/** Set the name of the physical file and the file handle that is used
 	to open it for the file that is being iterated over.
-	@param filename - then physical name of the tablespace file.
-	@param file - OS file handle */
+	@param filename the physical name of the tablespace file.
+	@param file OS file handle */
 	void set_file(const char* filename, os_file_t file) UNIV_NOTHROW
 	{
 		m_file = file;
@@ -1158,30 +1526,23 @@ struct PageCallback {
 	@return the space id of the tablespace */
 	virtual ulint get_space_id() const UNIV_NOTHROW = 0;
 
-	/** The compressed page size
-	@return the compressed page size */
-	ulint get_zip_size() const
-	{
-		return(m_zip_size);
-	}
-
 	/**
-	Set the tablespace compressed table size.
-	@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
-	dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
+	@retval the space flags of the tablespace being iterated over */
+	virtual ulint get_space_flags() const UNIV_NOTHROW = 0;
+
+	/** Set the tablespace page size.
+	@param[in] page a page belonging to the tablespace */
+	void set_page_size(const buf_frame_t* page) UNIV_NOTHROW;
 
 	/** The compressed page size
 	@return the compressed page size */
-	ulint get_page_size() const
+	const page_size_t& get_page_size() const
 	{
 		return(m_page_size);
 	}
 
-	/** Compressed table page size */
-	ulint			m_zip_size;
-
 	/** The tablespace page size. */
-	ulint			m_page_size;
+	page_size_t		m_page_size;
 
 	/** File handle to the tablespace */
 	os_file_t		m_file;
@@ -1197,36 +1558,52 @@ protected:
 
 /********************************************************************//**
 Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@param table the table definition in the server
+@param n_io_buffers number of blocks to read and write together
+@param callback functor that will do the page updates
+@return DB_SUCCESS or error code */
 dberr_t
 fil_tablespace_iterate(
 /*===================*/
 	dict_table_t*		table,
 	ulint			n_io_buffers,
 	PageCallback&		callback)
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return	space id, ULINT_UNDEFINED if not found */
-UNIV_INTERN
+/********************************************************************//**
+Looks for a pre-existing fil_space_t with the given tablespace ID
+and, if found, returns the name and filepath in newly allocated buffers that the caller must free.
+@param[in] space_id The tablespace ID to search for.
+@param[out] name Name of the tablespace found.
+@param[out] filepath The filepath of the first datafile for the tablespace found.
+@return true if tablespace is found, false if not. */
+bool
+fil_space_read_name_and_filepath(
+	ulint	space_id,
+	char**	name,
+	char**	filepath);
+
+/** Convert a file name to a tablespace name.
+@param[in]	filename	directory/databasename/tablename.ibd
+@return database/tablename string, to be freed with ut_free() */
+char*
+fil_path_to_space_name(
+	const char*	filename);
+
+/** Returns the space ID based on the tablespace name.
+The tablespace must be found in the tablespace memory cache.
+This call is made from external to this module, so the mutex is not owned.
+@param[in]	tablespace	Tablespace name
+@return space ID if tablespace found, ULINT_UNDEFINED if not found. */
 ulint
-fil_get_space_id_for_table(
-/*=======================*/
-	const char*	name);	/*!< in: table name in the standard
-				'databasename/tablename' format */
+fil_space_get_id_by_name(
+	const char*	tablespace);
 
 /**
 Iterate over all the spaces in the space list and fetch the
 tablespace names. It will return a copy of the name that must be
 freed by the caller using: delete[].
 @return DB_SUCCESS if all OK. */
-UNIV_INTERN
 dberr_t
 fil_get_space_names(
 /*================*/
@@ -1240,44 +1617,13 @@ fil_get_space_names(
 @param[in]	tmp_name	temporary table name
 @param[in,out]	mtr		mini-transaction
 @return innodb error code */
-UNIV_INTERN
 dberr_t
 fil_mtr_rename_log(
 	const dict_table_t*	old_table,
 	const dict_table_t*	new_table,
 	const char*		tmp_name,
 	mtr_t*			mtr)
-	MY_ATTRIBUTE((nonnull));
-
-/*******************************************************************//**
-Finds the given page_no of the given space id from the double write buffer,
-and copies it to the corresponding .ibd file.
-@return true if copy was successful, or false. */
-bool
-fil_user_tablespace_restore_page(
-/*==============================*/
-	fsp_open_info*	fsp,		/* in: contains space id and .ibd
-					file information */
-	ulint		page_no);	/* in: page_no to obtain from double
-					write buffer */
-
-/*******************************************************************//**
-Return space flags */
-UNIV_INLINE
-ulint
-fil_space_flags(
-/*===========*/
-	fil_space_t*	space);	/*!< in: space */
-
-/*******************************************************************//**
-Returns a pointer to the file_space_t that is in the memory cache
-associated with a space id.
-@return	file_space_t pointer, NULL if space not found */
-fil_space_t*
-fil_space_get(
-/*==========*/
-	ulint	id);	/*!< in: space id */
-#endif /* !UNIV_INNOCHECKSUM */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /****************************************************************//**
 Acquire fil_system mutex */
@@ -1290,7 +1636,6 @@ void
 fil_system_exit(void);
 /*==================*/
 
-#ifndef UNIV_INNOCHECKSUM
 /*******************************************************************//**
 Returns the table space by a given id, NULL if not found. */
 fil_space_t*
@@ -1317,7 +1662,6 @@ Get id of next tablespace or ULINT_UNDEFINED if none */
 UNIV_INTERN
 ulint
 fil_get_next_space(
-/*===============*/
 	ulint id);      /*!< in: space id */
 
 /******************************************************************
@@ -1337,6 +1681,143 @@ fil_get_next_space_safe(
 
 
 /*******************************************************************//**
+Note that a non-predefined persistent tablespace has been modified by redo log.
+@param[in,out]	space	tablespace */
+void
+fil_names_dirty(
+	fil_space_t*	space);
+
+/** Write MLOG_FILE_NAME records when a non-predefined persistent
+tablespace was modified for the first time since the latest
+fil_names_clear().
+@param[in,out]	space	tablespace
+@param[in,out]	mtr	mini-transaction */
+void
+fil_names_dirty_and_write(
+	fil_space_t*	space,
+	mtr_t*		mtr);
+
+/** Set the compression type for the tablespace of a table
+@param[in]	table		Table that should be compressed
+@param[in]	algorithm	Text representation of the algorithm
+@return DB_SUCCESS or error code */
+dberr_t
+fil_set_compression(
+	dict_table_t*	table,
+	const char*	algorithm)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Get the compression type for the tablespace
+@param[in]	space_id	Space ID to check
+@return the compression algorithm */
+Compression::Type
+fil_get_compression(
+	ulint		space_id)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Set the encryption type for the tablespace
+@param[in] space_id		Space ID of tablespace for which to set
+@param[in] algorithm		Encryption algorithm
+@param[in] key			Encryption key
+@param[in] iv			Encryption iv
+@return DB_SUCCESS or error code */
+dberr_t
+fil_set_encryption(
+	ulint			space_id,
+	Encryption::Type	algorithm,
+	byte*			key,
+	byte*			iv)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/**
+@return true if the re-encryption succeeded */
+bool
+fil_encryption_rotate();
+
+/** Write MLOG_FILE_NAME records if a persistent tablespace was modified
+for the first time since the latest fil_names_clear().
+@param[in,out]	space	tablespace
+@param[in,out]	mtr	mini-transaction
+@return whether any MLOG_FILE_NAME record was written */
+inline MY_ATTRIBUTE((warn_unused_result))
+bool
+fil_names_write_if_was_clean(
+	fil_space_t*	space,
+	mtr_t*		mtr)
+{
+	ut_ad(log_mutex_own());
+
+	if (space == NULL) {
+		return(false);
+	}
+
+	const bool	was_clean = space->max_lsn == 0;
+	ut_ad(space->max_lsn <= log_sys->lsn);
+	space->max_lsn = log_sys->lsn;
+
+	if (was_clean) {
+		fil_names_dirty_and_write(space, mtr);
+	}
+
+	return(was_clean);
+}
+
+extern volatile bool	recv_recovery_on;
+
+/** During crash recovery, open a tablespace if it had not been opened
+yet, to get valid size and flags.
+@param[in,out]	space	tablespace */
+inline
+void
+fil_space_open_if_needed(
+	fil_space_t*	space)
+{
+	ut_ad(recv_recovery_on);
+
+	if (space->size == 0) {
+		/* Initially, size and flags will be set to 0,
+		until the files are opened for the first time.
+		fil_space_get_size() will open the file
+		and adjust the size and flags. */
+#ifdef UNIV_DEBUG
+		ulint		size	=
+#endif /* UNIV_DEBUG */
+			fil_space_get_size(space->id);
+		ut_ad(size == space->size);
+	}
+}
+
+/** On a log checkpoint, reset fil_names_dirty_and_write() flags
+and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed.
+@param[in]	lsn		checkpoint LSN
+@param[in]	do_write	whether to always write MLOG_CHECKPOINT
+@return whether anything was written to the redo log
+@retval false	if no flags were set and nothing written
+@retval true	if anything was written to the redo log */
+bool
+fil_names_clear(
+	lsn_t	lsn,
+	bool	do_write);
+
+#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
+/**
+Try and enable FusionIO atomic writes.
+@param[in] file		OS file handle
+@return true if successful */
+bool
+fil_fusionio_enable_atomic_write(os_file_t file);
+#endif /* !NO_FALLOCATE && UNIV_LINUX */
+
+/** Note that the file system where the file resides doesn't support PUNCH HOLE
+@param[in,out]	node		Node to set */
+void fil_no_punch_hole(fil_node_t* node);
+
+#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+void test_make_filepath();
+#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
+
+
+/*******************************************************************//**
 Returns the block size of the file space
 @return	block size */
 UNIV_INTERN
@@ -1347,6 +1828,23 @@ fil_space_get_block_size(
 	ulint   offset, /*!< in: page offset */
 	ulint   len);	/*!< in: page len */
 
+/*******************************************************************//**
+Increments the count of pending operation, if space is not being deleted.
+@return	TRUE if being deleted, and operation should be skipped */
+UNIV_INTERN
+ibool
+fil_inc_pending_ops(
+/*================*/
+	ulint	id,		/*!< in: space id */
+	ibool	print_err);	/*!< in: need to print error or not */
+/*******************************************************************//**
+Decrements the count of pending operations. */
+UNIV_INTERN
+void
+fil_decr_pending_ops(
+/*=================*/
+	ulint	id);	/*!< in: space id */
+
 #endif /* UNIV_INNOCHECKSUM */
 
 #ifndef UNIV_INNOCHECKSUM
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index ceebf6c1ab3..8f8a4194c0d 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -57,10 +57,14 @@ fil_get_page_type_name(
 	ulint	page_type)	/*!< in: FIL_PAGE_TYPE */
 {
 	switch(page_type) {
+	case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
+		return (const char*)"PAGE_COMPRESSED_ENRYPTED";
 	case FIL_PAGE_PAGE_COMPRESSED:
 		return (const char*)"PAGE_COMPRESSED";
 	case FIL_PAGE_INDEX:
 		return (const char*)"INDEX";
+	case FIL_PAGE_RTREE:
+		return (const char*)"RTREE";
 	case FIL_PAGE_UNDO_LOG:
 		return (const char*)"UNDO LOG";
 	case FIL_PAGE_INODE:
@@ -85,8 +89,16 @@ fil_get_page_type_name(
 		return (const char*)"ZBLOB";
 	case FIL_PAGE_TYPE_ZBLOB2:
 		return (const char*)"ZBLOB2";
-	case FIL_PAGE_TYPE_COMPRESSED:
+	case FIL_PAGE_COMPRESSED:
 		return (const char*)"ORACLE PAGE COMPRESSED";
+	case FIL_PAGE_ENCRYPTED:
+		return (const char*)"ORACLE PAGE ENCRYPTED";
+	case FIL_PAGE_COMPRESSED_AND_ENCRYPTED:
+		return (const char*)"ORACLE PAGE COMPRESSED AND ENCRYPTED";
+	case FIL_PAGE_ENCRYPTED_RTREE:
+		return (const char*)"ORACLE RTREE ENCRYPTED";
+	case FIL_PAGE_TYPE_UNKNOWN:
+		return (const char*)"OLD UNKOWN PAGE TYPE";
 	default:
 		return (const char*)"PAGE TYPE CORRUPTED";
 	}
@@ -102,7 +114,7 @@ fil_node_get_block_size(
 	fil_node_t*     node)		/*!< in: Node where to get block
 					size */
 {
-	return (node->file_block_size);
+	return (node->block_size);
 }
 
 /****************************************************************//**
@@ -120,6 +132,7 @@ fil_page_type_validate(
 	if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
 		page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
 		page_type == FIL_PAGE_INDEX ||
+		page_type == FIL_PAGE_RTREE ||
 		page_type == FIL_PAGE_UNDO_LOG ||
 		page_type == FIL_PAGE_INODE ||
 		page_type == FIL_PAGE_IBUF_FREE_LIST ||
@@ -132,7 +145,11 @@ fil_page_type_validate(
 		page_type == FIL_PAGE_TYPE_BLOB ||
 		page_type == FIL_PAGE_TYPE_ZBLOB ||
 		page_type == FIL_PAGE_TYPE_ZBLOB2 ||
-		page_type == FIL_PAGE_TYPE_COMPRESSED))) {
+		page_type == FIL_PAGE_COMPRESSED ||
+		page_type == FIL_PAGE_TYPE_UNKNOWN ||
+		page_type == FIL_PAGE_ENCRYPTED ||
+		page_type == FIL_PAGE_COMPRESSED_AND_ENCRYPTED ||
+		page_type == FIL_PAGE_ENCRYPTED_RTREE))) {
 
 		uint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
 		bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED);
@@ -155,6 +172,7 @@ fil_page_type_validate(
 		ut_ad(page_type == FIL_PAGE_PAGE_COMPRESSED ||
 			page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
 			page_type == FIL_PAGE_INDEX ||
+			page_type == FIL_PAGE_RTREE ||
 			page_type == FIL_PAGE_UNDO_LOG ||
 			page_type == FIL_PAGE_INODE ||
 			page_type == FIL_PAGE_IBUF_FREE_LIST ||
@@ -167,7 +185,12 @@ fil_page_type_validate(
 			page_type == FIL_PAGE_TYPE_BLOB ||
 			page_type == FIL_PAGE_TYPE_ZBLOB ||
 			page_type == FIL_PAGE_TYPE_ZBLOB2 ||
-			page_type == FIL_PAGE_TYPE_COMPRESSED);
+			page_type == FIL_PAGE_COMPRESSED ||
+			page_type == FIL_PAGE_TYPE_UNKNOWN ||
+			page_type == FIL_PAGE_ENCRYPTED ||
+			page_type == FIL_PAGE_COMPRESSED_AND_ENCRYPTED ||
+			page_type == FIL_PAGE_ENCRYPTED_RTREE);
+
 		return false;
 	}
 
diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h
index 10db59fb218..d4cc54c7b2a 100644
--- a/storage/innobase/include/fil0pagecompress.h
+++ b/storage/innobase/include/fil0pagecompress.h
@@ -44,7 +44,7 @@ Returns the page compression flag of the space, or false if the space
 is not compressed. The tablespace must be cached in the memory cache.
 @return	true if page compressed, false if not or space not found */
 UNIV_INLINE
-ibool
+bool
 fil_space_is_page_compressed(
 /*=========================*/
 	ulint	id);	/*!< in: space id */
@@ -53,7 +53,7 @@ Returns the page compression flag of the space, or false if the space
 is not compressed. The tablespace must be cached in the memory cache.
 @return	true if page compressed, false if not or space not found */
 UNIV_INTERN
-ibool
+bool
 fil_space_get_page_compressed(
 /*=========================*/
 	fil_space_t*	space);	/*!< in: space id */
@@ -66,24 +66,6 @@ atomic_writes_t
 fil_space_get_atomic_writes(
 /*=========================*/
 	ulint	id);	/*!< in: space id */
-/*******************************************************************//**
-Find out wheather the page is index page or not
-@return	true if page type index page, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_index_page(
-/*===================*/
-	byte	*buf);	/*!< in: page */
-
-/****************************************************************//**
-Get the name of the compression algorithm used for page
-compression.
-@return compression algorithm name or "UNKNOWN" if not known*/
-UNIV_INLINE
-const char*
-fil_get_compression_alg_name(
-/*=========================*/
-       ulint	comp_alg);	/*!<in: compression algorithm number */
 
 /****************************************************************//**
 For page compressed pages compress the page before actual write
@@ -139,32 +121,7 @@ Get block size from fil node
 UNIV_INLINE
 ulint
 fil_node_get_block_size(
+/*====================*/
 	fil_node_t*	node);	/*!< in: Node where to get block
 				size */
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return	true if page is page compressed*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed(
-/*===================*/
-	byte*	buf);	/*!< in: page */
-
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return	true if page is page compressed*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed_encrypted(
-/*=============================*/
-	byte*	buf);	/*!< in: page */
-
-/*******************************************************************//**
-Find out wheather the page is page compressed with lzo method
-@return	true if page is page compressed with lzo method*/
-UNIV_INLINE
-ibool
-fil_page_is_lzo_compressed(
-/*=======================*/
-	byte*	buf);	/*!< in: page */
 #endif
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
new file mode 100644
index 00000000000..83aa370abf0
--- /dev/null
+++ b/storage/innobase/include/fsp0file.h
@@ -0,0 +1,608 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0file.h
+Tablespace data file implementation.
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0file_h
+#define fsp0file_h
+
+#include "ha_prototypes.h"
+#include "log0log.h"
+#include "mem0mem.h"
+#include "os0file.h"
+#include "fil0crypt.h"
+#include <vector>
+
+/** Types of raw partitions in innodb_data_file_path */
+enum device_t {
+	SRV_NOT_RAW = 0,	/*!< Not a raw partition */
+	SRV_NEW_RAW,		/*!< A 'newraw' partition, only to be
+				initialized */
+	SRV_OLD_RAW		/*!< An initialized raw partition */
+};
+
+/** Data file control information. */
+class Datafile {
+
+	friend class Tablespace;
+	friend class SysTablespace;
+
+public:
+
+	Datafile()
+		:
+		m_name(),
+		m_filepath(),
+		m_filename(),
+		m_handle(OS_FILE_CLOSED),
+		m_open_flags(OS_FILE_OPEN),
+		m_size(),
+		m_order(),
+		m_type(SRV_NOT_RAW),
+		m_space_id(ULINT_UNDEFINED),
+		m_flags(),
+		m_exists(),
+		m_is_valid(),
+		m_first_page_buf(),
+		m_first_page(),
+		m_atomic_write(),
+		m_last_os_error(),
+		m_file_info(),
+		m_encryption_key(NULL),
+		m_encryption_iv(NULL),
+		m_crypt_info()
+	{
+		/* No op */
+	}
+
+	Datafile(const char* name, ulint flags, ulint size, ulint order)
+		:
+		m_name(mem_strdup(name)),
+		m_filepath(),
+		m_filename(),
+		m_handle(OS_FILE_CLOSED),
+		m_open_flags(OS_FILE_OPEN),
+		m_size(size),
+		m_order(order),
+		m_type(SRV_NOT_RAW),
+		m_space_id(ULINT_UNDEFINED),
+		m_flags(flags),
+		m_exists(),
+		m_is_valid(),
+		m_first_page_buf(),
+		m_first_page(),
+		m_atomic_write(),
+		m_last_os_error(),
+		m_file_info(),
+		m_encryption_key(NULL),
+		m_encryption_iv(NULL),
+		m_crypt_info()
+	{
+		ut_ad(m_name != NULL);
+		/* No op */
+	}
+
+	Datafile(const Datafile& file)
+		:
+		m_handle(file.m_handle),
+		m_open_flags(file.m_open_flags),
+		m_size(file.m_size),
+		m_order(file.m_order),
+		m_type(file.m_type),
+		m_space_id(file.m_space_id),
+		m_flags(file.m_flags),
+		m_exists(file.m_exists),
+		m_is_valid(file.m_is_valid),
+		m_first_page_buf(),
+		m_first_page(),
+		m_atomic_write(file.m_atomic_write),
+		m_last_os_error(),
+		m_file_info(),
+		m_encryption_key(NULL),
+		m_encryption_iv(NULL),
+		m_crypt_info()
+	{
+		m_name = mem_strdup(file.m_name);
+		ut_ad(m_name != NULL);
+
+		if (file.m_filepath != NULL) {
+			m_filepath = mem_strdup(file.m_filepath);
+			ut_a(m_filepath != NULL);
+			set_filename();
+		} else {
+			m_filepath = NULL;
+			m_filename = NULL;
+		}
+	}
+
+	virtual ~Datafile()
+	{
+		shutdown();
+	}
+
+	Datafile& operator=(const Datafile& file)
+	{
+		ut_a(this != &file);
+
+		ut_ad(m_name == NULL);
+		m_name = mem_strdup(file.m_name);
+		ut_a(m_name != NULL);
+
+		m_size = file.m_size;
+		m_order = file.m_order;
+		m_type = file.m_type;
+
+		ut_a(m_handle == OS_FILE_CLOSED);
+		m_handle = file.m_handle;
+
+		m_exists = file.m_exists;
+		m_is_valid = file.m_is_valid;
+		m_open_flags = file.m_open_flags;
+		m_space_id = file.m_space_id;
+		m_flags = file.m_flags;
+		m_last_os_error = 0;
+
+		if (m_filepath != NULL) {
+			ut_free(m_filepath);
+			m_filepath = NULL;
+			m_filename = NULL;
+		}
+
+		if (file.m_filepath != NULL) {
+			m_filepath = mem_strdup(file.m_filepath);
+			ut_a(m_filepath != NULL);
+			set_filename();
+		}
+
+		/* Do not make a copy of the first page,
+		it should be reread if needed */
+		m_first_page_buf = NULL;
+		m_first_page = NULL;
+		m_encryption_key = NULL;
+		m_encryption_iv = NULL;
+		/* Do not copy crypt info it is read from first page */
+		m_crypt_info = NULL;
+
+		m_atomic_write = file.m_atomic_write;
+
+		return(*this);
+	}
+
+	/** Initialize the name and flags of this datafile.
+	@param[in]	name	tablespace name, will be copied
+	@param[in]	flags	tablespace flags */
+	void init(const char* name, ulint flags);
+
+	/** Release the resources. */
+	virtual void shutdown();
+
+	/** Open a data file in read-only mode to check if it exists
+	so that it can be validated.
+	@param[in]	strict	whether to issue error messages
+	@return DB_SUCCESS or error code */
+	virtual dberr_t open_read_only(bool strict);
+
+	/** Open a data file in read-write mode during start-up so that
+	doublewrite pages can be restored and then it can be validated.
+	@param[in]	read_only_mode	if true, then readonly mode checks
+					are enforced.
+	@return DB_SUCCESS or error code */
+	virtual dberr_t open_read_write(bool read_only_mode)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Initialize OS specific file info. */
+	void init_file_info();
+
+	/** Close a data file.
+	@return DB_SUCCESS or error code */
+	dberr_t close();
+
+	/** Make a full filepath from a directory path and a filename.
+	Prepend the dirpath to filename using the extension given.
+	If dirpath is NULL, prepend the default datadir to filepath.
+	Store the result in m_filepath.
+	@param[in]	dirpath		directory path
+	@param[in]	filename	filename or filepath
+	@param[in]	ext		filename extension */
+	void make_filepath(
+		const char*	dirpath,
+		const char*	filename,
+		ib_extention	ext);
+
+	/** Set the filepath by duplicating the filepath sent in */
+	void set_filepath(const char* filepath);
+
+	/** Allocate and set the datafile or tablespace name in m_name.
+	If a name is provided, use it; else if the datafile is file-per-table,
+	extract a file-per-table tablespace name from m_filepath; else it is a
+	general tablespace, so just call it that for now. The value of m_name
+	will be freed in the destructor.
+	@param[in]	name	Tablespace Name if known, NULL if not */
+	void set_name(const char*	name);
+
+	/** Validates the datafile and checks that it conforms with
+	the expected space ID and flags.  The file should exist and be
+	successfully opened in order for this function to validate it.
+	@param[in]	space_id	The expected tablespace ID.
+	@param[in]	flags		The expected tablespace flags.
+	@param[in]	for_import	is it for importing
+	@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+	m_is_valid is also set true on success, else false. */
+	dberr_t validate_to_dd(
+		ulint		space_id,
+		ulint		flags,
+		bool		for_import)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Validates this datafile for the purpose of recovery.
+	The file should exist and be successfully opened. We initially
+	open it in read-only mode because we just want to read the SpaceID.
+	However, if the first page is corrupt and needs to be restored
+	from the doublewrite buffer, we will reopen it in write mode and
+	try to restore that page.
+	@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
+	m_is_valid is also set true on success, else false. */
+	dberr_t validate_for_recovery()
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Checks the consistency of the first page of a datafile when the
+	tablespace is opened.  This occurs before the fil_space_t is created
+	so the Space ID found here must not already be open.
+	m_is_valid is set true on success, else false.
+	@param[out]	flush_lsn	contents of FIL_PAGE_FILE_FLUSH_LSN
+	(only valid for the first file of the system tablespace)
+	@param[in]	for_import	if it is for importing
+	@retval DB_SUCCESS if the datafile is valid
+	@retval DB_CORRUPTION if the datafile is not readable
+	@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
+	dberr_t validate_first_page(lsn_t*	flush_lsn,
+				    bool	for_import)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Get Datafile::m_name.
+	@return m_name */
+	const char*	name()	const
+	{
+		return(m_name);
+	}
+
+	/** Get Datafile::m_filepath.
+	@return m_filepath */
+	const char*	filepath()	const
+	{
+		return(m_filepath);
+	}
+
+	/** Get Datafile::m_handle.
+	@return m_handle */
+	os_file_t	handle()	const
+	{
+		return(m_handle);
+	}
+
+	/** Get Datafile::m_order.
+	@return m_order */
+	ulint	order()	const
+	{
+		return(m_order);
+	}
+
+	/** Get Datafile::m_space_id.
+	@return m_space_id */
+	ulint	space_id()	const
+	{
+		return(m_space_id);
+	}
+
+	/** Get Datafile::m_flags.
+	@return m_flags */
+	ulint	flags()	const
+	{
+		return(m_flags);
+	}
+
+	/**
+	@return true if m_handle is open, false if not */
+	bool	is_open()	const
+	{
+		return(m_handle != OS_FILE_CLOSED);
+	}
+
+	/** Get Datafile::m_is_valid.
+	@return m_is_valid */
+	bool	is_valid()	const
+	{
+		return(m_is_valid);
+	}
+
+	/** Get the last OS error reported
+	@return m_last_os_error */
+	ulint	last_os_error()		const
+	{
+		return(m_last_os_error);
+	}
+
+	fil_space_crypt_t* get_crypt_info() const
+	{
+		return(m_crypt_info);
+	}
+
+	/** Test if the filepath provided looks the same as this filepath
+	by string comparison. If they are two different paths to the same
+	file, same_as() will be used to show that after the files are opened.
+	@param[in]	other	filepath to compare with
+	@retval true if it is the same filename by char comparison
+	@retval false if it looks different */
+	bool same_filepath_as(const char* other) const;
+
+	/** Test if another opened datafile is the same file as this object.
+	@param[in]	other	Datafile to compare with
+	@return true if it is the same file, else false */
+	bool same_as(const Datafile&	other) const;
+
+private:
+	/** Free the filepath buffer. */
+	void free_filepath();
+
+	/** Set the filename pointer to the start of the file name
+	in the filepath. */
+	void set_filename()
+	{
+		if (m_filepath == NULL) {
+			return;
+		}
+
+		char* last_slash = strrchr(m_filepath, OS_PATH_SEPARATOR);
+
+		m_filename = last_slash ? last_slash + 1 : m_filepath;
+	}
+
+	/** Create/open a data file.
+	@param[in]	read_only_mode	if true, then readonly mode checks
+					are enforced.
+	@return DB_SUCCESS or error code */
+	dberr_t open_or_create(bool read_only_mode)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Reads a few significant fields from the first page of the
+	datafile, which must already be open.
+	@param[in]	read_only_mode	if true, then readonly mode checks
+					are enforced.
+	@return DB_SUCCESS or DB_IO_ERROR if page cannot be read */
+	dberr_t read_first_page(bool read_first_page)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Free the first page from memory when it is no longer needed. */
+	void free_first_page();
+
+	/** Set the Datafile::m_open_flags.
+	@param open_flags	The Open flags to set. */
+	void set_open_flags(os_file_create_t	open_flags)
+	{
+		m_open_flags = open_flags;
+	};
+
+	/** Determine if this datafile is on a Raw Device
+	@return true if it is a RAW device. */
+	bool is_raw_device()
+	{
+		return(m_type != SRV_NOT_RAW);
+	}
+
+	/* DATA MEMBERS */
+
+	/** Datafile name at the tablespace location.
+	This is either the basename of the file if an absolute path
+	was entered, or it is the relative path to the datadir or
+	Tablespace::m_path. */
+	char*			m_name;
+
+protected:
+	/** Physical file path with base name and extension */
+	char*			m_filepath;
+
+private:
+	/** Determine the space id of the given file descriptor by reading
+	a few pages from the beginning of the .ibd file.
+	@return DB_SUCCESS if space id was successfully identified,
+	else DB_ERROR. */
+	dberr_t find_space_id();
+
+	/** Finds a given page of the given space id from the double write
+	buffer and copies it to the corresponding .ibd file.
+	@param[in]	restore_page_no	Page number to restore
+	@return DB_SUCCESS if page was restored, else DB_ERROR */
+	dberr_t restore_from_doublewrite(
+		ulint	restore_page_no);
+
+	/** Points into m_filepath to the file name with extension */
+	char*			m_filename;
+
+	/** Open file handle */
+	os_file_t		m_handle;
+
+	/** Flags to use for opening the data file */
+	os_file_create_t	m_open_flags;
+
+	/** size in database pages */
+	ulint			m_size;
+
+	/** ordinal position of this datafile in the tablespace */
+	ulint			m_order;
+
+	/** The type of the data file */
+	device_t		m_type;
+
+	/** Tablespace ID. Contained in the datafile header.
+	If this is a system tablespace, FSP_SPACE_ID is only valid
+	in the first datafile. */
+	ulint			m_space_id;
+
+	/** Tablespace flags. Contained in the datafile header.
+	If this is a system tablespace, FSP_SPACE_FLAGS are only valid
+	in the first datafile. */
+	ulint			m_flags;
+
+	/** true if file already existed on startup */
+	bool			m_exists;
+
+	/** true if the tablespace is valid */
+	bool			m_is_valid;
+
+	/** Buffer to hold first page */
+	byte*			m_first_page_buf;
+
+	/** Pointer to the first page held in the buffer above */
+	byte*			m_first_page;
+
+	/** true if atomic writes enabled for this file */
+	bool			m_atomic_write;
+
+protected:
+	/** Last OS error received so it can be reported if needed. */
+	ulint			m_last_os_error;
+
+public:
+	/** Use the following to determine the uniqueness of this datafile. */
+#ifdef _WIN32
+	/* Use fields dwVolumeSerialNumber, nFileIndexLow, nFileIndexHigh. */
+	BY_HANDLE_FILE_INFORMATION	m_file_info;
+#else
+	/* Use field st_ino. */
+	struct stat			m_file_info;
+#endif	/* _WIN32 */
+
+	/** Encryption key read from first page */
+	byte*			m_encryption_key;
+
+	/** Encryption iv read from first page */
+	byte*			m_encryption_iv;
+
+	/** Encryption information */
+	fil_space_crypt_t* 	m_crypt_info;
+};
+
+
+/** Control information for a data file located via an InnoDB Symbolic Link (ISL) file. */
+class RemoteDatafile : public Datafile
+{
+private:
+	/** Link filename (full path) */
+	char*	m_link_filepath;
+
+public:
+
+	RemoteDatafile()
+		:
+		m_link_filepath()
+	{
+		/* No op - base constructor is called. */
+	}
+
+	RemoteDatafile(const char* name, ulint size, ulint order)
+		:
+		m_link_filepath()
+	{
+		/* No op - default base constructor is called; args are unused. */
+	}
+
+	~RemoteDatafile()
+	{
+		shutdown();
+	}
+
+	/** Release the resources. */
+	void shutdown();
+
+	/** Get the link filepath.
+	@return m_link_filepath */
+	const char*	link_filepath()	const
+	{
+		return(m_link_filepath);
+	}
+
+	/** Set the link filepath. Use default datadir, the base name of
+	the path provided without its suffix, plus DOT_ISL.
+	@param[in]	path	filepath which contains a basename to use.
+				If NULL, use m_name as the basename. */
+	void set_link_filepath(const char* path);
+
+	/** Create a link filename based on the contents of m_name,
+	open that file, and read the contents into m_filepath.
+	@retval DB_SUCCESS if remote linked tablespace file is opened and read.
+	@retval DB_CANNOT_OPEN_FILE if the link file does not exist. */
+	dberr_t open_link_file();
+
+	/** Delete an InnoDB Symbolic Link (ISL) file. */
+	void delete_link_file(void);
+
+	/** Open a handle to the file linked to in an InnoDB Symbolic Link file
+	in read-only mode so that it can be validated.
+	@param[in]	strict	whether to issue error messages
+	@return DB_SUCCESS or error code */
+	dberr_t open_read_only(bool strict);
+
+	/** Opens a handle to the file linked to in an InnoDB Symbolic Link
+	file in read-write mode so that it can be restored from doublewrite
+	and validated.
+	@param[in]	read_only_mode	If true, then readonly mode checks
+					are enforced.
+	@return DB_SUCCESS or error code */
+	dberr_t open_read_write(bool read_only_mode)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/******************************************************************
+	Global Static Functions;  Cannot refer to data members.
+	******************************************************************/
+
+	/** Creates a new InnoDB Symbolic Link (ISL) file.  It is always
+	created under the 'datadir' of MySQL. The datadir is the directory
+	of a running mysqld program. We can refer to it by simply using
+	the path ".".
+	@param[in]	name		tablespace name
+	@param[in]	filepath	remote filepath of tablespace datafile
+	@param[in]	is_shared	true for general tablespace,
+					false for file-per-table
+	@return DB_SUCCESS or error code */
+	static dberr_t create_link_file(
+		const char*	name,
+		const char*	filepath,
+		bool		is_shared = false);
+
+	/** Delete an InnoDB Symbolic Link (ISL) file by name.
+	@param[in]	name	tablespace name */
+	static void delete_link_file(const char* name);
+
+	/** Read an InnoDB Symbolic Link (ISL) file by name.
+	It is always created under the datadir of MySQL.
+	For file-per-table tablespaces, the isl file is expected to be
+	in a 'database' directory and called 'tablename.isl'.
+	For general tablespaces, there will be no 'database' directory.
+	The 'basename.isl' will be in the datadir.
+	The caller must free the memory returned if it is not null.
+	@param[in]	link_filepath	filepath of the ISL file
+	@return Filepath of the IBD file read from the ISL file */
+	static char* read_link_file(
+		const char*	link_filepath);
+};
+#endif /* fsp0file_h */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index abcd5721a47..17ccb113e91 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -33,177 +33,27 @@ Created 12/18/1995 Heikki Tuuri
 
 #ifndef UNIV_INNOCHECKSUM
 
-#include "mtr0mtr.h"
+#include "fsp0space.h"
 #include "fut0lst.h"
-#include "ut0byte.h"
+#include "mtr0mtr.h"
 #include "page0types.h"
+#include "rem0types.h"
+#include "ut0byte.h"
 
 #endif /* !UNIV_INNOCHECKSUM */
+#include "fsp0types.h"
 
-/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
-
-/** Width of the POST_ANTELOPE flag */
-#define FSP_FLAGS_WIDTH_POST_ANTELOPE	1
-/** Number of flag bits used to indicate the tablespace zip page size */
-#define FSP_FLAGS_WIDTH_ZIP_SSIZE	4
-/** Width of the ATOMIC_BLOBS flag.  The ability to break up a long
-column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
-#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS	1
-/** Number of flag bits used to indicate the tablespace page size */
-#define FSP_FLAGS_WIDTH_PAGE_SSIZE	4
-/** Width of the DATA_DIR flag.  This flag indicates that the tablespace
-is found in a remote location, not the default data directory. */
-#define FSP_FLAGS_WIDTH_DATA_DIR	1
-/** Number of flag bits used to indicate the page compression and compression level */
-#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION  1
-#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4
-
-/** Number of flag bits used to indicate atomic writes for this tablespace */
-#define FSP_FLAGS_WIDTH_ATOMIC_WRITES  2
-
-/** Width of all the currently known tablespace flags */
-#define FSP_FLAGS_WIDTH		(FSP_FLAGS_WIDTH_POST_ANTELOPE	\
-				+ FSP_FLAGS_WIDTH_ZIP_SSIZE	\
-				+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS	\
-				+ FSP_FLAGS_WIDTH_PAGE_SSIZE	\
-				+ FSP_FLAGS_WIDTH_DATA_DIR      \
-				+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION \
-				+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \
-				+ FSP_FLAGS_WIDTH_ATOMIC_WRITES )
-
-/** A mask of all the known/used bits in tablespace flags */
-#define FSP_FLAGS_MASK		(~(~0 << FSP_FLAGS_WIDTH))
-
-/** Zero relative shift position of the POST_ANTELOPE field */
-#define FSP_FLAGS_POS_POST_ANTELOPE	0
-/** Zero relative shift position of the ZIP_SSIZE field */
-#define FSP_FLAGS_POS_ZIP_SSIZE		(FSP_FLAGS_POS_POST_ANTELOPE	\
-					+ FSP_FLAGS_WIDTH_POST_ANTELOPE)
-/** Zero relative shift position of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_POS_ATOMIC_BLOBS	(FSP_FLAGS_POS_ZIP_SSIZE	\
-					+ FSP_FLAGS_WIDTH_ZIP_SSIZE)
-/** Note that these need to be before the page size to be compatible with
-dictionary */
-/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION	(FSP_FLAGS_POS_ATOMIC_BLOBS	\
-					+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL	(FSP_FLAGS_POS_PAGE_COMPRESSION	\
-					+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define FSP_FLAGS_POS_ATOMIC_WRITES	(FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL	\
-					+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)
-/** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE	(FSP_FLAGS_POS_ATOMIC_WRITES	\
-					+ FSP_FLAGS_WIDTH_ATOMIC_WRITES)
-/** Zero relative shift position of the start of the UNUSED bits */
-#define FSP_FLAGS_POS_DATA_DIR		(FSP_FLAGS_POS_PAGE_SSIZE	\
-					+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
 #define FSP_FLAGS_POS_DATA_DIR_ORACLE	(FSP_FLAGS_POS_ATOMIC_BLOBS	\
 					+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS  \
 					+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
-/** Zero relative shift position of the start of the UNUSED bits */
-#define FSP_FLAGS_POS_UNUSED		(FSP_FLAGS_POS_DATA_DIR	\
-					+ FSP_FLAGS_WIDTH_DATA_DIR)
-
-/** Bit mask of the POST_ANTELOPE field */
-#define FSP_FLAGS_MASK_POST_ANTELOPE				\
-		((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE))	\
-		<< FSP_FLAGS_POS_POST_ANTELOPE)
-/** Bit mask of the ZIP_SSIZE field */
-#define FSP_FLAGS_MASK_ZIP_SSIZE				\
-		((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE))		\
-		<< FSP_FLAGS_POS_ZIP_SSIZE)
-/** Bit mask of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_MASK_ATOMIC_BLOBS				\
-		((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS))	\
-		<< FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Bit mask of the PAGE_SSIZE field */
-#define FSP_FLAGS_MASK_PAGE_SSIZE				\
-		((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE))		\
-		<< FSP_FLAGS_POS_PAGE_SSIZE)
-/** Bit mask of the DATA_DIR field */
-#define FSP_FLAGS_MASK_DATA_DIR					\
-		((~(~0U << FSP_FLAGS_WIDTH_DATA_DIR))		\
-		<< FSP_FLAGS_POS_DATA_DIR)
 /** Bit mask of the DATA_DIR field */
 #define FSP_FLAGS_MASK_DATA_DIR_ORACLE				\
 		((~(~0U << FSP_FLAGS_WIDTH_DATA_DIR))		\
 		<< FSP_FLAGS_POS_DATA_DIR_ORACLE)
-/** Bit mask of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION			\
-		((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION))	\
-		<< FSP_FLAGS_POS_PAGE_COMPRESSION)
-/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL		\
-		((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL))	\
-		<< FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
-/** Bit mask of the ATOMIC_WRITES field */
-#define FSP_FLAGS_MASK_ATOMIC_WRITES		\
-		((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_WRITES))	\
-		<< FSP_FLAGS_POS_ATOMIC_WRITES)
-/** Return the value of the POST_ANTELOPE field */
-#define FSP_FLAGS_GET_POST_ANTELOPE(flags)			\
-		((flags & FSP_FLAGS_MASK_POST_ANTELOPE)		\
-		>> FSP_FLAGS_POS_POST_ANTELOPE)
-/** Return the value of the ZIP_SSIZE field */
-#define FSP_FLAGS_GET_ZIP_SSIZE(flags)				\
-		((flags & FSP_FLAGS_MASK_ZIP_SSIZE)		\
-		>> FSP_FLAGS_POS_ZIP_SSIZE)
-/** Return the value of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags)			\
-		((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS)		\
-		>> FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Return the value of the PAGE_SSIZE field */
-#define FSP_FLAGS_GET_PAGE_SSIZE(flags)				\
-		((flags & FSP_FLAGS_MASK_PAGE_SSIZE)		\
-		>> FSP_FLAGS_POS_PAGE_SSIZE)
-/** Return the value of the DATA_DIR field */
-#define FSP_FLAGS_HAS_DATA_DIR(flags)				\
-		((flags & FSP_FLAGS_MASK_DATA_DIR)		\
-		>> FSP_FLAGS_POS_DATA_DIR)
+
 #define FSP_FLAGS_HAS_DATA_DIR_ORACLE(flags)			\
 		((flags & FSP_FLAGS_MASK_DATA_DIR_ORACLE)	\
 		>> FSP_FLAGS_POS_DATA_DIR_ORACLE)
-/** Return the contents of the UNUSED bits */
-#define FSP_FLAGS_GET_UNUSED(flags)				\
-		(flags >> FSP_FLAGS_POS_UNUSED)
-
-/** Return the value of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags)		\
-		((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION)	\
-		>> FSP_FLAGS_POS_PAGE_COMPRESSION)
-/** Return the value of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)		\
-		((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \
-		>> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
-/** Return the value of the ATOMIC_WRITES field */
-#define FSP_FLAGS_GET_ATOMIC_WRITES(flags)		\
-		((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \
-		>> FSP_FLAGS_POS_ATOMIC_WRITES)
-
-/** Set a PAGE_SSIZE into the correct bits in a given
-tablespace flags. */
-#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize)			\
-		(flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE))
-
-/** Set a PAGE_COMPRESSION into the correct bits in a given
-tablespace flags. */
-#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression)	\
-		(flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION))
-
-/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given
-tablespace flags. */
-#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level)	\
-		(flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL))
-
-/** Set a ATOMIC_WRITES into the correct bits in a given
-tablespace flags. */
-#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics)	\
-		(flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES))
-
-/* @} */
 
 /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */
 
@@ -237,7 +87,7 @@ descriptor page, but used only in the first. */
 					< 64 pages, this number is 64, i.e.,
 					we have initialized the space
 					about the first extent, but have not
-					physically allocted those pages to the
+					physically allocated those pages to the
 					file */
 #define	FSP_SPACE_FLAGS		16	/* fsp_space_t.flags, similar to
 					dict_table_t::flags */
@@ -270,6 +120,7 @@ descriptor page, but used only in the first. */
 					FSP_FREE_LIMIT at a time */
 /* @} */
 
+#ifndef UNIV_INNOCHECKSUM
 
 /* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */
 
@@ -318,9 +169,8 @@ typedef	byte	fseg_inode_t;
 	(16 + 3 * FLST_BASE_NODE_SIZE			\
 	 + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
 
-#define FSP_SEG_INODES_PER_PAGE(zip_size)		\
-	(((zip_size ? zip_size : UNIV_PAGE_SIZE)	\
-	  - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+#define FSP_SEG_INODES_PER_PAGE(page_size)		\
+	((page_size.physical() - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
 				/* Number of segment inodes which fit on a
 				single page */
 
@@ -407,60 +257,123 @@ the extent are free and which contain old tuple version to clean. */
 
 /* @} */
 
-#ifndef UNIV_INNOCHECKSUM
 /**********************************************************************//**
 Initializes the file space system. */
-UNIV_INTERN
 void
 fsp_init(void);
 /*==========*/
+
 /**********************************************************************//**
 Gets the size of the system tablespace from the tablespace header.  If
 we do not have an auto-extending data file, this should be equal to
 the size of the data files.  If there is an auto-extending data file,
 this can be smaller.
-@return	size in pages */
-UNIV_INTERN
+@return size in pages */
 ulint
 fsp_header_get_tablespace_size(void);
 /*================================*/
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return	tablespace size stored in the space header */
-UNIV_INTERN
+
+/** Calculate the number of pages to extend a datafile.
+We extend single-table and general tablespaces first one extent at a time,
+but 4 at a time for bigger tablespaces. It is not enough to extend always
+by one extent, because we need to add at least one extent to FSP_FREE.
+A single extent descriptor page will track many extents. And the extent
+that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
+Extents that do not use their extent descriptor page are added to FSP_FREE.
+The physical page size is used to determine how many extents are tracked
+on one extent descriptor page. See xdes_calc_descriptor_page().
+@param[in]	page_size	page_size of the datafile
+@param[in]	size		current number of pages in the datafile
+@return number of pages to extend the file. */
 ulint
-fsp_get_size_low(
-/*=============*/
-	page_t*	page);	/*!< in: header page (page 0 in the tablespace) */
+fsp_get_pages_to_extend_ibd(
+	const page_size_t&	page_size,
+	ulint			size);
+
+/** Calculate the number of physical pages in an extent for this file.
+@param[in]	page_size	page size of the datafile; only physical() is used
+@return FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical() */
+UNIV_INLINE
+ulint
+fsp_get_extent_size_in_pages(const page_size_t&	page_size)
+{
+	return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical());
+}
+
 /**********************************************************************//**
 Reads the space id from the first page of a tablespace.
-@return	space id, ULINT UNDEFINED if error */
-UNIV_INTERN
+@return space id, ULINT UNDEFINED if error */
 ulint
 fsp_header_get_space_id(
 /*====================*/
 	const page_t*	page);	/*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return	flags */
-UNIV_INTERN
+
+/** Read a tablespace header field with mach_read_from_4().
+@param[in]	page	first page of a tablespace
+@param[in]	field	offset of the field, relative to FSP_HEADER_OFFSET
+@return the contents of the header field */
+inline
 ulint
-fsp_header_get_flags(
-/*=================*/
-	const page_t*	page);	/*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return	compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
+fsp_header_get_field(const page_t* page, ulint field)
+{
+	return(mach_read_from_4(FSP_HEADER_OFFSET + field + page));
+}
+
+/** Read the flags from the tablespace header page.
+@param[in]	page	first page of a tablespace
+@return the FSP_SPACE_FLAGS field, as read by fsp_header_get_field() */
+inline
 ulint
-fsp_header_get_zip_size(
-/*====================*/
-	const page_t*	page);	/*!< in: first page of a tablespace */
+fsp_header_get_flags(const page_t* page)
+{
+	return(fsp_header_get_field(page, FSP_SPACE_FLAGS));
+}
+
+/** Reads the page size from the first page of a tablespace.
+@param[in]	page	first page of a tablespace
+@return page size */
+page_size_t
+fsp_header_get_page_size(
+	const page_t*	page);
+
+/** Decoding the encryption info
+from the first page of a tablespace.
+@param[in,out]	key		key
+@param[in,out]	iv		iv
+@param[in]	encryption_info	encryption info.
+@return true if success */
+bool
+fsp_header_decode_encryption_info(
+	byte*		key,
+	byte*		iv,
+	byte*		encryption_info);
+
+/** Reads the encryption key from the first page of a tablespace.
+@param[in]	fsp_flags	tablespace flags
+@param[in,out]	key		tablespace key
+@param[in,out]	iv		tablespace iv
+@param[in]	page	first page of a tablespace
+@return true if success */
+bool
+fsp_header_get_encryption_key(
+	ulint		fsp_flags,
+	byte*		key,
+	byte*		iv,
+	page_t*		page);
+
+/** Check the encryption key from the first page of a tablespace.
+@param[in]	fsp_flags	tablespace flags
+@param[in]	page		first page of a tablespace
+@return true if success */
+bool
+fsp_header_check_encryption_key(
+	ulint			fsp_flags,
+	page_t*			page);
+
 /**********************************************************************//**
 Writes the space id and flags to a tablespace header.  The flags contain
 row type, physical/compressed page size, and logical/uncompressed page
 size of the tablespace. */
-UNIV_INTERN
 void
 fsp_header_init_fields(
 /*===================*/
@@ -468,34 +381,46 @@ fsp_header_init_fields(
 	ulint	space_id,	/*!< in: space id */
 	ulint	flags);		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
 				0, or table->flags if newer than COMPACT */
-/**********************************************************************//**
-Initializes the space header of a new created space and creates also the
-insert buffer tree root if space == 0. */
-UNIV_INTERN
-void
+
+/** Rotate the encryption info in the space header.
+@param[in]	space		tablespace
+@param[in]      encrypt_info	buffer for re-encrypt key.
+@param[in,out]	mtr		mini-transaction
+@return true if success. */
+bool
+fsp_header_rotate_encryption(
+	fil_space_t*		space,
+	byte*			encrypt_info,
+	mtr_t*			mtr);
+
+/** Initializes the space header of a new created space and creates also the
+insert buffer tree root if space == 0.
+@param[in]	space_id	space id
+@param[in]	size		current size in blocks
+@param[in,out]	mtr		mini-transaction
+@return	true on success, otherwise false. */
+bool
 fsp_header_init(
-/*============*/
-	ulint	space,		/*!< in: space id */
-	ulint	size,		/*!< in: current size in blocks */
-	mtr_t*	mtr);		/*!< in/out: mini-transaction */
+	ulint	space_id,
+	ulint	size,
+	mtr_t*	mtr);
+
 /**********************************************************************//**
 Increases the space size field of a space. */
-UNIV_INTERN
 void
 fsp_header_inc_size(
 /*================*/
-	ulint	space,		/*!< in: space id */
+	ulint	space_id,	/*!< in: space id */
 	ulint	size_inc,	/*!< in: size increment in pages */
 	mtr_t*	mtr);		/*!< in/out: mini-transaction */
 /**********************************************************************//**
 Creates a new segment.
 @return the block where the segment header is placed, x-latched, NULL
 if could not create segment because of lack of space */
-UNIV_INTERN
 buf_block_t*
 fseg_create(
 /*========*/
-	ulint	space,	/*!< in: space id */
+	ulint	space_id,/*!< in: space id */
 	ulint	page,	/*!< in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
@@ -507,11 +432,10 @@ fseg_create(
 Creates a new segment.
 @return the block where the segment header is placed, x-latched, NULL
 if could not create segment because of lack of space */
-UNIV_INTERN
 buf_block_t*
 fseg_create_general(
 /*================*/
-	ulint	space,	/*!< in: space id */
+	ulint	space_id,/*!< in: space id */
 	ulint	page,	/*!< in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
@@ -528,8 +452,7 @@ fseg_create_general(
 /**********************************************************************//**
 Calculates the number of pages reserved by a segment, and how many pages are
 currently used.
-@return	number of reserved pages */
-UNIV_INTERN
+@return number of reserved pages */
 ulint
 fseg_n_reserved_pages(
 /*==================*/
@@ -540,15 +463,15 @@ fseg_n_reserved_pages(
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize
 file space fragmentation.
-@param[in/out] seg_header	segment header
-@param[in] hint			hint of which page would be desirable
-@param[in] direction		if the new page is needed because
+@param[in,out] seg_header segment header
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR
-@param[in/out] mtr		mini-transaction
-@return	X-latched block, or NULL if no page could be allocated */
+@param[in,out] mtr mini-transaction
+@return X-latched block, or NULL if no page could be allocated */
 #define fseg_alloc_free_page(seg_header, hint, direction, mtr)		\
 	fseg_alloc_free_page_general(seg_header, hint, direction,	\
 				     FALSE, mtr, mtr)
@@ -560,7 +483,6 @@ fragmentation.
 @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
 (init_mtr == mtr, or the page was not previously freed in mtr)
 @retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
 buf_block_t*
 fseg_alloc_free_page_general(
 /*=========================*/
@@ -583,8 +505,8 @@ fseg_alloc_free_page_general(
 				If init_mtr!=mtr, but the page is already
 				latched in mtr, do not initialize the page. */
 	MY_ATTRIBUTE((warn_unused_result, nonnull));
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
+
+/** Reserves free pages from a tablespace. All mini-transactions which may
 use several pages from the tablespace should call this function beforehand
 and reserve enough free extents so that they certainly will be able
 to do their operation, like a B-tree page split, fully. Reservations
@@ -603,53 +525,71 @@ The purpose is to avoid dead end where the database is full but the
 user cannot free any space because these freeing operations temporarily
 reserve some space.
 
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return	TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
+Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
+case. In this function we would liberally reserve several extents for
+every page split or merge in a B-tree. But we do not want to waste disk space
+if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
+different rules in that special case, just ensuring that there are n_pages
+free pages available.
+
+@param[out]	n_reserved	number of extents actually reserved; if we
+				return true and the tablespace size is <
+				FSP_EXTENT_SIZE pages, then this can be 0,
+				otherwise it is n_ext
+@param[in]	space_id	tablespace identifier
+@param[in]	n_ext		number of extents to reserve
+@param[in]	alloc_type	page reservation type (FSP_BLOB, etc)
+@param[in,out]	mtr		the mini transaction
+@param[in]	n_pages		for small tablespaces (tablespace size is
+				less than FSP_EXTENT_SIZE), number of free
+				pages to reserve.
+@return true if we were able to make the reservation */
+bool
 fsp_reserve_free_extents(
-/*=====================*/
-	ulint*	n_reserved,/*!< out: number of extents actually reserved; if we
-			return TRUE and the tablespace size is < 64 pages,
-			then this can be 0, otherwise it is n_ext */
-	ulint	space,	/*!< in: space id */
-	ulint	n_ext,	/*!< in: number of extents to reserve */
-	ulint	alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
-	mtr_t*	mtr);	/*!< in: mini-transaction */
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return	available space in kB */
-UNIV_INTERN
-ullint
+	ulint*		n_reserved,
+	ulint		space_id,
+	ulint		n_ext,
+	fsp_reserve_t	alloc_type,
+	mtr_t*		mtr,
+	ulint		n_pages = 2);
+
+/** Calculate how many KiB of new data we will be able to insert to the
+tablespace without running out of space.
+@param[in]	space_id	tablespace ID
+@return available space in KiB
+@retval UINTMAX_MAX if unknown */
+uintmax_t
 fsp_get_available_space_in_free_extents(
-/*====================================*/
-	ulint	space);	/*!< in: space id */
+	ulint		space_id);
+
+/** Calculate how many KiB of new data we will be able to insert to the
+tablespace without running out of space. Start with a space object that has
+been acquired by the caller who holds it for the calculation.
+@param[in]	space		tablespace object from fil_space_acquire()
+@return available space in KiB */
+uintmax_t
+fsp_get_available_space_in_free_extents(
+	const fil_space_t*	space);
+
 /**********************************************************************//**
 Frees a single page of a segment. */
-UNIV_INTERN
 void
 fseg_free_page(
 /*===========*/
 	fseg_header_t*	seg_header, /*!< in: segment header */
-	ulint		space,	/*!< in: space id */
+	ulint		space_id, /*!< in: space id */
 	ulint		page,	/*!< in: page offset */
+	bool		ahi,	/*!< in: whether we may need to drop
+				the adaptive hash index */
 	mtr_t*		mtr);	/*!< in/out: mini-transaction */
 /**********************************************************************//**
 Checks if a single page of a segment is free.
-@return	true if free */
-UNIV_INTERN
+@return true if free */
 bool
 fseg_page_is_free(
 /*==============*/
 	fseg_header_t*	seg_header,	/*!< in: segment header */
-	ulint		space,		/*!< in: space id */
+	ulint		space_id,	/*!< in: space id */
 	ulint		page)		/*!< in: page offset */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /**********************************************************************//**
@@ -657,8 +597,7 @@ Frees part of a segment. This function can be used to free a segment
 by repeatedly calling this function in different mini-transactions.
 Doing the freeing in a single mini-transaction might result in
 too big a mini-transaction.
-@return	TRUE if freeing completed */
-UNIV_INTERN
+@return TRUE if freeing completed */
 ibool
 fseg_free_step(
 /*===========*/
@@ -666,58 +605,47 @@ fseg_free_step(
 				resides on the first page of the frag list
 				of the segment, this pointer becomes obsolete
 				after the last freeing step */
-	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+	bool		ahi,	/*!< in: whether we may need to drop
+				the adaptive hash index */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************************//**
 Frees part of a segment. Differs from fseg_free_step because this function
 leaves the header page unfreed.
-@return	TRUE if freeing completed, except the header page */
-UNIV_INTERN
+@return TRUE if freeing completed, except the header page */
 ibool
 fseg_free_step_not_header(
 /*======================*/
 	fseg_header_t*	header,	/*!< in: segment header which must reside on
 				the first fragment page of the segment */
-	mtr_t*		mtr);	/*!< in/out: mini-transaction */
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return	TRUE if a descriptor page */
+	bool		ahi,	/*!< in: whether we may need to drop
+				the adaptive hash index */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Checks if a page address is an extent descriptor page address.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return TRUE if a descriptor page */
 UNIV_INLINE
 ibool
 fsp_descr_page(
-/*===========*/
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	page_no);/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size);
+
 /***********************************************************//**
 Parses a redo log record of a file page init.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 fsp_parse_init_file_page(
 /*=====================*/
 	byte*		ptr,	/*!< in: buffer */
 	byte*		end_ptr, /*!< in: buffer end */
 	buf_block_t*	block);	/*!< in: block or NULL */
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
-	ulint	space);	/*!< in: space id */
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
-	ulint	space);	/*!< in: space id */
 #ifdef UNIV_DEBUG
 /*******************************************************************//**
 Validates a segment.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 fseg_validate(
 /*==========*/
@@ -727,7 +655,6 @@ fseg_validate(
 #ifdef UNIV_BTR_PRINT
 /*******************************************************************//**
 Writes info of a segment. */
-UNIV_INTERN
 void
 fseg_print(
 /*=======*/
@@ -735,42 +662,95 @@ fseg_print(
 	mtr_t*		mtr);	/*!< in/out: mini-transaction */
 #endif /* UNIV_BTR_PRINT */
 
-/********************************************************************//**
-Validate and return the tablespace flags, which are stored in the
-tablespace header at offset FSP_SPACE_FLAGS.  They should be 0 for
-ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
-COMPRESSED and DYNAMIC, use a file format > Antelope so they should
-have a file format number plus the DICT_TF_COMPACT bit set.
-@return	true if check ok */
+/** Determine if the tablespace is compressed from tablespace flags.
+@param[in]	flags	Tablespace flags
+@return true if compressed, false if not compressed */
 UNIV_INLINE
 bool
-fsp_flags_is_valid(
-/*===============*/
-	ulint	flags)		/*!< in: tablespace flags */
-	MY_ATTRIBUTE((warn_unused_result, const));
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return	TRUE if compressed, FALSE if not compressed */
-UNIV_INLINE
-ibool
 fsp_flags_is_compressed(
-/*====================*/
-	ulint	flags);	/*!< in: tablespace flags */
+	ulint	flags);
 
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return	descriptor index */
+/** Determine if two tablespaces are equivalent or compatible.
+@param[in]	flags1	First tablespace flags
+@param[in]	flags2	Second tablespace flags
+@return true if the flags are compatible, false if not
+UNIV_INLINE
+bool
+fsp_flags_are_equal(
+	ulint	flags1,
+	ulint	flags2);
+
+/** Initialize an FSP flags integer.
+@param[in]	page_size	page sizes in bytes and compression flag.
+@param[in]	atomic_blobs	Used by Dynamic and Compressed.
+@param[in]	has_data_dir	This tablespace is in a remote location.
+@param[in]	is_shared	This tablespace can be shared by many tables.
+@param[in]	is_temporary	This tablespace is temporary.
+@param[in]	is_encrypted	This tablespace is encrypted.
+@return tablespace flags after initialization */
+UNIV_INLINE
+ulint
+fsp_flags_init(
+	const page_size_t&	page_size,
+	bool			atomic_blobs,
+	bool			has_data_dir,
+	bool			is_shared,
+	bool			is_temporary,
+	bool			page_compression,
+	ulint			page_compression_level,
+	ulint			atomic_writes,
+	bool			is_encrypted = false);
+
+/** Convert a 32 bit integer tablespace flags to the 32 bit table flags.
+This can only be done for a tablespace that was built as a file-per-table
+tablespace. Note that the fsp_flags cannot show the difference between a
+Compact and Redundant table, so an extra Compact boolean must be supplied.
+			Low order bit
+                    | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
+fil_space_t::flags  |     0     |    0    |     1      |    1
+dict_table_t::flags |     0     |    1    |     1      |    1
+@param[in]	fsp_flags	fil_space_t::flags
+@param[in]	compact		true if not Redundant row format
+@return table flags (dict_table_t::flags)
+ulint
+fsp_flags_to_dict_tf(
+	ulint	fsp_flags,
+	bool	compact);
+
+/** Calculates the descriptor index within a descriptor page.
+@param[in]	page_size	page size
+@param[in]	offset		page offset
+@return descriptor index */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_index(
-/*=======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset);	/*!< in: page offset */
+	const page_size_t&	page_size,
+	ulint			offset);
+
+/** Gets pointer to the extent descriptor of a page.
+The page where the extent descriptor resides is x-locked. If the page offset
+is equal to the free limit of the space, adds new extents from above the free
+limit to the space free list, if not free limit == space size. This adding
+is necessary to make the descriptor defined, as they are uninitialized
+above the free limit.
+@param[in]	space_id	space id
+@param[in]	offset		page offset; if equal to the free limit, we
+try to add new extents to the space free list
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds the free limit */
+xdes_t*
+xdes_get_descriptor(
+	ulint			space_id,
+	ulint			offset,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
+MY_ATTRIBUTE((warn_unused_result));
 
 /**********************************************************************//**
 Gets a descriptor bit of a page.
-@return	TRUE if free */
+@return TRUE if free */
 UNIV_INLINE
 ibool
 xdes_get_bit(
@@ -780,51 +760,30 @@ xdes_get_bit(
 	ulint		offset);/*!< in: page offset within extent:
 				0 ... FSP_EXTENT_SIZE - 1 */
 
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return	descriptor page offset */
+/** Calculates the page where the descriptor of a page resides.
+@param[in]	page_size	page size
+@param[in]	offset		page offset
+@return descriptor page offset */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_page(
-/*======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset);	/*!< in: page offset */
+	const page_size_t&	page_size,
+	ulint			offset);
 
 #endif /* !UNIV_INNOCHECKSUM */
 
-/********************************************************************//**
-Extract the zip size from tablespace flags.  A tablespace has only one
-physical page size whether that page is compressed or not.
-@return	compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed.  */
-UNIV_INLINE
-ulint
-fsp_flags_get_zip_size(
-/*====================*/
-	ulint	flags);		/*!< in: tablespace flags */
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return	page size of the tablespace in bytes */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_size(
-/*====================*/
-	ulint	flags);		/*!< in: tablespace flags */
-
-/*********************************************************************/
-/* @return offset into fsp header where crypt data is stored */
+/*********************************************************************//**
+@return offset into fsp header where crypt data is stored */
 UNIV_INTERN
 ulint
 fsp_header_get_crypt_offset(
 /*========================*/
-	ulint zip_size,		/*!< in: zip_size */
+	const page_size_t&	page_size,/*!< in: page size */
 	ulint* max_size);	/*!< out: free space after offset */
 
-#define fsp_page_is_free(space,page,mtr) \
-	fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
 
 #ifndef UNIV_INNOCHECKSUM
+
 /**********************************************************************//**
 Checks if a single page is free.
 @return	true if free */
@@ -837,7 +796,11 @@ fsp_page_is_free_func(
 	mtr_t*		mtr,		/*!< in/out: mini-transaction */
 	const char *file,
 	ulint line);
-#endif
+
+#define fsp_page_is_free(space,page,mtr)				\
+	fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
+
+#endif /* !UNIV_INNOCHECKSUM */
 
 #ifndef UNIV_NONINL
 #include "fsp0fsp.ic"
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 9f09a9d53e1..475dd238728 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,260 +26,255 @@ Created 12/18/1995 Heikki Tuuri
 
 #ifndef UNIV_INNOCHECKSUM
 
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return	TRUE if a descriptor page */
+/** Checks if a page address is an extent descriptor page address.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return TRUE if a descriptor page */
 UNIV_INLINE
 ibool
 fsp_descr_page(
-/*===========*/
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	page_no)/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size)
 {
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (!zip_size) {
-		return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET);
-	}
-
-	return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET);
+	return((page_id.page_no() & (page_size.physical() - 1))
+	       == FSP_XDES_OFFSET);
 }
 
-/********************************************************************//**
-Validate and return the tablespace flags, which are stored in the
-tablespace header at offset FSP_SPACE_FLAGS.  They should be 0 for
-ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
-COMPRESSED and DYNAMIC, use a file format > Antelope so they should
-have a file format number plus the DICT_TF_COMPACT bit set.
-@return	true if check ok */
+/** Determine if the tablespace is compressed from tablespace flags.
+@param[in]	flags	Tablespace flags
+@return true if compressed, false if not compressed */
 UNIV_INLINE
 bool
-fsp_flags_is_valid(
-/*===============*/
-	ulint	flags)		/*!< in: tablespace flags */
+fsp_flags_is_compressed(
+	ulint	flags)
 {
-	ulint	post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags);
-	ulint	zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
-	ulint	atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
-	ulint	page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
-	ulint	unused = FSP_FLAGS_GET_UNUSED(flags);
-	ulint	page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags);
-	ulint	page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags);
-	ulint	atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
-
-	DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
-
-	/* fsp_flags is zero unless atomic_blobs is set. */
-	/* Make sure there are no bits that we do not know about. */
-	if (unused != 0 || flags == 1) {
-		fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n",
-			flags, unused);
-		return(false);
-	} else if (post_antelope) {
-		/* The Antelope row formats REDUNDANT and COMPACT did
-		not use tablespace flags, so this flag and the entire
-		4-byte field is zero for Antelope row formats. */
-
-		if (!atomic_blobs) {
-			fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n",
-				flags, atomic_blobs);
-			return(false);
-		}
-	}
-
-	if (!atomic_blobs) {
-		/* Barracuda row formats COMPRESSED and DYNAMIC build on
-		the page structure introduced for the COMPACT row format
-		by allowing long fields to be broken into prefix and
-		externally stored parts. */
-
-		if (post_antelope || zip_ssize != 0) {
-			fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n",
-				flags, zip_ssize, atomic_blobs);
-			return(false);
-		}
-
-	} else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
-		fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n",
-			flags, zip_ssize, PAGE_ZIP_SSIZE_MAX);
-		return(false);
-	} else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
-
-		/* The page size field can be used for any row type, or it may
-		be zero for an original 16k page size.
-		Validate the page shift size is within allowed range. */
+	return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0);
+}
 
-		fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n",
-			flags, page_ssize, UNIV_PAGE_SSIZE_MAX);
-		return(false);
+#define ACTUAL_SSIZE(ssize)	(0 == (ssize) ? UNIV_PAGE_SSIZE_ORIG : (ssize))
 
-	} else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
-		fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n",
-			flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG);
+/** Determine if two tablespaces are equivalent or compatible.
+@param[in]	flags1	First tablespace flags
+@param[in]	flags2	Second tablespace flags
+@return true if the flags are compatible, false if not */
+UNIV_INLINE
+bool
+fsp_flags_are_equal(
+	ulint	flags1,
+	ulint	flags2)
+{
+	/* If either one of these flags is ULINT_UNDEFINED,
+	then they are not equal */
+	if (flags1 == ULINT_UNDEFINED || flags2 == ULINT_UNDEFINED) {
 		return(false);
 	}
 
-	/* Page compression level requires page compression and atomic blobs
-	to be set */
-        if (page_compression_level || page_compression) {
-		if (!page_compression || !atomic_blobs) {
-			fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n"
-				"InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n",
-				flags, page_compression, page_compression_level, atomic_blobs);
-			return(false);
-		}
+	if (!fsp_is_shared_tablespace(flags1) || !fsp_is_shared_tablespace(flags2)) {
+		/* At least one of these is a single-table tablespace so all
+		flags must match. */
+		return(flags1 == flags2);
 	}
 
-	if (atomic_writes > ATOMIC_WRITES_OFF) {
-		fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n",
-			flags, atomic_writes);
-		return (false);
-	}
-
-#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
-# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
-#endif
+	/* Both are shared tablespaces which can contain all formats.
+	But they must have the same logical and physical page size.
+	Once InnoDB can support multiple page sizes together,
+	the logical page size will not matter. */
+	ulint zip_ssize1 = ACTUAL_SSIZE(FSP_FLAGS_GET_ZIP_SSIZE(flags1));
+	ulint zip_ssize2 = ACTUAL_SSIZE(FSP_FLAGS_GET_ZIP_SSIZE(flags2));
+	ulint page_ssize1 = ACTUAL_SSIZE(FSP_FLAGS_GET_PAGE_SSIZE(flags1));
+	ulint page_ssize2 = ACTUAL_SSIZE(FSP_FLAGS_GET_PAGE_SSIZE(flags2));
 
-	/* The DATA_DIR field can be used for any row type so there is
-	nothing here to validate. */
-
-	return(true);
+	return(zip_ssize1 == zip_ssize2 && page_ssize1 == page_ssize2);
 }
 
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return	TRUE if compressed, FALSE if not compressed */
+/** Convert a page size, which is a power of 2, to an ssize, which is
+the number of bit shifts from 512 to make that page size.
+@param[in]	page_size	compressed page size in bytes
+@return an ssize created from the page size provided. */
 UNIV_INLINE
-ibool
-fsp_flags_is_compressed(
-/*====================*/
-	ulint	flags)	/*!< in: tablespace flags */
+ulint
+page_size_to_ssize(
+	ulint	page_size)
 {
-	return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0);
-}
+	ulint ssize;
 
-#endif /* !UNIV_INNOCHECKSUM */
+	for (ssize = UNIV_ZIP_SIZE_SHIFT_MIN;
+	     ((ulint) 1 << ssize) < page_size;
+	     ssize++) {};
 
-/********************************************************************//**
-Extract the zip size from tablespace flags.
-@return	compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed. */
+	return(ssize - UNIV_ZIP_SIZE_SHIFT_MIN + 1);
+}
+
+/** Add the compressed page size to the tablespace flags.
+@param[in]	flags		Tablespace flags
+@param[in]	page_size	page sizes in bytes and compression flag.
+@return tablespace flags after zip size is added */
 UNIV_INLINE
 ulint
-fsp_flags_get_zip_size(
-/*===================*/
-	ulint	flags)	/*!< in: tablespace flags */
+fsp_flags_set_zip_size(
+	ulint			flags,
+	const page_size_t&	page_size)
 {
-	ulint	zip_size = 0;
-	ulint	ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+	if (!page_size.is_compressed()) {
+		return(flags);
+	}
 
-	/* Convert from a 'log2 minus 9' to a page size in bytes. */
-	if (ssize) {
-		zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+	/* Zip size should be a power of 2 between UNIV_ZIP_SIZE_MIN
+	and UNIV_ZIP_SIZE_MAX */
+	ut_ad(page_size.physical() >= UNIV_ZIP_SIZE_MIN);
+	ut_ad(page_size.physical() <= UNIV_ZIP_SIZE_MAX);
+	ut_ad(ut_is_2pow(page_size.physical()));
 
-		ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-	}
+	ulint	ssize = page_size_to_ssize(page_size.physical());
+
+	ut_ad(ssize > 0);
+	ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX);
 
-	return(zip_size);
+	flags |= (ssize << FSP_FLAGS_POS_ZIP_SSIZE);
+
+	ut_ad(fsp_flags_is_valid(flags));
+
+	return(flags);
 }
 
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return	page size of the tablespace in bytes */
+/** Add the page size to the tablespace flags.
+@param[in]	flags		Tablespace flags
+@param[in]	page_size	page sizes in bytes and compression flag.
+@return tablespace flags after page size is added */
 UNIV_INLINE
 ulint
-fsp_flags_get_page_size(
-/*====================*/
-	ulint	flags)	/*!< in: tablespace flags */
+fsp_flags_set_page_size(
+	ulint			flags,
+	const page_size_t&	page_size)
 {
-	ulint	page_size = 0;
-	ulint	ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
-
-	/* Convert from a 'log2 minus 9' to a page size in bytes. */
-	if (UNIV_UNLIKELY(ssize)) {
-		page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+	/* Page size should be a power of two between UNIV_PAGE_SIZE_MIN
+	and UNIV_PAGE_SIZE_MAX */
+	ut_ad(page_size.logical() >= UNIV_PAGE_SIZE_MIN);
+	ut_ad(page_size.logical() <= UNIV_PAGE_SIZE_MAX);
+	ut_ad(ut_is_2pow(page_size.logical()));
+
+	/* Remove this assert once we add support for different
+	page size per tablespace. Currently all tablespaces must
+	have a page size that is equal to innodb-page-size */
+	ut_ad(page_size.logical() == UNIV_PAGE_SIZE);
+
+	if (page_size.logical() == UNIV_PAGE_SIZE_ORIG) {
+		ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags));
 
-		ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
 	} else {
-		/* If the page size was not stored, then it is the
-		original 16k. */
-		page_size = UNIV_PAGE_SIZE_ORIG;
+		ulint	ssize = page_size_to_ssize(page_size.logical());
+
+		ut_ad(ssize);
+		ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX);
+
+		flags |= (ssize << FSP_FLAGS_POS_PAGE_SSIZE);
 	}
 
-	return(page_size);
-}
+	ut_ad(fsp_flags_is_valid(flags));
 
-#ifndef UNIV_INNOCHECKSUM
+	return(flags);
+}
 
-/********************************************************************//**
-Add the page size to the tablespace flags.
-@return	tablespace flags after page size is added */
+/** Initialize an FSP flags integer.
+@param[in]	page_size	page sizes in bytes and compression flag.
+@param[in]	atomic_blobs	Used by Dynamic and Compressed.
+@param[in]	has_data_dir	This tablespace is in a remote location.
+@param[in]	is_shared	This tablespace can be shared by many tables.
+@param[in]	is_temporary	This tablespace is temporary.
+@param[in]	is_encrypted	This tablespace is encrypted.
+@param[in]	page_compression Table uses page compression
+@param[in]	page_compression_level Page compression level
+@param[in]	atomic_writes	Table uses atomic writes
+@return tablespace flags after initialization */
 UNIV_INLINE
 ulint
-fsp_flags_set_page_size(
-/*====================*/
-	ulint	flags,		/*!< in: tablespace flags */
-	ulint	page_size)	/*!< in: page size in bytes */
+fsp_flags_init(
+	const page_size_t&	page_size,
+	bool			atomic_blobs,
+	bool			has_data_dir,
+	bool			is_shared,
+	bool			is_temporary,
+	bool			page_compression,
+	ulint			page_compression_level,
+	ulint			atomic_writes,
+	bool			is_encrypted)
 {
-	ulint ssize = 0;
-	ulint shift;
+	ut_ad(page_size.physical() <= page_size.logical());
+	ut_ad(!page_size.is_compressed() || atomic_blobs);
 
-	/* Page size should be > UNIV_PAGE_SIZE_MIN */
-	ut_ad(page_size >= UNIV_PAGE_SIZE_MIN);
-	ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
+	/* Page size should be a power of two between UNIV_PAGE_SIZE_MIN
+	and UNIV_PAGE_SIZE, but zip_size may be 0 if not compressed. */
+	ulint flags = fsp_flags_set_page_size(0, page_size);
 
-	if (page_size == UNIV_PAGE_SIZE_ORIG) {
-		ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags));
-		return(flags);
+	if (atomic_blobs) {
+		flags |= FSP_FLAGS_MASK_POST_ANTELOPE
+			| FSP_FLAGS_MASK_ATOMIC_BLOBS;
 	}
 
-	for (shift = UNIV_PAGE_SIZE_SHIFT_MAX;
-	     shift >= UNIV_PAGE_SIZE_SHIFT_MIN;
-	     shift--) {
-		ulint	mask = (1 << shift);
-		if (page_size & mask) {
-			ut_ad(!(page_size & ~mask));
-			ssize = shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1;
-			break;
-		}
+	/* If the zip_size is explicit and different from the default,
+	compressed row format is implied. */
+	flags = fsp_flags_set_zip_size(flags, page_size);
+
+	if (has_data_dir) {
+		flags |= FSP_FLAGS_MASK_DATA_DIR;
 	}
 
-	ut_ad(ssize);
-	ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX);
+	/* Shared tablespaces can hold all row formats, so we only mark the
+	POST_ANTELOPE and ATOMIC_BLOB bits if it is compressed. */
+	if (is_shared) {
+		ut_ad(!has_data_dir);
+		flags |= FSP_FLAGS_MASK_SHARED;
+	}
+
+	if (is_temporary) {
+		ut_ad(!has_data_dir);
+		flags |= FSP_FLAGS_MASK_TEMPORARY;
+	}
+
+	if (is_encrypted) {
+		flags |= FSP_FLAGS_MASK_ENCRYPTION;
+	}
 
-	flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize);
+	/* In addition, tablespace flags also contain if the page
+	compression is used for this table. */
+	if (page_compression) {
+		flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(flags, page_compression);
+	}
+
+	/* In addition, tablespace flags also contain page compression level
+	if page compression is used for this table. */
+	if (page_compression && page_compression_level) {
+		flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, page_compression_level);
+	}
+
+	/* In addition, tablespace flags also contain flag if atomic writes
+	is used for this table */
+	if (atomic_writes) {
+		flags |= FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomic_writes);
+	}
 
 	ut_ad(fsp_flags_is_valid(flags));
 
 	return(flags);
 }
 
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return	descriptor index */
+/** Calculates the descriptor index within a descriptor page.
+@param[in]	page_size	page size
+@param[in]	offset		page offset
+@return descriptor index */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_index(
-/*=======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset)		/*!< in: page offset */
+	const page_size_t&	page_size,
+	ulint			offset)
 {
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (zip_size == 0) {
-		return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
-		       / FSP_EXTENT_SIZE);
-	} else {
-		return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
-	}
+	return(ut_2pow_remainder(offset, page_size.physical())
+	       / FSP_EXTENT_SIZE);
 }
-#endif /* !UNIV_INNOCHECKSUM */
 
 /**********************************************************************//**
 Gets a descriptor bit of a page.
-@return	TRUE if free */
+@return TRUE if free */
 UNIV_INLINE
 ibool
 xdes_get_bit(
@@ -303,17 +298,15 @@ xdes_get_bit(
 			bit_index));
 }
 
-#ifndef UNIV_INNOCHECKSUM
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return	descriptor page offset */
+/** Calculates the page where the descriptor of a page resides.
+@param[in]	page_size	page size
+@param[in]	offset		page offset
+@return descriptor page offset */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_page(
-/*======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset)		/*!< in: page offset */
+	const page_size_t&	page_size,
+	ulint			offset)
 {
 #ifndef DOXYGEN /* Doxygen gets confused by these */
 # if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET				\
@@ -335,16 +328,24 @@ xdes_calc_descriptor_page(
 	      + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
 	      * XDES_SIZE);
 
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (zip_size == 0) {
-		return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
-	} else {
-		ut_ad(zip_size > XDES_ARR_OFFSET
-		      + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
-		return(ut_2pow_round(offset, zip_size));
+#ifdef UNIV_DEBUG
+	if (page_size.is_compressed()) {
+		ut_a(page_size.physical() > XDES_ARR_OFFSET
+		     + (page_size.physical() / FSP_EXTENT_SIZE) * XDES_SIZE);
 	}
+#endif /* UNIV_DEBUG */
+
+	return(ut_2pow_round(offset, page_size.physical()));
 }
 
+/** Calculates the descriptor array size.
+@param[in]	page_size	page size; only its physical size is used
+@return physical page size divided by FSP_EXTENT_SIZE */
+UNIV_INLINE
+ulint
+xdes_arr_size(
+	const page_size_t&	page_size)
+{
+	return(page_size.physical()/FSP_EXTENT_SIZE);
+}
 #endif /* !UNIV_INNOCHECKSUM */
-
diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h
index 5f943ee2b83..44bdddfa3bf 100644
--- a/storage/innobase/include/fsp0pagecompress.h
+++ b/storage/innobase/include/fsp0pagecompress.h
@@ -51,7 +51,7 @@ fsp_header_get_compression_level(
 Determine if the tablespace is page compressed from dict_table_t::flags.
 @return	TRUE if page compressed, FALSE if not compressed */
 UNIV_INLINE
-ibool
+bool
 fsp_flags_is_page_compressed(
 /*=========================*/
 	ulint	flags);	/*!< in: tablespace flags */
diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic
index e879aa2c16e..a3971da6772 100644
--- a/storage/innobase/include/fsp0pagecompress.ic
+++ b/storage/innobase/include/fsp0pagecompress.ic
@@ -29,7 +29,7 @@ Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
 Determine if the tablespace is page compressed from dict_table_t::flags.
 @return	TRUE if page compressed, FALSE if not page compressed */
 UNIV_INLINE
-ibool
+bool
 fsp_flags_is_page_compressed(
 /*=========================*/
 	ulint	flags)	/*!< in: tablespace flags */
@@ -65,7 +65,7 @@ fsp_flags_get_atomic_writes(
 Find out wheather the page is index page or not
 @return	true if page type index page, false if not */
 UNIV_INLINE
-ibool
+bool
 fil_page_is_index_page(
 /*===================*/
 	byte*	buf)	/*!< in: page */
@@ -77,10 +77,10 @@ fil_page_is_index_page(
 Find out wheather the page is page compressed
 @return	true if page is page compressed, false if not */
 UNIV_INLINE
-ibool
+bool
 fil_page_is_compressed(
 /*===================*/
-	byte*	buf)	/*!< in: page */
+	const byte*	buf)	/*!< in: page */
 {
 	return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
 }
@@ -89,10 +89,10 @@ fil_page_is_compressed(
 Find out wheather the page is page compressed
 @return	true if page is page compressed, false if not */
 UNIV_INLINE
-ibool
+bool
 fil_page_is_compressed_encrypted(
 /*=============================*/
-	byte*	buf)	/*!< in: page */
+	const byte*	buf)	/*!< in: page */
 {
 	return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
 }
@@ -125,7 +125,7 @@ Extract the page compression from space.
 @return true if space is page compressed, false if space is not found
 or space is not page compressed. */
 UNIV_INLINE
-ibool
+bool
 fil_space_is_page_compressed(
 /*=========================*/
 	ulint	id)	/*!< in: space id */
@@ -209,7 +209,7 @@ fil_space_get_atomic_writes(
 Find out wheather the page is page compressed with lzo method
 @return	true if page is page compressed with lzo method, false if not */
 UNIV_INLINE
-ibool
+bool
 fil_page_is_lzo_compressed(
 /*=======================*/
 	byte*	buf)	/*!< in: page */
diff --git a/storage/innobase/include/fsp0space.h b/storage/innobase/include/fsp0space.h
new file mode 100644
index 00000000000..31a1a4abc75
--- /dev/null
+++ b/storage/innobase/include/fsp0space.h
@@ -0,0 +1,259 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0space.h
+General shared tablespace implementation.
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0space_h
+#define fsp0space_h
+
+#include "univ.i"
+#include "fsp0file.h"
+#include "fsp0fsp.h"
+#include "fsp0types.h"
+#include "ut0new.h"
+
+#include <vector>
+
+/** Data structure that contains the information about shared tablespaces.
+Currently this can be the system tablespace or a temporary table tablespace */
+class Tablespace {
+
+public:
+	typedef std::vector<Datafile, ut_allocator<Datafile> >	files_t;
+
+	/** Data file information - each Datafile can be accessed globally */
+	files_t		m_files;
+
+	Tablespace()
+		:
+		m_files(),
+		m_name(),
+		m_space_id(ULINT_UNDEFINED),
+		m_path(),
+		m_flags(),
+		m_mode(),	/* value-initialized; read by encryption_mode() */
+		m_key_id(),	/* value-initialized; read by key_id() */
+		m_ignore_read_only(false)
+	{}
+
+	virtual ~Tablespace()
+	{
+		shutdown();
+		ut_ad(m_files.empty());
+		ut_ad(m_space_id == ULINT_UNDEFINED);
+		if (m_name != NULL) {
+			ut_free(m_name);
+			m_name = NULL;
+		}
+		if (m_path != NULL) {
+			ut_free(m_path);
+			m_path = NULL;
+		}
+	}
+
+	// Disable copying
+	Tablespace(const Tablespace&);
+	Tablespace& operator=(const Tablespace&);
+
+	/** Set tablespace name
+	@param[in]	name	tablespace name */
+	void set_name(const char* name)
+	{
+		ut_ad(m_name == NULL);
+		m_name = mem_strdup(name);
+		ut_ad(m_name != NULL);
+	}
+
+	/** Get tablespace name
+	@return tablespace name */
+	const char* name()	const
+	{
+		return(m_name);
+	}
+
+	/** Set tablespace path and filename members.
+	@param[in]	path	where tablespace file(s) resides
+	@param[in]	len	length of the file path */
+	void set_path(const char* path, size_t len)
+	{
+		ut_ad(m_path == NULL);
+		m_path = mem_strdupl(path, len);
+		ut_ad(m_path != NULL);
+
+		os_normalize_path(m_path);
+	}
+
+	/** Set tablespace path and filename members.
+	@param[in]	path	where tablespace file(s) resides */
+	void set_path(const char* path)
+	{
+		set_path(path, strlen(path));
+	}
+
+	/** Get tablespace path
+	@return tablespace path */
+	const char* path()	const
+	{
+		return(m_path);
+	}
+
+	/** Set the space id of the tablespace
+	@param[in]	space_id	 tablespace ID to set */
+	void set_space_id(ulint space_id)
+	{
+		ut_ad(m_space_id == ULINT_UNDEFINED);
+		m_space_id = space_id;
+	}
+
+	/** Get the space id of the tablespace
+	@return m_space_id space id of the tablespace */
+	ulint space_id()	const
+	{
+		return(m_space_id);
+	}
+
+	/** Set the tablespace flags
+	@param[in]	fsp_flags	tablespace flags */
+	void set_flags(ulint fsp_flags)
+	{
+		ut_ad(fsp_flags_is_valid(fsp_flags));
+		m_flags = fsp_flags;
+	}
+
+	/** Get the tablespace flags
+	@return m_flags tablespace flags */
+	ulint flags()	const
+	{
+		return(m_flags);
+	}
+
+	/** Get the tablespace encryption mode
+	@return m_mode tablespace encryption mode */
+	fil_encryption_t encryption_mode() const
+	{
+		return (m_mode);
+	}
+
+	/** Get the tablespace encryption key_id
+	@return m_key_id tablespace encryption key_id */
+	ulint key_id() const
+	{
+		return (m_key_id);
+	}
+
+	/** Set Ignore Read Only Status for tablespace.
+	@param[in]	read_only_status	read only status indicator */
+	void set_ignore_read_only(bool read_only_status)
+	{
+		m_ignore_read_only = read_only_status;
+	}
+
+	/** Free the memory allocated by the Tablespace object */
+	void shutdown();
+
+	/** @return the sum of the file sizes of each Datafile */
+	ulint get_sum_of_sizes() const
+	{
+		ulint	sum = 0;
+
+		for (files_t::const_iterator it = m_files.begin();
+		     it != m_files.end(); ++it) {
+			sum += it->m_size;
+		}
+
+		return(sum);
+	}
+
+	/** Open or Create the data files if they do not exist.
+	@param[in]	is_temp	whether this is a temporary tablespace
+	@return DB_SUCCESS or error code */
+	dberr_t open_or_create(bool is_temp)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Delete all the data files. */
+	void delete_files();
+
+	/** Check if two tablespaces have common data file names.
+	@param[in]	other_space	Tablespace to check against this.
+	@return true if they have the same data filenames and paths */
+	bool intersection(const Tablespace* other_space);
+
+	/** Use the ADD DATAFILE path to create a Datafile object and add
+	it to the front of m_files. Parse the datafile path into a path
+	and a basename with extension 'ibd'. This datafile_path provided
+	may be an absolute or relative path, but it must end with the
+	extension .ibd and have a basename of at least 1 byte.
+
+	Set tablespace m_path member and add a Datafile with the filename.
+	@param[in]	datafile_path	full path of the tablespace file. */
+	dberr_t add_datafile(
+		const char*	datafile_path);
+
+	/** Return a pointer to the first Datafile for this Tablespace
+	@return pointer to the first Datafile for this Tablespace */
+	Datafile* first_datafile()
+	{
+		ut_a(!m_files.empty());
+		return(&m_files.front());
+	}
+
+	/** Check if undo tablespace.
+	@return true if undo tablespace */
+	static bool is_undo_tablespace(ulint id)
+	{
+		return(id <= srv_undo_tablespaces_open);
+	}
+private:
+	/**
+	@param[in]	filename	Name to lookup in the data files.
+	@return true if the filename exists in the data files */
+	bool find(const char* filename);
+
+	/** Note that the data file was found.
+	@param[in]	file	data file object */
+	void file_found(Datafile& file);
+
+	/* DATA MEMBERS */
+
+	/** Name of the tablespace. */
+	char*		m_name;
+
+	/** Tablespace ID */
+	ulint		m_space_id;
+
+	/** Path where tablespace files will reside, not including a filename.*/
+	char*		m_path;
+
+	/** Tablespace flags */
+	ulint		m_flags;
+
+	/** Encryption mode and key_id */
+	fil_encryption_t m_mode;
+	ulint		m_key_id;
+
+protected:
+	/** Ignore server read only configuration for this tablespace. */
+	bool		m_ignore_read_only;
+};
+
+#endif /* fsp0space_h */
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
new file mode 100644
index 00000000000..c25093491a2
--- /dev/null
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -0,0 +1,321 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0sysspace.h
+Multi file, shared, system tablespace implementation.
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#ifndef fsp0sysspace_h
+#define fsp0sysspace_h
+
+#include "univ.i"
+#include "fsp0space.h"
+
+/** If the last data file is auto-extended, we add this many pages to it
+at a time. We have to make this public because it is a config variable. */
+extern ulong sys_tablespace_auto_extend_increment;
+
+#ifdef UNIV_DEBUG
+/** Controls whether extra debug checks are done for the temporary tablespace.
+Default = true, i.e. such checks are skipped.
+This variable is not exposed to the end user; it is kept as a variable so
+that a developer can enable the checks while debugging. */
+extern bool srv_skip_temp_table_checks_debug;
+#endif /* UNIV_DEBUG */
+
+/** Data structure that contains the information about shared tablespaces.
+Currently this can be the system tablespace or a temporary table tablespace */
+class SysTablespace : public Tablespace
+{
+public:
+
+	SysTablespace()
+		:
+		m_auto_extend_last_file(),
+		m_last_file_size_max(),
+		m_created_new_raw(),
+		m_is_tablespace_full(false),
+		m_sanity_checks_done(false),
+		m_crypt_info()
+	{
+		/* No op */
+	}
+
+	~SysTablespace()
+	{
+		shutdown();
+	}
+
+	/** Set tablespace full status
+	@param[in]	is_full		true if full */
+	void set_tablespace_full_status(bool is_full)
+	{
+		m_is_tablespace_full = is_full;
+	}
+
+	/** Get tablespace full status
+	@return true if tablespace is full */
+	bool get_tablespace_full_status()
+	{
+		return(m_is_tablespace_full);
+	}
+
+	/** Set sanity check status
+	@param[in]	status	true if sanity checks are done */
+	void set_sanity_check_status(bool status)
+	{
+		m_sanity_checks_done = status;
+	}
+
+	/** Get sanity check status
+	@return true if sanity checks are done */
+	bool get_sanity_check_status()
+	{
+		return(m_sanity_checks_done);
+	}
+
+	/** Parse the input params and populate member variables.
+	@param	filepath	path to data files
+	@param	supports_raw	true if it supports raw devices
+	@return true on success parse */
+	bool parse_params(const char* filepath, bool supports_raw);
+
+	/** Check the data file specification.
+	@param[out]	create_new_db		true if a new database
+	is to be created
+	@param[in]	min_expected_tablespace_size	expected tablespace
+	size in bytes
+	@return DB_SUCCESS if all OK else error code */
+	dberr_t check_file_spec(
+		bool*	create_new_db,
+		ulint	min_expected_tablespace_size);
+
+	/** Free the memory allocated by parse_params() */
+	void shutdown();
+
+	/** Normalize the file size, convert to extents. */
+	void normalize();
+
+	/**
+	@return true if a new raw device was created. */
+	bool created_new_raw() const
+	{
+		return(m_created_new_raw);
+	}
+
+	/**
+	@return auto_extend value setting */
+	ulint can_auto_extend_last_file() const
+	{
+		return(m_auto_extend_last_file);
+	}
+
+	/** Set the last file size.
+	@param[in]	size	the size to set */
+	void set_last_file_size(ulint size)
+	{
+		ut_ad(!m_files.empty());
+		m_files.back().m_size = size;
+	}
+
+	/** Get the size of the last data file in the tablespace
+	@return the size of the last data file in the array */
+	ulint last_file_size() const
+	{
+		ut_ad(!m_files.empty());
+		return(m_files.back().m_size);
+	}
+
+	/**
+	@return the autoextend increment in pages. */
+	ulint get_autoextend_increment() const
+	{
+		return(sys_tablespace_auto_extend_increment
+		       * ((1024 * 1024) / UNIV_PAGE_SIZE));
+	}
+
+	/** Convert a file size in bytes to pages, rounding down to whole
+	megabytes first, similar to SysTablespace::parse_units().
+	@return the number of pages for the given file size (bytes). */
+	ulint get_pages_from_size(os_offset_t size)
+	{
+		return (ulint)((size / (1024 * 1024))
+			       * ((1024 * 1024) / UNIV_PAGE_SIZE));
+	}
+
+	/**
+	@return next increment size */
+	ulint get_increment() const;
+
+	/** Open or create the data files
+	@param[in]  is_temp		whether this is a temporary tablespace
+	@param[in]  create_new_db	whether we are creating a new database
+	@param[out] sum_new_sizes	sum of sizes of the new files added
+	@param[out] flush_lsn		FIL_PAGE_FILE_FLUSH_LSN of first file
+	@return DB_SUCCESS or error code */
+	dberr_t open_or_create(
+		bool	is_temp,
+		bool	create_new_db,
+		ulint*	sum_new_sizes,
+		lsn_t*	flush_lsn)
+		MY_ATTRIBUTE((warn_unused_result));
+
+private:
+	/** Check the tablespace header for this tablespace.
+	@param[out]	flushed_lsn	the value of FIL_PAGE_FILE_FLUSH_LSN
+	@return DB_SUCCESS or error code */
+	dberr_t read_lsn_and_check_flags(lsn_t* flushed_lsn);
+
+	/** NOTE(review): m_last_file_size_max == 0 is not special-cased here; confirm callers check for it.
+	@return true if last_file_size() does not exceed m_last_file_size_max. */
+	bool is_valid_size() const
+	{
+		return(m_last_file_size_max >= last_file_size());
+	}
+
+	/**
+	@return true if configured to use raw devices */
+	bool has_raw_device();
+
+	/** Note that the data file was not found.
+	@param[in]	file		data file object
+	@param[out]	create_new_db	true if a new instance to be created
+	@return DB_SUCCESS or error code */
+	dberr_t file_not_found(Datafile& file, bool* create_new_db);
+
+	/** Note that the data file was found.
+	@param[in,out]	file	data file object
+	@return true if a new instance to be created */
+	bool file_found(Datafile& file);
+
+	/** Create a data file.
+	@param[in,out]	file	data file object
+	@return DB_SUCCESS or error code */
+	dberr_t create(Datafile& file);
+
+	/** Create a data file.
+	@param[in,out]	file	data file object
+	@return DB_SUCCESS or error code */
+	dberr_t create_file(Datafile& file);
+
+	/** Open a data file.
+	@param[in,out]	file	data file object
+	@return DB_SUCCESS or error code */
+	dberr_t open_file(Datafile& file);
+
+	/** Set the size of the file.
+	@param[in,out]	file	data file object
+	@return DB_SUCCESS or error code */
+	dberr_t set_size(Datafile& file);
+
+	/** Convert a numeric string that optionally ends in G or M, to a
+	number containing megabytes.
+	@param[in]	ptr	string with a quantity in bytes
+	@param[out]	megs	the number in megabytes
+	@return next character in string */
+	static char* parse_units(char* ptr, ulint* megs);
+
+private:
+	enum file_status_t {
+		FILE_STATUS_VOID = 0,		/*!< status not set */
+		FILE_STATUS_RW_PERMISSION_ERROR,/*!< permission error */
+		FILE_STATUS_READ_WRITE_ERROR,	/*!< not readable/writable */
+		FILE_STATUS_NOT_REGULAR_FILE_ERROR /*!< not a regular file */
+	};
+
+	/** Verify the size of the physical file
+	@param[in]	file	data file object
+	@return DB_SUCCESS if OK else error code. */
+	dberr_t check_size(Datafile& file);
+
+	/** Check if a file can be opened in the correct mode.
+	@param[in]	file	data file object
+	@param[out]	reason	exact reason if file_status check failed.
+	@return DB_SUCCESS or error code. */
+	dberr_t check_file_status(
+		const Datafile& 	file,
+		file_status_t& 		reason);
+
+	/* DATA MEMBERS */
+
+	/** if true, then we auto-extend the last data file */
+	bool		m_auto_extend_last_file;
+
+	/** if != 0, this tells the max size auto-extending may increase the
+	last data file size */
+	ulint		m_last_file_size_max;
+
+	/** If the following is true we do not allow
+	inserts etc. This protects the user from forgetting
+	the 'newraw' keyword to my.cnf */
+	bool		m_created_new_raw;
+
+	/** Tablespace full status */
+	bool		m_is_tablespace_full;
+
+	/** if false, then sanity checks are still pending */
+	bool		m_sanity_checks_done;
+
+	/** Encryption information */
+	fil_space_crypt_t* m_crypt_info;
+};
+
+/* GLOBAL OBJECTS */
+
+/** The control info of the system tablespace. */
+extern SysTablespace srv_sys_space;
+
+/** The control info of a temporary table shared tablespace. */
+extern SysTablespace srv_tmp_space;
+
+/** Tell whether a space ID names a system tablespace (shared or temporary).
+@param[in]	id	Space ID to check
+@return true when id is TRX_SYS_SPACE or SRV_TMP_SPACE_ID, else false. */
+UNIV_INLINE
+bool
+is_system_tablespace(ulint	id)
+{
+	return(id == SRV_TMP_SPACE_ID || id == TRX_SYS_SPACE);
+}
+
+/** Check if tablespace ID id is the shared-system or an undo tablespace.
+@return true if id does not exceed srv_undo_tablespaces_open */
+UNIV_INLINE
+bool
+is_system_or_undo_tablespace(
+	ulint   id)
+{
+	return(id <= srv_undo_tablespaces_open);
+}
+
+/** Check if tablespace ID id is a predefined shared tablespace.
+@return true if id <= srv_undo_tablespaces_open or id == SRV_TMP_SPACE_ID */
+UNIV_INLINE
+bool
+is_predefined_tablespace(
+	ulint   id)
+{
+	ut_ad(srv_sys_space.space_id() == TRX_SYS_SPACE);
+	ut_ad(TRX_SYS_SPACE == 0);
+	return(id <= srv_undo_tablespaces_open
+	       || id == SRV_TMP_SPACE_ID);
+}
+#endif /* fsp0sysspace_h */
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 4f2ca2594cb..c264fe1b595 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,9 +27,15 @@ Created May 26, 2009 Vasil Dimov
 #ifndef fsp0types_h
 #define fsp0types_h
 
-#include "univ.i"
+#ifndef UNIV_INNOCHECKSUM
+
+/** The fil_space_t::id of the redo log. All persistent tablespaces
+have a smaller fil_space_t::id. */
+#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0U
+/** The fil_space_t::id of the innodb_temporary tablespace. */
+#define SRV_TMP_SPACE_ID		0xFFFFFFFEU
 
-#include "fil0fil.h" /* for FIL_PAGE_DATA */
+#include "univ.i"
 #include "ut0byte.h"
 
 /** @name Flags for inserting records in order
@@ -42,6 +49,7 @@ fseg_alloc_free_page) */
 #define	FSP_NO_DIR	((byte)113)	/*!< no order */
 /* @} */
 
+#endif /* !UNIV_INNOCHECKSUM */
 /** File space extent size in pages
 page size | file space extent size
 ----------+-----------------------
@@ -51,23 +59,23 @@ page size | file space extent size
   32 KiB  |  64 pages = 2 MiB
   64 KiB  |  64 pages = 4 MiB
 */
-/** File space extent size (one megabyte if default two or four if not) in pages */
-#define	FSP_EXTENT_SIZE		((UNIV_PAGE_SIZE <= (16384) ?	\
-				(1048576U / UNIV_PAGE_SIZE) :	\
+#define FSP_EXTENT_SIZE         ((UNIV_PAGE_SIZE <= (16384) ?	\
+				(1048576 / UNIV_PAGE_SIZE) :	\
 				((UNIV_PAGE_SIZE <= (32768)) ?	\
-				(2097152U / UNIV_PAGE_SIZE) :	\
-				(4194304U / UNIV_PAGE_SIZE))))
+				(2097152 / UNIV_PAGE_SIZE) :	\
+				(4194304 / UNIV_PAGE_SIZE))))
 
-/** File space extent size (four megabytes) in pages for MAX page size */
-#define	FSP_EXTENT_SIZE_MAX	(4194304U / UNIV_PAGE_SIZE_MAX)
+/** File space extent size (four megabytes) in pages for MAX page size */
+#define	FSP_EXTENT_SIZE_MAX	(4194304 / UNIV_PAGE_SIZE_MAX)
 
 /** File space extent size (one megabyte) in pages for MIN page size */
-#define	FSP_EXTENT_SIZE_MIN	(1048576U / UNIV_PAGE_SIZE_MIN)
+#define	FSP_EXTENT_SIZE_MIN	(1048576 / UNIV_PAGE_SIZE_MIN)
 
 /** On a page of any file segment, data may be put starting from this
 offset */
 #define FSEG_PAGE_DATA		FIL_PAGE_DATA
 
+#ifndef UNIV_INNOCHECKSUM
 /** @name File segment header
 The file segment header points to the inode describing the file segment. */
 /* @{ */
@@ -82,11 +90,63 @@ typedef	byte	fseg_header_t;
 					header, in bytes */
 /* @} */
 
-/** Flags for fsp_reserve_free_extents @{ */
-#define FSP_NORMAL	1000000
-#define	FSP_UNDO	2000000
-#define FSP_CLEANING	3000000
-/* @} */
+#ifdef UNIV_DEBUG
+
+struct mtr_t;
+
+/** A wrapper class to print the file segment header information. */
+class fseg_header
+{
+public:
+	/** Constructor of fseg_header.
+	@param[in]	header	the underlying file segment header object
+	@param[in]	mtr	the mini-transaction.  No redo logs are
+				generated, only latches are checked within
+				mini-transaction */
+	fseg_header(
+		const fseg_header_t*	header,
+		mtr_t*			mtr)
+		:
+		m_header(header),
+		m_mtr(mtr)
+	{}
+
+	/** Print the file segment header to the given output stream.
+	@param[in,out]	out	the output stream into which the object
+				is printed.
+	@return	the output stream into which the object was printed. */
+	std::ostream&
+	to_stream(std::ostream&	out) const;
+private:
+	/** The underlying file segment header */
+	const fseg_header_t*	m_header;
+
+	/** The mini transaction, which is used mainly to check whether
+	appropriate latches have been taken by the calling thread. */
+	mtr_t*			m_mtr;
+};
+
+/** Overload of the global output operator that prints a file segment header
+@param[in,out]	out	the output stream into which object will be printed
+@param[in]	header	the file segment header to be printed
+@return the output stream, as returned by header.to_stream(out) */
+inline
+std::ostream&
+operator<<(
+	std::ostream&		out,
+	const fseg_header&	header)
+{
+	return(header.to_stream(out));
+}
+#endif /* UNIV_DEBUG */
+
+/** Flags for fsp_reserve_free_extents */
+enum fsp_reserve_t {
+	FSP_NORMAL,	/*!< reservation during normal B-tree operations */
+	FSP_UNDO,	/*!< reservation done for undo logging */
+	FSP_CLEANING,	/*!< reservation done during purge operations */
+	FSP_BLOB	/*!< reservation being done for BLOB insertion */
+};
 
 /* Number of pages described in a single descriptor page: currently each page
 description takes less than 1 byte; a descriptor page is repeated every
@@ -127,4 +187,250 @@ every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
 /*--------------------------------------*/
 /* @} */
 
+/** Validate the tablespace flags.
+These flags are stored in the tablespace header at offset FSP_SPACE_FLAGS.
+They should be 0 for ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT.
+The newer row formats, COMPRESSED and DYNAMIC, use a file format > Antelope
+so they should have a file format number plus the DICT_TF_COMPACT bit set.
+@param[in]	flags	Tablespace flags
+@return true if valid, false if not */
+bool
+fsp_flags_is_valid(
+	ulint	flags)
+	MY_ATTRIBUTE((warn_unused_result, const));
+
+/** Check if tablespace is system temporary.
+@param[in]	space_id	tablespace ID to compare with SRV_TMP_SPACE_ID
+@return true if tablespace is system temporary. */
+inline
+bool
+fsp_is_system_temporary(ulint	space_id)
+{
+	return(space_id == SRV_TMP_SPACE_ID);
+}
+
+/** Check if checksum is disabled for the given space.
+@param[in]	space_id	tablespace ID to check
+@return true if checksum is disabled for given space. */
+bool
+fsp_is_checksum_disabled(
+	ulint	space_id);
+
+/** Check if tablespace is file-per-table.
+@param[in]	space_id	Tablespace ID
+@param[in]	fsp_flags	Tablespace Flags
+@return true if tablespace is file-per-table. */
+bool
+fsp_is_file_per_table(
+	ulint	space_id,
+	ulint	fsp_flags);
+
+#ifdef UNIV_DEBUG
+/** Skip some of the sanity checks that are time consuming even in debug mode
+and can affect frequent verification runs that are done to ensure stability of
+the product.
+@return true if check should be skipped for given space. */
+bool
+fsp_skip_sanity_check(
+	ulint	space_id);
+#endif /* UNIV_DEBUG */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
+
+/** Width of the POST_ANTELOPE flag */
+#define FSP_FLAGS_WIDTH_POST_ANTELOPE	1
+/** Number of flag bits used to indicate the tablespace zip page size */
+#define FSP_FLAGS_WIDTH_ZIP_SSIZE	4
+/** Width of the ATOMIC_BLOBS flag.  The ability to break up a long
+column into an in-record prefix and an externally stored part is available
+to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS	1
+/** Number of flag bits used to indicate the tablespace page size */
+#define FSP_FLAGS_WIDTH_PAGE_SSIZE	4
+/** Width of the DATA_DIR flag.  This flag indicates that the tablespace
+is found in a remote location, not the default data directory. */
+#define FSP_FLAGS_WIDTH_DATA_DIR	1
+/** Width of the SHARED flag.  This flag indicates that the tablespace
+was created with CREATE TABLESPACE and can be shared by multiple tables. */
+#define FSP_FLAGS_WIDTH_SHARED		1
+/** Width of the TEMPORARY flag.  This flag indicates that the tablespace
+is a temporary tablespace and everything in it is temporary, meaning that
+it is for a single client and should be deleted upon startup if it exists. */
+#define FSP_FLAGS_WIDTH_TEMPORARY	1
+/** Width of the encryption flag.  This flag indicates that the tablespace
+is a tablespace with encryption. */
+#define FSP_FLAGS_WIDTH_ENCRYPTION	1
+
+/** Number of flag bits used to indicate the page compression and compression level */
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION  1
+#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4
+
+/** Number of flag bits used to indicate atomic writes for this tablespace */
+#define FSP_FLAGS_WIDTH_ATOMIC_WRITES  2
+
+/** Width of all the currently known tablespace flags */
+#define FSP_FLAGS_WIDTH		(FSP_FLAGS_WIDTH_POST_ANTELOPE	\
+				+ FSP_FLAGS_WIDTH_ZIP_SSIZE	\
+				+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS	\
+				+ FSP_FLAGS_WIDTH_PAGE_SSIZE	\
+				+ FSP_FLAGS_WIDTH_DATA_DIR	\
+				+ FSP_FLAGS_WIDTH_SHARED	\
+				+ FSP_FLAGS_WIDTH_TEMPORARY	\
+				+ FSP_FLAGS_WIDTH_ENCRYPTION	\
+				+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION \
+				+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \
+				+ FSP_FLAGS_WIDTH_ATOMIC_WRITES )
+
+/** A mask of all the known/used bits in tablespace flags */
+#define FSP_FLAGS_MASK		(~(~0U << FSP_FLAGS_WIDTH))
+
+/** Zero relative shift position of the POST_ANTELOPE field */
+#define FSP_FLAGS_POS_POST_ANTELOPE	0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define FSP_FLAGS_POS_ZIP_SSIZE		(FSP_FLAGS_POS_POST_ANTELOPE	\
+					+ FSP_FLAGS_WIDTH_POST_ANTELOPE)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_POS_ATOMIC_BLOBS	(FSP_FLAGS_POS_ZIP_SSIZE	\
+					+ FSP_FLAGS_WIDTH_ZIP_SSIZE)
+/** Zero relative shift position of the PAGE_SSIZE field */
+#define FSP_FLAGS_POS_PAGE_SSIZE	(FSP_FLAGS_POS_ATOMIC_BLOBS	\
+					+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the DATA_DIR bit */
+#define FSP_FLAGS_POS_DATA_DIR		(FSP_FLAGS_POS_PAGE_SSIZE	\
+					+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the start of the SHARED bit */
+#define FSP_FLAGS_POS_SHARED		(FSP_FLAGS_POS_DATA_DIR		\
+					+ FSP_FLAGS_WIDTH_DATA_DIR)
+/** Zero relative shift position of the start of the TEMPORARY bit */
+#define FSP_FLAGS_POS_TEMPORARY		(FSP_FLAGS_POS_SHARED		\
+					+ FSP_FLAGS_WIDTH_SHARED)
+/** Zero relative shift position of the start of the ENCRYPTION bit */
+#define FSP_FLAGS_POS_ENCRYPTION	(FSP_FLAGS_POS_TEMPORARY	\
+					+ FSP_FLAGS_WIDTH_TEMPORARY)
+/** Zero relative shift position of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION	(FSP_FLAGS_POS_ENCRYPTION	\
+					+ FSP_FLAGS_WIDTH_ENCRYPTION)
+/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL	(FSP_FLAGS_POS_PAGE_COMPRESSION	\
+					+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
+/** Zero relative shift position of the ATOMIC_WRITES field */
+#define FSP_FLAGS_POS_ATOMIC_WRITES	(FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL	\
+					+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define FSP_FLAGS_POS_UNUSED		(FSP_FLAGS_POS_ATOMIC_WRITES	\
+					+ FSP_FLAGS_WIDTH_ATOMIC_WRITES)
+
+
+/** Bit mask of the POST_ANTELOPE field */
+#define FSP_FLAGS_MASK_POST_ANTELOPE				\
+		((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE))	\
+		<< FSP_FLAGS_POS_POST_ANTELOPE)
+/** Bit mask of the ZIP_SSIZE field */
+#define FSP_FLAGS_MASK_ZIP_SSIZE				\
+		((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE))		\
+		<< FSP_FLAGS_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_MASK_ATOMIC_BLOBS				\
+		((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS))	\
+		<< FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Bit mask of the PAGE_SSIZE field */
+#define FSP_FLAGS_MASK_PAGE_SSIZE				\
+		((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE))		\
+		<< FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the DATA_DIR field */
+#define FSP_FLAGS_MASK_DATA_DIR					\
+		((~(~0U << FSP_FLAGS_WIDTH_DATA_DIR))		\
+		<< FSP_FLAGS_POS_DATA_DIR)
+/** Bit mask of the SHARED field */
+#define FSP_FLAGS_MASK_SHARED					\
+		((~(~0U << FSP_FLAGS_WIDTH_SHARED))		\
+		<< FSP_FLAGS_POS_SHARED)
+/** Bit mask of the TEMPORARY field */
+#define FSP_FLAGS_MASK_TEMPORARY				\
+		((~(~0U << FSP_FLAGS_WIDTH_TEMPORARY))		\
+		<< FSP_FLAGS_POS_TEMPORARY)
+/** Bit mask of the ENCRYPTION field */
+#define FSP_FLAGS_MASK_ENCRYPTION				\
+		((~(~0U << FSP_FLAGS_WIDTH_ENCRYPTION))		\
+		<< FSP_FLAGS_POS_ENCRYPTION)
+/** Bit mask of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION			\
+		((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION))	\
+		<< FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL		\
+		((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL))	\
+		<< FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the ATOMIC_WRITES field */
+#define FSP_FLAGS_MASK_ATOMIC_WRITES		\
+		((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_WRITES))	\
+		<< FSP_FLAGS_POS_ATOMIC_WRITES)
+
+/** Return the value of the POST_ANTELOPE field.  In all getters below, flags may be any expression: it is parenthesized in the expansion. */
+#define FSP_FLAGS_GET_POST_ANTELOPE(flags)			\
+		(((flags) & FSP_FLAGS_MASK_POST_ANTELOPE)	\
+		>> FSP_FLAGS_POS_POST_ANTELOPE)
+/** Return the value of the ZIP_SSIZE field */
+#define FSP_FLAGS_GET_ZIP_SSIZE(flags)				\
+		(((flags) & FSP_FLAGS_MASK_ZIP_SSIZE)		\
+		>> FSP_FLAGS_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags)			\
+		(((flags) & FSP_FLAGS_MASK_ATOMIC_BLOBS)	\
+		>> FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Return the value of the PAGE_SSIZE field */
+#define FSP_FLAGS_GET_PAGE_SSIZE(flags)				\
+		(((flags) & FSP_FLAGS_MASK_PAGE_SSIZE)		\
+		>> FSP_FLAGS_POS_PAGE_SSIZE)
+/** Return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags)				\
+		(((flags) & FSP_FLAGS_MASK_DATA_DIR)		\
+		>> FSP_FLAGS_POS_DATA_DIR)
+/** Return the contents of the SHARED field */
+#define FSP_FLAGS_GET_SHARED(flags)				\
+		(((flags) & FSP_FLAGS_MASK_SHARED)		\
+		>> FSP_FLAGS_POS_SHARED)
+/** Return the contents of the TEMPORARY field */
+#define FSP_FLAGS_GET_TEMPORARY(flags)				\
+		(((flags) & FSP_FLAGS_MASK_TEMPORARY)		\
+		>> FSP_FLAGS_POS_TEMPORARY)
+/** Return the contents of the ENCRYPTION field */
+#define FSP_FLAGS_GET_ENCRYPTION(flags)				\
+		(((flags) & FSP_FLAGS_MASK_ENCRYPTION)		\
+		>> FSP_FLAGS_POS_ENCRYPTION)
+/** Return the contents of the UNUSED bits */
+#define FSP_FLAGS_GET_UNUSED(flags)				\
+		((flags) >> FSP_FLAGS_POS_UNUSED)
+/** Return the value of the PAGE_COMPRESSION field */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags)		\
+		(((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION)	\
+		>> FSP_FLAGS_POS_PAGE_COMPRESSION)
+/** Return the value of the PAGE_COMPRESSION_LEVEL field */
+#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)		\
+		(((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \
+		>> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Return the value of the ATOMIC_WRITES field */
+#define FSP_FLAGS_GET_ATOMIC_WRITES(flags)		\
+		(((flags) & FSP_FLAGS_MASK_ATOMIC_WRITES) \
+		>> FSP_FLAGS_POS_ATOMIC_WRITES)
+/** Use an alias in the code for FSP_FLAGS_GET_SHARED() */
+#define fsp_is_shared_tablespace FSP_FLAGS_GET_SHARED
+/* @} */
+
+/** Return flags with compression OR-ed in at the PAGE_COMPRESSION
+position; flags itself is not modified and old bits are not cleared. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression)	\
+		((flags) | ((compression) << FSP_FLAGS_POS_PAGE_COMPRESSION))
+
+/** Return flags with level OR-ed in at the PAGE_COMPRESSION_LEVEL
+position; flags itself is not modified and old bits are not cleared. */
+#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level)	\
+		((flags) | ((level) << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL))
+
+/** Return flags with atomics OR-ed in at the ATOMIC_WRITES
+position; flags itself is not modified and old bits are not cleared. */
+#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics)	\
+		((flags) | ((atomics) << FSP_FLAGS_POS_ATOMIC_WRITES))
 #endif /* fsp0types_h */
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
index 50f62063893..87b7cf709c8 100644
--- a/storage/innobase/include/fts0ast.h
+++ b/storage/innobase/include/fts0ast.h
@@ -26,8 +26,16 @@ Created 2007/03/16/03 Sunny Bains
 #ifndef INNOBASE_FST0AST_H
 #define INNOBASE_FST0AST_H
 
-#include "mem0mem.h"
 #include "ha_prototypes.h"
+#include "mem0mem.h"
+
+#ifdef UNIV_PFS_MEMORY
+
+#define malloc(A)	ut_malloc_nokey(A)
+#define free(A)		ut_free(A)
+#define realloc(P, A)	ut_realloc(P, A)
+
+#endif /* UNIV_PFS_MEMORY */
 
 /* The type of AST Node */
 enum fts_ast_type_t {
@@ -35,6 +43,10 @@ enum fts_ast_type_t {
 	FTS_AST_NUMB,				/*!< Number */
 	FTS_AST_TERM,				/*!< Term (or word) */
 	FTS_AST_TEXT,				/*!< Text string */
+	FTS_AST_PARSER_PHRASE_LIST,		/*!< Phrase for plugin parser
+						The difference from text type
+						is that we tokenize text into
+						term list */
 	FTS_AST_LIST,				/*!< Expression list */
 	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
 };
@@ -139,9 +151,8 @@ fts_ast_term_set_wildcard(
 	fts_ast_node_t*	node);			/*!< in: term to change */
 /********************************************************************
 Set the proximity attribute of a text node. */
-
 void
-fts_ast_term_set_distance(
+fts_ast_text_set_distance(
 /*======================*/
 	fts_ast_node_t*	node,			/*!< in/out: text node */
 	ulint		distance);		/*!< in: the text proximity
@@ -149,7 +160,6 @@ fts_ast_term_set_distance(
 /********************************************************************//**
 Free a fts_ast_node_t instance.
 @return next node to free */
-UNIV_INTERN
 fts_ast_node_t*
 fts_ast_free_node(
 /*==============*/
@@ -185,10 +195,16 @@ fts_ast_state_free(
 /*===============*/
 	fts_ast_state_t*state);			/*!< in: state instance
 						to free */
+/** Check only union operation involved in the node
+@param[in]	node	ast node to check
+@return true if the node contains only union else false. */
+bool
+fts_ast_node_check_union(
+	fts_ast_node_t*	node);
+
 /******************************************************************//**
 Traverse the AST - in-order traversal.
 @return DB_SUCCESS if all went well */
-UNIV_INTERN
 dberr_t
 fts_ast_visit(
 /*==========*/
@@ -206,7 +222,6 @@ Process (nested) sub-expression, create a new result set to store the
 sub-expression result by processing nodes under current sub-expression
 list. Merge the sub-expression result with that of parent expression list.
 @return DB_SUCCESS if all went well */
-UNIV_INTERN
 dberr_t
 fts_ast_visit_sub_exp(
 /*==================*/
@@ -216,7 +231,6 @@ fts_ast_visit_sub_exp(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************
 Create a lex instance.*/
-UNIV_INTERN
 fts_lexer_t*
 fts_lexer_create(
 /*=============*/
@@ -226,7 +240,6 @@ fts_lexer_create(
 	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
 /********************************************************************
 Free an fts_lexer_t instance.*/
-UNIV_INTERN
 void
 fts_lexer_free(
 /*===========*/
@@ -240,7 +253,6 @@ has one more byte than len
 @param[in] str		pointer to string
 @param[in] len		length of the string
 @return ast string with NUL-terminator */
-UNIV_INTERN
 fts_ast_string_t*
 fts_ast_string_create(
 	const byte*	str,
@@ -249,7 +261,6 @@ fts_ast_string_create(
 /**
 Free an ast string instance
 @param[in,out] ast_str		string to free */
-UNIV_INTERN
 void
 fts_ast_string_free(
 	fts_ast_string_t*	ast_str);
@@ -259,7 +270,6 @@ Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
 @param[in] str		string to translate
 @param[in] base		the base
 @return translated number */
-UNIV_INTERN
 ulint
 fts_ast_string_to_ul(
 	const fts_ast_string_t*	ast_str,
@@ -268,7 +278,6 @@ fts_ast_string_to_ul(
 /**
 Print the ast string
 @param[in] str		string to print */
-UNIV_INTERN
 void
 fts_ast_string_print(
 	const fts_ast_string_t*	ast_str);
@@ -314,6 +323,9 @@ struct fts_ast_node_t {
 	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
 	bool		visited;		/*!< whether this node is
 						already processed */
+	/* Used by plugin parser */
+	fts_ast_node_t* up_node;		/*!< Direct up node */
+	bool		go_up;			/*!< Flag if go one level up */
 };
 
 /* To track state during parsing */
@@ -327,8 +339,32 @@ struct fts_ast_state_t {
 	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
 	CHARSET_INFO*	charset;		/*!< charset used for
 						tokenization */
+	/* Used by plugin parser */
+	fts_ast_node_t*	cur_node;		/*!< Current node into which
+						 we add new node */
+	int		depth;			/*!< Depth of parsing state */
 };
 
+/******************************************************************//**
+Create an AST term node, makes a copy of ptr for plugin parser
+@return node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_term_for_parser(
+/*================================*/
+	void*		arg,			/*!< in: ast state */
+	const char*	ptr,			/*!< in: term string */
+	const ulint	len);			/*!< in: term string length */
+
+/******************************************************************//**
+Create an AST phrase list node for plugin parser
+@return node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_phrase_list(
+/*============================*/
+	void*		arg);			/*!< in: ast state */
+
 #ifdef UNIV_DEBUG
 const char*
 fts_ast_oper_name_get(fts_ast_oper_t	oper);
diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h
index d0e4cae0678..da93ab8617d 100644
--- a/storage/innobase/include/fts0blex.h
+++ b/storage/innobase/include/fts0blex.h
@@ -341,7 +341,7 @@ extern int fts0blex (yyscan_t yyscanner);
 #undef YY_DECL
 #endif
 
-#line 73 "fts0blex.l"
+#line 74 "fts0blex.l"
 
 
 #line 348 "../include/fts0blex.h"
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 7aa7055640c..c6b39bce286 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -27,7 +27,7 @@ Created 2011/09/02 Sunny Bains
 #ifndef fts0fts_h
 #define fts0fts_h
 
-#include "univ.i"
+#include "ha_prototypes.h"
 
 #include "data0type.h"
 #include "data0types.h"
@@ -42,6 +42,7 @@ Created 2011/09/02 Sunny Bains
 #include "ut0wqueue.h"
 #include "que0types.h"
 #include "ft_global.h"
+#include "mysql/plugin_ftparser.h"
 
 /** "NULL" value of a document id. */
 #define FTS_NULL_DOC_ID			0
@@ -66,7 +67,7 @@ optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
 #define MAX_DOC_ID_OPT_VAL		1073741824
 
 /** Document id type. */
-typedef ib_uint64_t doc_id_t;
+typedef ib_id_t doc_id_t;
 
 /** doc_id_t printf format */
 #define FTS_DOC_ID_FORMAT	IB_ID_FMT
@@ -86,12 +87,16 @@ those defined in mysql file ft_global.h */
 #define FTS_BOOL	1
 #define FTS_SORTED	2
 #define FTS_EXPAND	4
-#define FTS_PROXIMITY	8
-#define FTS_PHRASE	16
-#define FTS_OPT_RANKING	32
+#define FTS_NO_RANKING	8
+#define FTS_PROXIMITY	16
+#define FTS_PHRASE	32
+#define FTS_OPT_RANKING	64
 
 #define FTS_INDEX_TABLE_IND_NAME	"FTS_INDEX_TABLE_IND"
 
+/** The number of FTS index partitions for a fulltext index */
+#define FTS_NUM_AUX_INDEX		6
+
 /** Threshold where our optimize thread automatically kicks in */
 #define FTS_OPTIMIZE_THRESHOLD		10000000
 
@@ -99,6 +104,30 @@ those defined in mysql file ft_global.h */
 should not exceed FTS_DOC_ID_MAX_STEP */
 #define FTS_DOC_ID_MAX_STEP		65535
 
+/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
+#define FTS_MAX_WORD_LEN		(HA_FT_MAXCHARLEN * 4)
+
+/** Maximum possible Fulltext word length (in characters) */
+#define FTS_MAX_WORD_LEN_IN_CHAR	HA_FT_MAXCHARLEN
+
+/** Number of columns in FTS AUX Tables */
+#define FTS_DELETED_TABLE_NUM_COLS	1
+#define FTS_CONFIG_TABLE_NUM_COLS	2
+#define FTS_AUX_INDEX_TABLE_NUM_COLS	5
+
+/** DELETED_TABLE(doc_id BIGINT UNSIGNED) */
+#define FTS_DELETED_TABLE_COL_LEN	8
+/** CONFIG_TABLE(key CHAR(50), value CHAR(200)) */
+#define FTS_CONFIG_TABLE_KEY_COL_LEN	50
+#define FTS_CONFIG_TABLE_VALUE_COL_LEN	200
+
+#define FTS_INDEX_FIRST_DOC_ID_LEN	8
+#define FTS_INDEX_LAST_DOC_ID_LEN	8
+#define FTS_INDEX_DOC_COUNT_LEN		4
+/* BLOB COLUMN, 0 means VARIABLE SIZE */
+#define FTS_INDEX_ILIST_LEN		0
+
+
 /** Variable specifying the FTS parallel sort degree */
 extern ulong		fts_sort_pll_degree;
 
@@ -151,7 +180,7 @@ do {								\
 	(fts_table)->suffix = m_suffix;				\
         (fts_table)->type = m_type;				\
         (fts_table)->table_id = m_table->id;			\
-        (fts_table)->parent = m_table->name;			\
+        (fts_table)->parent = m_table->name.m_name;		\
         (fts_table)->table = m_table;				\
 } while (0);
 
@@ -160,7 +189,7 @@ do {								\
 	(fts_table)->suffix = m_suffix;				\
         (fts_table)->type = m_type;				\
         (fts_table)->table_id = m_index->table->id;		\
-        (fts_table)->parent = m_index->table->name;		\
+        (fts_table)->parent = m_index->table->name.m_name;	\
         (fts_table)->table = m_index->table;			\
         (fts_table)->index_id = m_index->id;			\
 } while (0);
@@ -307,35 +336,45 @@ enum	fts_status {
 typedef	enum fts_status	fts_status_t;
 
 /** The state of the FTS sub system. */
-struct fts_t {
-					/*!< mutex protecting bg_threads* and
-					fts_add_wq. */
-	ib_mutex_t		bg_threads_mutex;
-
-	ulint		bg_threads;	/*!< number of background threads
-					accessing this table */
-
-					/*!< TRUE if background threads running
-					should stop themselves */
-	ulint		fts_status;	/*!< Status bit regarding fts
-					running state */
-
-	ib_wqueue_t*	add_wq;		/*!< Work queue for scheduling jobs
-					for the FTS 'Add' thread, or NULL
-					if the thread has not yet been
-					created. Each work item is a
-					fts_trx_doc_ids_t*. */
-
-	fts_cache_t*	cache;		/*!< FTS memory buffer for this table,
-					or NULL if the table has no FTS
-					index. */
-
-	ulint		doc_col;	/*!< FTS doc id hidden column number
-					in the CLUSTERED index. */
-
-	ib_vector_t*	indexes;	/*!< Vector of FTS indexes, this is
-					mainly for caching purposes. */
-	mem_heap_t*	fts_heap;	/*!< heap for fts_t allocation */
+class fts_t {
+public:
+	/** fts_t constructor.
+	@param[in]	table	table with FTS indexes
+	@param[in,out]	heap	memory heap where 'this' is stored */
+	fts_t(
+		const dict_table_t*	table,
+		mem_heap_t*		heap);
+
+	/** fts_t destructor. */
+	~fts_t();
+
+	/** Mutex protecting bg_threads* and fts_add_wq. */
+	ib_mutex_t	bg_threads_mutex;
+
+	/** Number of background threads accessing this table. */
+	ulint		bg_threads;
+
+	/** Status bit regarding fts running state. TRUE if background
+	threads running should stop themselves. */
+	ulint		fts_status;
+
+	/** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL
+	if the thread has not yet been created. Each work item is a
+	fts_trx_doc_ids_t*. */
+	ib_wqueue_t*	add_wq;
+
+	/** FTS memory buffer for this table, or NULL if the table has no FTS
+	index. */
+	fts_cache_t*	cache;
+
+	/** FTS doc id hidden column number in the CLUSTERED index. */
+	ulint		doc_col;
+
+	/** Vector of FTS indexes, this is mainly for caching purposes. */
+	ib_vector_t*	indexes;
+
+	/** Heap for fts_t allocation. */
+	mem_heap_t*	fts_heap;
 };
 
 struct fts_stopword_t;
@@ -367,16 +406,9 @@ extern ulong		fts_min_token_size;
 need a sync to free some memory */
 extern bool		fts_need_sync;
 
-/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
-#define FTS_MAX_WORD_LEN		(HA_FT_MAXCHARLEN * 4)
-
-/** Maximum possible Fulltext word length (in characters) */
-#define FTS_MAX_WORD_LEN_IN_CHAR	HA_FT_MAXCHARLEN
-
 /** Variable specifying the table that has Fulltext index to display its
 content through information schema table */
 extern char*		fts_internal_tbl_name;
-extern char*		fts_internal_tbl_name2;
 
 #define	fts_que_graph_free(graph)			\
 do {							\
@@ -387,7 +419,6 @@ do {							\
 
 /******************************************************************//**
 Create a FTS cache. */
-UNIV_INTERN
 fts_cache_t*
 fts_cache_create(
 /*=============*/
@@ -396,7 +427,6 @@ fts_cache_create(
 /******************************************************************//**
 Create a FTS index cache.
 @return Index Cache */
-UNIV_INTERN
 fts_index_cache_t*
 fts_cache_index_cache_create(
 /*=========================*/
@@ -407,31 +437,26 @@ fts_cache_index_cache_create(
 Get the next available document id. This function creates a new
 transaction to generate the document id.
 @return DB_SUCCESS if OK */
-UNIV_INTERN
 dberr_t
 fts_get_next_doc_id(
 /*================*/
 	const dict_table_t*	table,	/*!< in: table */
-	doc_id_t*		doc_id)	/*!< out: new document id */
-	MY_ATTRIBUTE((nonnull));
+	doc_id_t*		doc_id);/*!< out: new document id */
 /*********************************************************************//**
 Update the next and last Doc ID in the CONFIG table to be the input
 "doc_id" value (+ 1). We would do so after each FTS index build or
 table truncate */
-UNIV_INTERN
 void
 fts_update_next_doc_id(
 /*===================*/
 	trx_t*			trx,		/*!< in/out: transaction */
 	const dict_table_t*	table,		/*!< in: table */
 	const char*		table_name,	/*!< in: table name, or NULL */
-	doc_id_t		doc_id)		/*!< in: DOC ID to set */
-	MY_ATTRIBUTE((nonnull(2)));
+	doc_id_t		doc_id);	/*!< in: DOC ID to set */
 
 /******************************************************************//**
 Create a new document id .
 @return DB_SUCCESS if all went well else error */
-UNIV_INTERN
 dberr_t
 fts_create_doc_id(
 /*==============*/
@@ -441,19 +466,17 @@ fts_create_doc_id(
 						value to this row. This is the
 						current row that is being
 						inserted. */
-	mem_heap_t*	heap)			/*!< in: heap */
-	MY_ATTRIBUTE((nonnull));
+	mem_heap_t*	heap);			/*!< in: heap */
+
 /******************************************************************//**
 Create a new fts_doc_ids_t.
 @return new fts_doc_ids_t. */
-UNIV_INTERN
 fts_doc_ids_t*
 fts_doc_ids_create(void);
 /*=====================*/
 
 /******************************************************************//**
 Free a fts_doc_ids_t. */
-UNIV_INTERN
 void
 fts_doc_ids_free(
 /*=============*/
@@ -461,7 +484,6 @@ fts_doc_ids_free(
 
 /******************************************************************//**
 Notify the FTS system about an operation on an FTS-indexed table. */
-UNIV_INTERN
 void
 fts_trx_add_op(
 /*===========*/
@@ -469,13 +491,11 @@ fts_trx_add_op(
 	dict_table_t*	table,			/*!< in: table */
 	doc_id_t	doc_id,			/*!< in: doc id */
 	fts_row_state	state,			/*!< in: state of the row */
-	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
+	ib_vector_t*	fts_indexes);		/*!< in: FTS indexes affected
 						(NULL=all) */
-	MY_ATTRIBUTE((nonnull(1,2)));
 
 /******************************************************************//**
 Free an FTS trx. */
-UNIV_INTERN
 void
 fts_trx_free(
 /*=========*/
@@ -486,7 +506,6 @@ Creates the common ancillary tables needed for supporting an FTS index
 on the given table. row_mysql_lock_data_dictionary must have been
 called before this.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_create_common_tables(
 /*=====================*/
@@ -496,25 +515,23 @@ fts_create_common_tables(
 						index */
 	const char*	name,			/*!< in: table name */
 	bool		skip_doc_id_index)	/*!< in: Skip index on doc id */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************************//**
 Wrapper function of fts_create_index_tables_low(), create auxiliary
 tables for an FTS index
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_create_index_tables(
 /*====================*/
 	trx_t*			trx,		/*!< in: transaction handle */
 	const dict_index_t*	index)		/*!< in: the FTS index
 						instance */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************************//**
 Creates the column specific ancillary tables needed for supporting an
 FTS index on the given table. row_mysql_lock_data_dictionary must have
 been called before this.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_create_index_tables_low(
 /*========================*/
@@ -524,62 +541,59 @@ fts_create_index_tables_low(
 						instance */
 	const char*	table_name,		/*!< in: the table name */
 	table_id_t	table_id)		/*!< in: the table id */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************************//**
 Add the FTS document id hidden column. */
-UNIV_INTERN
 void
 fts_add_doc_id_column(
 /*==================*/
 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
-	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
-	MY_ATTRIBUTE((nonnull(1)));
+	mem_heap_t*	heap);	/*!< in: temporary memory heap, or NULL */
 
 /*********************************************************************//**
 Drops the ancillary tables needed for supporting an FTS index on the
 given table. row_mysql_lock_data_dictionary must have been called before
 this.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_drop_tables(
 /*============*/
 	trx_t*		trx,			/*!< in: transaction */
-	dict_table_t*	table)			/*!< in: table has the FTS
+	dict_table_t*	table);			/*!< in: table has the FTS
 						index */
-	MY_ATTRIBUTE((nonnull));
 /******************************************************************//**
 The given transaction is about to be committed; do whatever is necessary
 from the FTS system's POV.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_commit(
 /*=======*/
 	trx_t*		trx)			/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*******************************************************************//**
-FTS Query entry point.
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** FTS Query entry point.
+@param[in]	trx		transaction
+@param[in]	index		fts index to search
+@param[in]	flags		FTS search mode
+@param[in]	query_str	FTS query
+@param[in]	query_len	FTS query string len in bytes
+@param[in,out]	result		result doc ids
+@param[in]	limit		limit value
 @return DB_SUCCESS if successful otherwise error code */
-UNIV_INTERN
 dberr_t
 fts_query(
-/*======*/
-	trx_t*		trx,			/*!< in: transaction */
-	dict_index_t*	index,			/*!< in: FTS index to search */
-	uint		flags,			/*!< in: FTS search mode */
-	const byte*	query,			/*!< in: FTS query */
-	ulint		query_len,		/*!< in: FTS query string len
-						in bytes */
-	fts_result_t**	result)			/*!< out: query result, to be
-						freed by the caller.*/
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	trx_t*		trx,
+	dict_index_t*	index,
+	uint		flags,
+	const byte*	query_str,
+	ulint		query_len,
+	fts_result_t**	result,
+	ulonglong	limit)
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************************//**
 Retrieve the FTS Relevance Ranking result for doc with doc_id
 @return the relevance ranking value. */
-UNIV_INTERN
 float
 fts_retrieve_ranking(
 /*=================*/
@@ -589,7 +603,6 @@ fts_retrieve_ranking(
 
 /******************************************************************//**
 FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
-UNIV_INTERN
 void
 fts_query_sort_result_on_rank(
 /*==========================*/
@@ -598,7 +611,6 @@ fts_query_sort_result_on_rank(
 
 /******************************************************************//**
 FTS Query free result, returned by fts_query(). */
-UNIV_INTERN
 void
 fts_query_free_result(
 /*==================*/
@@ -607,7 +619,6 @@ fts_query_free_result(
 
 /******************************************************************//**
 Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
 doc_id_t
 fts_get_doc_id_from_row(
 /*====================*/
@@ -615,37 +626,45 @@ fts_get_doc_id_from_row(
 	dtuple_t*	row);			/*!< in: row whose FTS doc id we
 						want to extract.*/
 
-/******************************************************************//**
-Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
+/** Extract the doc id from the record that belongs to index.
+@param[in]	table	table
+@param[in]	rec	record contains FTS_DOC_ID
+@param[in]	index	index of rec
+@param[in]	heap	heap memory
+@return doc id that was extracted from rec */
 doc_id_t
 fts_get_doc_id_from_rec(
-/*====================*/
-	dict_table_t*	table,			/*!< in: table */
-	const rec_t*	rec,			/*!< in: rec */
-	mem_heap_t*	heap);			/*!< in: heap */
-
-/******************************************************************//**
-Update the query graph with a new document id.
-@return Doc ID used */
-UNIV_INTERN
+        dict_table_t*           table,
+        const rec_t*            rec,
+        const dict_index_t*     index,
+        mem_heap_t*             heap);
+
+/** Add new fts doc id to the update vector.
+@param[in]	table		the table that contains the FTS index.
+@param[in,out]	ufield		the fts doc id field in the update vector.
+				No new memory is allocated for this in this
+				function.
+@param[in,out]	next_doc_id	the fts doc id that has been added to the
+				update vector.  If 0, a new fts doc id is
+				automatically generated.  The memory provided
+				for this argument will be used by the update
+				vector. Ensure that the life time of this
+				memory matches that of the update vector.
+@return the fts doc id used in the update vector */
 doc_id_t
 fts_update_doc_id(
-/*==============*/
-	dict_table_t*	table,			/*!< in: table */
-	upd_field_t*	ufield,			/*!< out: update node */
-	doc_id_t*	next_doc_id);		/*!< out: buffer for writing */
+	dict_table_t*	table,
+	upd_field_t*	ufield,
+	doc_id_t*	next_doc_id);
 
 /******************************************************************//**
 FTS initialize. */
-UNIV_INTERN
 void
 fts_startup(void);
 /*==============*/
 
 /******************************************************************//**
 Signal FTS threads to initiate shutdown. */
-UNIV_INTERN
 void
 fts_start_shutdown(
 /*===============*/
@@ -656,7 +675,6 @@ fts_start_shutdown(
 
 /******************************************************************//**
 Wait for FTS threads to shutdown. */
-UNIV_INTERN
 void
 fts_shutdown(
 /*=========*/
@@ -668,7 +686,6 @@ fts_shutdown(
 /******************************************************************//**
 Create an instance of fts_t.
 @return instance of fts_t */
-UNIV_INTERN
 fts_t*
 fts_create(
 /*=======*/
@@ -677,7 +694,6 @@ fts_create(
 
 /**********************************************************************//**
 Free the FTS resources. */
-UNIV_INTERN
 void
 fts_free(
 /*=====*/
@@ -687,16 +703,13 @@ fts_free(
 /*********************************************************************//**
 Run OPTIMIZE on the given table.
 @return DB_SUCCESS if all OK */
-UNIV_INTERN
 dberr_t
 fts_optimize_table(
 /*===============*/
-	dict_table_t*	table)			/*!< in: table to optimiza */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table);			/*!< in: table to optimize */
 
 /**********************************************************************//**
 Startup the optimize thread and create the work queue. */
-UNIV_INTERN
 void
 fts_optimize_init(void);
 /*====================*/
@@ -704,7 +717,6 @@ fts_optimize_init(void);
 /**********************************************************************//**
 Check whether the work queue is initialized.
 @return TRUE if optimze queue is initialized. */
-UNIV_INTERN
 ibool
 fts_optimize_is_init(void);
 /*======================*/
@@ -712,65 +724,49 @@ fts_optimize_is_init(void);
 /****************************************************************//**
 Drops index ancillary tables for a FTS index
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_drop_index_tables(
 /*==================*/
 	trx_t*		trx,			/*!< in: transaction */
 	dict_index_t*	index)			/*!< in: Index to drop */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************************//**
 Remove the table from the OPTIMIZER's list. We do wait for
 acknowledgement from the consumer of the message. */
-UNIV_INTERN
 void
 fts_optimize_remove_table(
 /*======================*/
 	dict_table_t*	table);			/*!< in: table to remove */
 
+/** Shutdown fts optimize thread. */
+void
+fts_optimize_shutdown();
+
 /** Send sync fts cache for the table.
 @param[in]	table	table to sync */
-UNIV_INTERN
 void
 fts_optimize_request_sync_table(
 	dict_table_t*	table);
 
 /**********************************************************************//**
-Signal the optimize thread to prepare for shutdown. */
-UNIV_INTERN
-void
-fts_optimize_start_shutdown(void);
-/*==============================*/
-
-/**********************************************************************//**
-Inform optimize to clean up. */
-UNIV_INTERN
-void
-fts_optimize_end(void);
-/*===================*/
-
-/**********************************************************************//**
 Take a FTS savepoint. */
-UNIV_INTERN
 void
 fts_savepoint_take(
 /*===============*/
 	trx_t*		trx,			/*!< in: transaction */
 	fts_trx_t*	fts_trx,		/*!< in: fts transaction */
-	const char*	name)			/*!< in: savepoint name */
-	MY_ATTRIBUTE((nonnull));
+	const char*	name);			/*!< in: savepoint name */
+
 /**********************************************************************//**
 Refresh last statement savepoint. */
-UNIV_INTERN
 void
 fts_savepoint_laststmt_refresh(
 /*===========================*/
-	trx_t*		trx)			/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*		trx);			/*!< in: transaction */
+
 /**********************************************************************//**
 Release the savepoint data identified by  name. */
-UNIV_INTERN
 void
 fts_savepoint_release(
 /*==================*/
@@ -779,7 +775,6 @@ fts_savepoint_release(
 
 /**********************************************************************//**
 Free the FTS cache. */
-UNIV_INTERN
 void
 fts_cache_destroy(
 /*==============*/
@@ -787,14 +782,12 @@ fts_cache_destroy(
 
 /** Clear cache.
 @param[in,out]	cache	fts cache */
-UNIV_INTERN
 void
 fts_cache_clear(
 	fts_cache_t*	cache);
 
 /*********************************************************************//**
 Initialize things in cache. */
-UNIV_INTERN
 void
 fts_cache_init(
 /*===========*/
@@ -802,7 +795,6 @@ fts_cache_init(
 
 /*********************************************************************//**
 Rollback to and including savepoint indentified by name. */
-UNIV_INTERN
 void
 fts_savepoint_rollback(
 /*===================*/
@@ -811,7 +803,6 @@ fts_savepoint_rollback(
 
 /*********************************************************************//**
 Rollback to and including savepoint indentified by name. */
-UNIV_INTERN
 void
 fts_savepoint_rollback_last_stmt(
 /*=============================*/
@@ -820,31 +811,20 @@ fts_savepoint_rollback_last_stmt(
 /***********************************************************************//**
 Drop all orphaned FTS auxiliary tables, those that don't have a parent
 table or FTS index defined on them. */
-UNIV_INTERN
 void
 fts_drop_orphaned_tables(void);
 /*==========================*/
 
-/* Get parent table name if it's a fts aux table
-@param[in]	aux_table_name	aux table name
-@param[in]	aux_table_len	aux table length
-@return parent table name, or NULL */
-char*
-fts_get_parent_table_name(
-	const char*	aux_table_name,
-	ulint		aux_table_len);
-
 /******************************************************************//**
 Since we do a horizontal split on the index table, we need to drop
 all the split tables.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_drop_index_split_tables(
 /*========================*/
 	trx_t*		trx,			/*!< in: transaction */
 	dict_index_t*	index)			/*!< in: fts instance */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /** Run SYNC on the table, i.e., write out data from the cache to the
 FTS auxiliary INDEX table and clear the cache at the end.
@@ -853,7 +833,6 @@ FTS auxiliary INDEX table and clear the cache at the end.
 @param[in]	wait		whether wait for existing sync to finish
 @param[in]      has_dict        whether has dict operation lock
 @return DB_SUCCESS on success, error code on failure. */
-UNIV_INTERN
 dberr_t
 fts_sync_table(
 	dict_table_t*	table,
@@ -864,7 +843,6 @@ fts_sync_table(
 /****************************************************************//**
 Free the query graph but check whether dict_sys->mutex is already
 held */
-UNIV_INTERN
 void
 fts_que_graph_free_check_lock(
 /*==========================*/
@@ -874,7 +852,6 @@ fts_que_graph_free_check_lock(
 
 /****************************************************************//**
 Create an FTS index cache. */
-UNIV_INTERN
 CHARSET_INFO*
 fts_index_get_charset(
 /*==================*/
@@ -883,7 +860,6 @@ fts_index_get_charset(
 /*********************************************************************//**
 Get the initial Doc ID by consulting the CONFIG table
 @return initial Doc ID */
-UNIV_INTERN
 doc_id_t
 fts_init_doc_id(
 /*============*/
@@ -934,15 +910,31 @@ innobase_mysql_fts_get_token(
 	const byte*	start,			/*!< in: start of text */
 	const byte*	end,			/*!< in: one character past
 						end of text */
-	fts_string_t*	token,			/*!< out: token's text */
-	ulint*		offset);		/*!< out: offset to token,
-						measured as characters from
-						'start' */
+	fts_string_t*	token);			/*!< out: token's text */
+
+/*************************************************************//**
+Get token char size by charset
+@return the number of token char size */
+ulint
+fts_get_token_size(
+/*===============*/
+	const CHARSET_INFO*	cs,		/*!< in: Character set */
+	const char*		token,		/*!< in: token */
+	ulint			len);		/*!< in: token length */
+
+/*************************************************************//**
+FULLTEXT tokenizer internal in MYSQL_FTPARSER_SIMPLE_MODE
+@return 0 if tokenize successfully */
+int
+fts_tokenize_document_internal(
+/*===========================*/
+	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
+	const char*			doc,	/*!< in: document to tokenize */
+	int			len);	/*!< in: document length */
 
 /*********************************************************************//**
 Fetch COUNT(*) from specified table.
 @return the number of rows in the table */
-UNIV_INTERN
 ulint
 fts_get_rows_count(
 /*===============*/
@@ -951,7 +943,6 @@ fts_get_rows_count(
 /*************************************************************//**
 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
-UNIV_INTERN
 doc_id_t
 fts_get_max_doc_id(
 /*===============*/
@@ -961,7 +952,6 @@ fts_get_max_doc_id(
 Check whether user supplied stopword table exists and is of
 the right format.
 @return the stopword column charset if qualifies */
-UNIV_INTERN
 CHARSET_INFO*
 fts_valid_stopword_table(
 /*=====================*/
@@ -970,7 +960,6 @@ fts_valid_stopword_table(
 /****************************************************************//**
 This function loads specified stopword into FTS cache
 @return TRUE if success */
-UNIV_INTERN
 ibool
 fts_load_stopword(
 /*==============*/
@@ -989,7 +978,6 @@ fts_load_stopword(
 /****************************************************************//**
 Create the vector of fts_get_doc_t instances.
 @return vector of fts_get_doc_t instances */
-UNIV_INTERN
 ib_vector_t*
 fts_get_docs_create(
 /*================*/
@@ -998,7 +986,6 @@ fts_get_docs_create(
 /****************************************************************//**
 Read the rows from the FTS index
 @return DB_SUCCESS if OK */
-UNIV_INTERN
 dberr_t
 fts_table_fetch_doc_ids(
 /*====================*/
@@ -1012,7 +999,6 @@ used. There are documents that have not yet sync-ed to auxiliary
 tables from last server abnormally shutdown, we will need to bring
 such document into FTS cache before any further operations
 @return TRUE if all OK */
-UNIV_INTERN
 ibool
 fts_init_index(
 /*===========*/
@@ -1021,7 +1007,6 @@ fts_init_index(
 						have cache lock */
 /*******************************************************************//**
 Add a newly create index in FTS cache */
-UNIV_INTERN
 void
 fts_add_index(
 /*==========*/
@@ -1031,19 +1016,16 @@ fts_add_index(
 /*******************************************************************//**
 Drop auxiliary tables related to an FTS index
 @return DB_SUCCESS or error number */
-UNIV_INTERN
 dberr_t
 fts_drop_index(
 /*===========*/
 	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
 	dict_index_t*	index,	/*!< in: Index to be dropped */
-	trx_t*		trx)	/*!< in: Transaction for the drop */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*		trx);	/*!< in: Transaction for the drop */
 
 /****************************************************************//**
 Rename auxiliary tables for all fts index for a table
 @return DB_SUCCESS or error code */
-
 dberr_t
 fts_rename_aux_tables(
 /*==================*/
@@ -1055,10 +1037,21 @@ fts_rename_aux_tables(
 Check indexes in the fts->indexes is also present in index cache and
 table->indexes list
 @return TRUE if all indexes match */
-UNIV_INTERN
 ibool
 fts_check_cached_index(
 /*===================*/
 	dict_table_t*	table);  /*!< in: Table where indexes are dropped */
+
+/** Check if all the auxiliary tables associated with FTS index are in
+consistent state. For now consistency is checked only by ensuring
+index->page_no != FIL_NULL
+@param[out]	base_table	table that hosts the fts index
+@param[in,out]	trx		trx handler */
+void
+fts_check_corrupt(
+	dict_table_t*	base_table,
+	trx_t*		trx);
+
+
 #endif /*!< fts0fts.h */
 
diff --git a/storage/innobase/include/fts0opt.h b/storage/innobase/include/fts0opt.h
index 92eaf8270d2..a9185ad8df1 100644
--- a/storage/innobase/include/fts0opt.h
+++ b/storage/innobase/include/fts0opt.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2001, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,6 @@ Created 2011-02-15 Jimmy Yang
 
 /********************************************************************
 Callback function to fetch the rows in an FTS INDEX record. */
-UNIV_INTERN
 ibool
 fts_optimize_index_fetch_node(
 /*==========================*/
diff --git a/storage/innobase/include/fts0plugin.h b/storage/innobase/include/fts0plugin.h
new file mode 100644
index 00000000000..9bc9b6b9dd7
--- /dev/null
+++ b/storage/innobase/include/fts0plugin.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+
+Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0plugin.h
+Full text search plugin header file
+
+Created 2013/06/04 Shaohua Wang
+***********************************************************************/
+
+#ifndef INNOBASE_FTS0PLUGIN_H
+#define INNOBASE_FTS0PLUGIN_H
+
+#include "ha_prototypes.h"
+
+extern struct st_mysql_ftparser fts_default_parser;
+
+struct fts_ast_state_t;
+
+#define PARSER_INIT(parser, arg) if (parser->init) { parser->init(arg); }
+#define PARSER_DEINIT(parser, arg) if (parser->deinit) { parser->deinit(arg); }
+
+/******************************************************************//**
+Parse an fts query using the plugin parser.
+@return 0 if parsed successfully, non-zero otherwise. */
+int
+fts_parse_by_parser(
+/*================*/
+	ibool			mode,	/*!< in: query boolean mode */
+	uchar*			query,	/*!< in: query string */
+	ulint			len,	/*!< in: query string length */
+	st_mysql_ftparser*	parse,	/*!< in: fts plugin parser */
+	fts_ast_state_t*	state);	/*!< in: query parser state */
+
+#endif	/* INNOBASE_FTS0PLUGIN_H */
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index 2d4e9d88fd1..1fd33c2b103 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -26,6 +26,7 @@ Created 2011/09/02 Sunny Bains
 #ifndef INNOBASE_FTS0PRIV_H
 #define INNOBASE_FTS0PRIV_H
 
+#include "univ.i"
 #include "dict0dict.h"
 #include "pars0pars.h"
 #include "que0que.h"
@@ -114,34 +115,35 @@ component.
 /******************************************************************//**
 Parse an SQL string. %s is replaced with the table's id.
 @return query graph */
-UNIV_INTERN
 que_t*
 fts_parse_sql(
 /*==========*/
 	fts_table_t*	fts_table,	/*!< in: FTS aux table */
 	pars_info_t*	info,		/*!< in: info struct, or NULL */
 	const char*	sql)		/*!< in: SQL string to evaluate */
-	MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Evaluate a parsed SQL statement
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_eval_sql(
 /*=========*/
 	trx_t*		trx,		/*!< in: transaction */
 	que_t*		graph)		/*!< in: Parsed statement */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Construct the name of an ancillary FTS table for the given table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
-char*
+Caller must allocate enough memory (usually size of MAX_FULL_NAME_LEN)
+for param 'table_name'. */
+void
 fts_get_table_name(
 /*===============*/
 	const fts_table_t*
-			fts_table)	/*!< in: FTS aux table info */
-	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
+			fts_table,	/*!< in: FTS aux table info */
+	char*		table_name);	/*!< in/out: aux table name */
+
 /******************************************************************//**
 Construct the column specification part of the SQL string for selecting the
 indexed FTS columns for the given table. Adds the necessary bound
@@ -157,14 +159,13 @@ Two indexed columns named "subject" and "content":
  "$sel0, $sel1",
  info/ids: sel0 -> "subject", sel1 -> "content",
 @return heap-allocated WHERE string */
-UNIV_INTERN
 const char*
 fts_get_select_columns_str(
 /*=======================*/
 	dict_index_t*	index,		/*!< in: FTS index */
 	pars_info_t*	info,		/*!< in/out: parser info */
 	mem_heap_t*	heap)		/*!< in: memory heap */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
 we want to get Doc whose ID is equal to or greater or smaller than supplied
@@ -177,7 +178,6 @@ ID */
 Fetch document (= a single row's indexed text) with the given
 document id.
 @return: DB_SUCCESS if fetch is successful, else error */
-UNIV_INTERN
 dberr_t
 fts_doc_fetch_by_doc_id(
 /*====================*/
@@ -190,24 +190,21 @@ fts_doc_fetch_by_doc_id(
 	fts_sql_callback
 			callback,	/*!< in: callback to read
 					records */
-	void*		arg)		/*!< in: callback arg */
-	MY_ATTRIBUTE((nonnull(6)));
+	void*		arg);		/*!< in: callback arg */
 
 /*******************************************************************//**
 Callback function for fetch that stores the text of an FTS document,
 converting each column to UTF-16.
 @return always FALSE */
-UNIV_INTERN
 ibool
 fts_query_expansion_fetch_doc(
 /*==========================*/
 	void*		row,		/*!< in: sel_node_t* */
-	void*		user_arg)	/*!< in: fts_doc_t* */
-	MY_ATTRIBUTE((nonnull));
+	void*		user_arg);	/*!< in: fts_doc_t* */
+
 /********************************************************************
 Write out a single word's data as new entry/entries in the INDEX table.
 @return DB_SUCCESS if all OK. */
-UNIV_INTERN
 dberr_t
 fts_write_node(
 /*===========*/
@@ -216,22 +213,38 @@ fts_write_node(
 	fts_table_t*	fts_table,	/*!< in: the FTS aux index */
 	fts_string_t*	word,		/*!< in: word in UTF-8 */
 	fts_node_t*	node)		/*!< in: node columns */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Check fts token
+1. for ngram token, check whether the token contains any words in stopwords
+2. for non-ngram token, check if it's stopword or less than fts_min_token_size
+or greater than fts_max_token_size.
+@param[in]	token		token string
+@param[in]	stopwords	stopwords rb tree
+@param[in]	is_ngram	is ngram parser
+@param[in]	cs		token charset
+@retval true	if it is not stopword and length in range
+@retval false	if it is stopword or length not in range */
+bool
+fts_check_token(
+	const fts_string_t*	token,
+	const ib_rbt_t*		stopwords,
+	bool			is_ngram,
+	const CHARSET_INFO*	cs);
+
 /*******************************************************************//**
 Tokenize a document. */
-UNIV_INTERN
 void
 fts_tokenize_document(
 /*==================*/
 	fts_doc_t*	doc,		/*!< in/out: document to
 					tokenize */
-	fts_doc_t*	result)		/*!< out: if provided, save
+	fts_doc_t*	result,		/*!< out: if provided, save
 					result tokens here */
-	MY_ATTRIBUTE((nonnull(1)));
+	st_mysql_ftparser*	parser);/* in: plugin fts parser */
 
 /*******************************************************************//**
 Continue to tokenize a document. */
-UNIV_INTERN
 void
 fts_tokenize_document_next(
 /*=======================*/
@@ -239,23 +252,21 @@ fts_tokenize_document_next(
 					tokenize */
 	ulint		add_pos,	/*!< in: add this position to all
 					tokens from this tokenization */
-	fts_doc_t*	result)		/*!< out: if provided, save
+	fts_doc_t*	result,		/*!< out: if provided, save
 					result tokens here */
-	MY_ATTRIBUTE((nonnull(1)));
+	st_mysql_ftparser*	parser);/* in: plugin fts parser */
+
 /******************************************************************//**
 Initialize a document. */
-UNIV_INTERN
 void
 fts_doc_init(
 /*=========*/
-	fts_doc_t*	doc)		/*!< in: doc to initialize */
-	MY_ATTRIBUTE((nonnull));
+	fts_doc_t*	doc);		/*!< in: doc to initialize */
 
 /******************************************************************//**
 Do a binary search for a doc id in the array
 @return +ve index if found -ve index where it should be
         inserted if not found */
-UNIV_INTERN
 int
 fts_bsearch(
 /*========*/
@@ -263,27 +274,24 @@ fts_bsearch(
 	int		lower,		/*!< in: lower bound of array*/
 	int		upper,		/*!< in: upper bound of array*/
 	doc_id_t	doc_id)		/*!< in: doc id to lookup */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************************//**
 Free document. */
-UNIV_INTERN
 void
 fts_doc_free(
 /*=========*/
-	fts_doc_t*	doc)		/*!< in: document */
-	MY_ATTRIBUTE((nonnull));
+	fts_doc_t*	doc);		/*!< in: document */
+
 /******************************************************************//**
 Free fts_optimizer_word_t instanace.*/
-UNIV_INTERN
 void
 fts_word_free(
 /*==========*/
-	fts_word_t*	word)		/*!< in: instance to free.*/
-	MY_ATTRIBUTE((nonnull));
+	fts_word_t*	word);		/*!< in: instance to free.*/
+
 /******************************************************************//**
 Read the rows from the FTS inde
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_index_fetch_nodes(
 /*==================*/
@@ -292,19 +300,18 @@ fts_index_fetch_nodes(
 	fts_table_t*	fts_table,	/*!< in: FTS aux table */
 	const fts_string_t*
 			word,		/*!< in: the word to fetch */
-	fts_fetch_t*	fetch)		/*!< in: fetch callback.*/
-	MY_ATTRIBUTE((nonnull));
+	fts_fetch_t*	fetch);		/*!< in: fetch callback.*/
+
 /******************************************************************//**
 Create a fts_optimizer_word_t instance.
 @return new instance */
-UNIV_INTERN
 fts_word_t*
 fts_word_init(
 /*==========*/
 	fts_word_t*	word,		/*!< in: word to initialize */
 	byte*		utf8,		/*!< in: UTF-8 string */
-	ulint		len)		/*!< in: length of string in bytes */
-	MY_ATTRIBUTE((nonnull));
+	ulint		len);		/*!< in: length of string in bytes */
+
 /******************************************************************//**
 Compare two fts_trx_table_t instances, we actually compare the
 table id's here.
@@ -314,8 +321,8 @@ int
 fts_trx_table_cmp(
 /*==============*/
 	const void*	v1,		/*!< in: id1 */
-	const void*	v2)		/*!< in: id2 */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const void*	v2);		/*!< in: id2 */
+
 /******************************************************************//**
 Compare a table id with a trx_table_t table id.
 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
@@ -324,43 +331,40 @@ int
 fts_trx_table_id_cmp(
 /*=================*/
 	const void*	p1,		/*!< in: id1 */
-	const void*	p2)		/*!< in: id2 */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const void*	p2);		/*!< in: id2 */
+
 /******************************************************************//**
 Commit a transaction.
 @return DB_SUCCESS if all OK */
-UNIV_INTERN
 dberr_t
 fts_sql_commit(
 /*===========*/
-	trx_t*		trx)		/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*		trx);		/*!< in: transaction */
+
 /******************************************************************//**
 Rollback a transaction.
 @return DB_SUCCESS if all OK */
-UNIV_INTERN
 dberr_t
 fts_sql_rollback(
 /*=============*/
-	trx_t*		trx)		/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*		trx);		/*!< in: transaction */
+
 /******************************************************************//**
 Parse an SQL string. %s is replaced with the table's id. Don't acquire
 the dict mutex
 @return query graph */
-UNIV_INTERN
 que_t*
 fts_parse_sql_no_dict_lock(
 /*=======================*/
 	fts_table_t*	fts_table,	/*!< in: table with FTS index */
 	pars_info_t*	info,		/*!< in: parser info */
 	const char*	sql)		/*!< in: SQL string to evaluate */
-	MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Get value from config table. The caller must ensure that enough
 space is allocated for value to hold the column contents
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_get_value(
 /*=================*/
@@ -368,15 +372,13 @@ fts_config_get_value(
 	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
 	const char*	name,		/*!< in: get config value for
 					this parameter name */
-	fts_string_t*	value)		/*!< out: value read from
+	fts_string_t*	value);		/*!< out: value read from
 					config table */
-	MY_ATTRIBUTE((nonnull));
 /******************************************************************//**
 Get value specific to an FTS index from the config table. The caller
 must ensure that enough space is allocated for value to hold the
 column contents.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_get_index_value(
 /*=======================*/
@@ -386,11 +388,11 @@ fts_config_get_index_value(
 					this parameter name */
 	fts_string_t*	value)		/*!< out: value read from
 					config table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Set the value in the config table for name.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_set_value(
 /*=================*/
@@ -399,12 +401,11 @@ fts_config_set_value(
 	const char*	name,		/*!< in: get config value for
 					this parameter name */
 	const fts_string_t*
-			value)		/*!< in: value to update */
-	MY_ATTRIBUTE((nonnull));
+			value);		/*!< in: value to update */
+
 /****************************************************************//**
 Set an ulint value in the config table.
 @return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
 dberr_t
 fts_config_set_ulint(
 /*=================*/
@@ -412,11 +413,11 @@ fts_config_set_ulint(
 	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
 	const char*	name,		/*!< in: param name */
 	ulint		int_value)	/*!< in: value */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Set the value specific to an FTS index in the config table.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_set_index_value(
 /*=======================*/
@@ -426,11 +427,11 @@ fts_config_set_index_value(
 					this parameter name */
 	fts_string_t*	value)		/*!< out: value read from
 					config table */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Increment the value in the config table for column name.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_increment_value(
 /*=======================*/
@@ -439,11 +440,11 @@ fts_config_increment_value(
 	const char*	name,		/*!< in: increment config value
 					for this parameter name */
 	ulint		delta)		/*!< in: increment by this much */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Increment the per index value in the config table for column name.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_increment_index_value(
 /*=============================*/
@@ -451,12 +452,11 @@ fts_config_increment_index_value(
 	dict_index_t*	index,		/*!< in: FTS index */
 	const char*	name,		/*!< in: increment config value
 					for this parameter name */
-	ulint		delta)		/*!< in: increment by this much */
-	MY_ATTRIBUTE((nonnull));
+	ulint		delta);		/*!< in: increment by this much */
+
 /******************************************************************//**
 Get an ulint value from the config table.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_get_index_ulint(
 /*=======================*/
@@ -464,11 +464,11 @@ fts_config_get_index_ulint(
 	dict_index_t*	index,		/*!< in: FTS index */
 	const char*	name,		/*!< in: param name */
 	ulint*		int_value)	/*!< out: value */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Set an ulint value int the config table.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_set_index_ulint(
 /*=======================*/
@@ -476,23 +476,22 @@ fts_config_set_index_ulint(
 	dict_index_t*	index,		/*!< in: FTS index */
 	const char*	name,		/*!< in: param name */
 	ulint		int_value)	/*!< in: value */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Get an ulint value from the config table.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_config_get_ulint(
 /*=================*/
 	trx_t*		trx,		/*!< in: transaction */
 	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
 	const char*	name,		/*!< in: param name */
-	ulint*		int_value)	/*!< out: value */
-	MY_ATTRIBUTE((nonnull));
+	ulint*		int_value);	/*!< out: value */
+
 /******************************************************************//**
 Search cache for word.
 @return the word node vector if found else NULL */
-UNIV_INTERN
 const ib_vector_t*
 fts_cache_find_word(
 /*================*/
@@ -500,21 +499,21 @@ fts_cache_find_word(
 			index_cache,	/*!< in: cache to search */
 	const fts_string_t*
 			text)		/*!< in: word to search for */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Check cache for deleted doc id.
 @return TRUE if deleted */
-UNIV_INTERN
 ibool
 fts_cache_is_deleted_doc_id(
 /*========================*/
 	const fts_cache_t*
 			cache,		/*!< in: cache ito search */
 	doc_id_t	doc_id)		/*!< in: doc id to search for */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Append deleted doc ids to vector and sort the vector. */
-UNIV_INTERN
 void
 fts_cache_append_deleted_doc_ids(
 /*=============================*/
@@ -526,7 +525,6 @@ Wait for the background thread to start. We poll to detect change
 of state, which is acceptable, since the wait should happen only
 once during startup.
 @return true if the thread started else FALSE (i.e timed out) */
-UNIV_INTERN
 ibool
 fts_wait_for_background_thread_to_start(
 /*====================================*/
@@ -539,19 +537,17 @@ fts_wait_for_background_thread_to_start(
 /******************************************************************//**
 Get the total number of words in the FTS for a particular FTS index.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 fts_get_total_word_count(
 /*=====================*/
 	trx_t*		trx,		/*!< in: transaction */
 	dict_index_t*	index,		/*!< in: for this index */
 	ulint*		total)		/*!< out: total words */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif
 /******************************************************************//**
 Search the index specific cache for a particular FTS index.
 @return the index specific cache else NULL */
-UNIV_INTERN
 fts_index_cache_t*
 fts_find_index_cache(
 /*================*/
@@ -559,21 +555,22 @@ fts_find_index_cache(
 			cache,		/*!< in: cache to search */
 	const dict_index_t*
 			index)		/*!< in: index to search for */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Write the table id to the given buffer (including final NUL). Buffer must be
 at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
-@return	number of bytes written */
+@return number of bytes written */
 UNIV_INLINE
 int
 fts_write_object_id(
 /*================*/
 	ib_id_t		id,		/*!< in: a table/index id */
 	char*		str,		/*!< in: buffer to write the id to */
-	bool		hex_format MY_ATTRIBUTE((unused)))
+	bool		hex_format MY_ATTRIBUTE((unused)));
 					/*!< in: true for fixed hex format,
 					false for old ambiguous format */
-	MY_ATTRIBUTE((nonnull));
+
 /******************************************************************//**
 Read the table id from the string generated by fts_write_object_id().
 @return TRUE if parse successful */
@@ -583,11 +580,11 @@ fts_read_object_id(
 /*===============*/
 	ib_id_t*	id,		/*!< out: a table id */
 	const char*	str)		/*!< in: buffer to read from */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Get the table id.
 @return number of bytes written */
-UNIV_INTERN
 int
 fts_get_table_id(
 /*=============*/
@@ -596,55 +593,51 @@ fts_get_table_id(
 	char*		table_id)	/*!< out: table id, must be at least
 					FTS_AUX_MIN_TABLE_ID_LENGTH bytes
 					long */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Add the table to add to the OPTIMIZER's list. */
-UNIV_INTERN
 void
 fts_optimize_add_table(
 /*===================*/
-	dict_table_t*	table)		/*!< in: table to add */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table);		/*!< in: table to add */
+
 /******************************************************************//**
 Optimize a table. */
-UNIV_INTERN
 void
 fts_optimize_do_table(
 /*==================*/
-	dict_table_t*	table)		/*!< in: table to optimize */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table);		/*!< in: table to optimize */
+
 /******************************************************************//**
 Construct the prefix name of an FTS table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
+@return own: table name, must be freed with ut_free() */
 char*
 fts_get_table_name_prefix(
 /*======================*/
 	const fts_table_t*
 			fts_table)	/*!< in: Auxiliary table type */
-	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************************//**
 Add node positions. */
-UNIV_INTERN
 void
 fts_cache_node_add_positions(
 /*=========================*/
 	fts_cache_t*	cache,		/*!< in: cache */
 	fts_node_t*	node,		/*!< in: word node */
 	doc_id_t	doc_id,		/*!< in: doc id */
-	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
-	MY_ATTRIBUTE((nonnull(2,4)));
+	ib_vector_t*	positions);	/*!< in: fts_token_t::positions */
 
 /******************************************************************//**
 Create the config table name for retrieving index specific value.
 @return index config parameter name */
-UNIV_INTERN
 char*
 fts_config_create_index_param_name(
 /*===============================*/
-	const char*		param,		/*!< in: base name of param */
-	const dict_index_t*	index)		/*!< in: index for config */
-	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
+	const char*		param,	/*!< in: base name of param */
+	const dict_index_t*	index)	/*!< in: index for config */
+	MY_ATTRIBUTE((warn_unused_result));
 
 #ifndef UNIV_NONINL
 #include "fts0priv.ic"
diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic
index 88f2d67c7b8..fa2cdd44a36 100644
--- a/storage/innobase/include/fts0priv.ic
+++ b/storage/innobase/include/fts0priv.ic
@@ -26,7 +26,7 @@ Created 2011/11/12 Sunny Bains
 /******************************************************************//**
 Write the table id to the given buffer (including final NUL). Buffer must be
 at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
-@return	number of bytes written */
+@return number of bytes written */
 UNIV_INLINE
 int
 fts_write_object_id(
@@ -46,36 +46,31 @@ fts_write_object_id(
 	/* Use this to construct old(5.6.14 and 5.7.3) windows
 	ambiguous aux table names */
 	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
-			return(sprintf(str, "%016llu", id)););
+			return(sprintf(str, "%016llu", (ulonglong) id)););
 
 #else /* _WIN32 */
 
 	/* Use this to construct old(5.6.14 and 5.7.3) windows
 	ambiguous aux table names */
 	DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name",
-			return(sprintf(str, "%016" PRIu64, id)););
+			return(sprintf(str, "%016llu", (ulonglong) id)););
 
 	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
-			return(sprintf(str, UINT64PFx, id)););
+			return(sprintf(str, "%016llx", (ulonglong) id)););
 
 #endif /* _WIN32 */
 
 	/* As above, but this is only for those tables failing to rename. */
 	if (!hex_format) {
-#ifdef _WIN32
-		// FIXME: Use ut_snprintf(), so does following one.
-		return(sprintf(str, "%016llu", id));
-#else /* _WIN32 */
-		return(sprintf(str, "%016" PRIu64, id));
-#endif /* _WIN32 */
+		return(sprintf(str, "%016llu", (ulonglong) id));
 	}
 
-	return(sprintf(str, UINT64PFx, id));
+	return(sprintf(str, "%016llx", (ulonglong) id));
 }
 
 /******************************************************************//**
 Read the table id from the string generated by fts_write_object_id().
-@return	TRUE if parse successful */
+@return TRUE if parse successful */
 UNIV_INLINE
 ibool
 fts_read_object_id(
@@ -91,7 +86,7 @@ fts_read_object_id(
 
 /******************************************************************//**
 Compare two fts_trx_table_t instances.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2  */
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
 UNIV_INLINE
 int
 fts_trx_table_cmp(
@@ -99,8 +94,11 @@ fts_trx_table_cmp(
 	const void*	p1,			/*!< in: id1 */
 	const void*	p2)			/*!< in: id2 */
 {
-	const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
-	const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+	const dict_table_t*	table1
+		= (*static_cast<const fts_trx_table_t* const*>(p1))->table;
+
+	const dict_table_t*	table2
+		= (*static_cast<const fts_trx_table_t* const*>(p2))->table;
 
 	return((table1->id > table2->id)
 	       ? 1
@@ -119,8 +117,9 @@ fts_trx_table_id_cmp(
 	const void*	p1,			/*!< in: id1 */
 	const void*	p2)			/*!< in: id2 */
 {
-	const ullint* table_id = (const ullint*) p1;
-	const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+	const uintmax_t*	table_id = static_cast<const uintmax_t*>(p1);
+	const dict_table_t*	table2
+		= (*static_cast<const fts_trx_table_t* const*>(p2))->table;
 
 	return((*table_id > table2->id)
 	       ? 1
diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h
index f91533803e8..49bea8b08d4 100644
--- a/storage/innobase/include/fts0tlex.h
+++ b/storage/innobase/include/fts0tlex.h
@@ -341,7 +341,7 @@ extern int fts0tlex (yyscan_t yyscanner);
 #undef YY_DECL
 #endif
 
-#line 68 "fts0tlex.l"
+#line 69 "fts0tlex.l"
 
 
 #line 348 "../include/fts0tlex.h"
diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h
new file mode 100644
index 00000000000..15726aea1de
--- /dev/null
+++ b/storage/innobase/include/fts0tokenize.h
@@ -0,0 +1,188 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0tokenize.h
+Full Text Search plugin tokenizer, adapted from MyISAM
+
+Created 2014/11/17 Shaohua Wang
+***********************************************************************/
+
+#include "ft_global.h"
+#include "mysql/plugin_ftparser.h"
+#include "m_ctype.h"
+
+/* Macros and structs below are from ftdefs.h in MyISAM */
+/** Check whether a character is a true word character */
+#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_')
+
+/** Check whether a character is a misc word character (never true here) */
+#define misc_word_char(X)       0
+
+/** Boolean search syntax */
+static const char* fts_boolean_syntax = DEFAULT_FTB_SYNTAX;
+
+#define FTB_YES   (fts_boolean_syntax[0])
+#define FTB_EGAL  (fts_boolean_syntax[1])
+#define FTB_NO    (fts_boolean_syntax[2])
+#define FTB_INC   (fts_boolean_syntax[3])
+#define FTB_DEC   (fts_boolean_syntax[4])
+#define FTB_LBR   (fts_boolean_syntax[5])
+#define FTB_RBR   (fts_boolean_syntax[6])
+#define FTB_NEG   (fts_boolean_syntax[7])
+#define FTB_TRUNC (fts_boolean_syntax[8])
+#define FTB_LQUOT (fts_boolean_syntax[10])
+#define FTB_RQUOT (fts_boolean_syntax[11])
+
+/** FTS query token */
+typedef struct st_ft_word {
+        uchar* pos;     /*!< word start pointer */
+        uint   len;     /*!< word len */
+        double weight;  /*!< word weight, unused in innodb */
+} FT_WORD;
+
+/** Tokenizer for ngram, adapted from ft_get_word() (ft_parser.c) in MyISAM
+(reformatted, stopword processing removed). Each call extracts one token.
+@param[in]	cs	charset used to classify each character
+@param[in,out]	start	in: where to scan from; out: where to resume
+@param[in]	end	end of the document (exclusive)
+@param[out]	word	pos and len of the word, set for FT_TOKEN_WORD
+@param[in,out]	info	parser state (quot, prev) and token attributes
+@retval	FT_TOKEN_EOF		nothing left and not inside a quote
+@retval	FT_TOKEN_WORD		word found; len is 0 if no word char was left
+@retval	FT_TOKEN_LEFT_PAREN	left bracket or opening quote
+@retval	FT_TOKEN_RIGHT_PAREN	right bracket, closing quote, or called at
+				end of document while still inside a quote */
+inline
+uchar
+fts_get_word(
+	const CHARSET_INFO*	cs,
+	uchar**			start,
+	uchar*			end,
+	FT_WORD*		word,
+	MYSQL_FTPARSER_BOOLEAN_INFO*
+				info)
+{
+	uchar*	doc = *start;
+	int	ctype;	/* character class flags filled in by ctype() */
+	uint	mwc;	/* misc_word_char count; stays 0 as the macro is 0 */
+	uint	length;	/* characters in the word (not used further) */
+	int	mbl;	/* ctype() result; |mbl| (1 if 0) is the step */
+
+	info->yesno = (FTB_YES ==' ') ? 1 : (info->quot != 0);
+	info->weight_adjust = info->wasign = 0;
+	info->type = FT_TOKEN_EOF;
+	/* The loop body always returns, so it runs at most once. */
+	while (doc < end) {
+		for (; doc < end;
+		     doc += (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) {
+			mbl = cs->cset->ctype(cs, &ctype, doc, end);
+
+			if (true_word_char(ctype, *doc)) {
+				break;
+			}
+
+			if (*doc == FTB_RQUOT && info->quot) {
+				*start = doc + 1;
+				info->type = FT_TOKEN_RIGHT_PAREN;
+
+				return(info->type);
+			}
+			/* Brackets and operators are inert inside a quote. */
+			if (!info->quot) {
+				if (*doc == FTB_LBR
+				    || *doc == FTB_RBR
+				    || *doc == FTB_LQUOT) {
+					/* quot: non-NULL = inside quote */
+					*start = doc + 1;
+					if (*doc == FTB_LQUOT) {
+						info->quot = (char*)1;
+					}
+
+					info->type = (*doc == FTB_RBR ?
+						       FT_TOKEN_RIGHT_PAREN :
+						       FT_TOKEN_LEFT_PAREN);
+
+					return(info->type);
+				}
+				/* Operators count only right after a space. */
+				if (info->prev == ' ') {
+					if (*doc == FTB_YES) {
+						info->yesno = +1;
+						continue;
+					} else if (*doc == FTB_EGAL) {
+						info->yesno = 0;
+						continue;
+					} else if (*doc == FTB_NO) {
+						info->yesno = -1;
+						continue;
+					} else if (*doc == FTB_INC) {
+						info->weight_adjust++;
+						continue;
+					} else if (*doc == FTB_DEC) {
+						info->weight_adjust--;
+						continue;
+					} else if (*doc == FTB_NEG) {
+						info->wasign = !info->wasign;
+						continue;
+					}
+				}
+			}
+			/* Ordinary separator: drop any pending operators. */
+			info->prev = *doc;
+			info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
+			info->weight_adjust = info->wasign = 0;
+		}
+		/* doc is now at a word char or at end; scan the word. */
+		mwc = length = 0;
+		for (word->pos = doc;
+		     doc < end;
+		     length++, doc += (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) {
+			mbl = cs->cset->ctype(cs, &ctype, doc, end);
+
+			if (true_word_char(ctype, *doc)) {
+				mwc = 0;
+			} else if (!misc_word_char(*doc) || mwc) {
+				break;
+			} else {
+				mwc++;
+			}
+		}
+
+		/* prev != ' ': operators right after a word are ignored. */
+		info->prev = 'A';
+		word->len = (uint)(doc-word->pos) - mwc;
+
+		if ((info->trunc = (doc < end && *doc == FTB_TRUNC))) {
+			doc++;
+		}
+
+		/* No stopword or length check: word->len may be 0. */
+		*start = doc;
+		info->type = FT_TOKEN_WORD;
+
+		return(info->type);
+	}
+
+	if (info->quot) {
+		*start = doc;
+		info->type = FT_TOKEN_RIGHT_PAREN;
+	}
+
+	return(info->type);
+}
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index e495fe72a60..039006265f6 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -26,16 +26,17 @@ Created 2007-03-27 Sunny Bains
 #ifndef INNOBASE_FTS0TYPES_H
 #define INNOBASE_FTS0TYPES_H
 
+#include "univ.i"
+#include "fts0fts.h"
+#include "fut0fut.h"
+#include "pars0pars.h"
 #include "que0types.h"
 #include "ut0byte.h"
-#include "fut0fut.h"
 #include "ut0rbt.h"
-#include "fts0fts.h"
 
 /** Types used within FTS. */
 struct fts_que_t;
 struct fts_node_t;
-struct fts_utf8_str_t;
 
 /** Callbacks used within FTS. */
 typedef pars_user_func_cb_t fts_sql_callback;
@@ -270,6 +271,12 @@ struct fts_doc_t {
 					same lifespan, most notably
 					the vector of token positions */
 	CHARSET_INFO*	charset;	/*!< Document's charset info */
+
+	st_mysql_ftparser* parser;	/*!< fts plugin parser */
+
+	bool		is_ngram;	/*!< Whether it is a ngram parser */
+
+	ib_rbt_t*	stopwords;	/*!< Stopwords */
 };
 
 /** A token and its positions within a document. */
@@ -285,33 +292,6 @@ struct fts_token_t {
 extern const fts_index_selector_t fts_index_selector[];
 
 /******************************************************************//**
-Compare two UTF-8 strings. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
-						/*!< out:
-						< 0 if n1 < n2,
-						0 if n1 == n2,
-						> 0 if n1 > n2 */
-	const void*	p1,			/*!< in: key */
-	const void*	p2);			/*!< in: node */
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
-						/*!< out:
-						< 0 if n1 < n2,
-						0 if n1 == n2,
-						> 0 if n1 > n2 */
-	const void*	p1,			/*!< in: key */
-	const void*	p2);			/*!< in: node */
-
-/******************************************************************//**
 Compare two fts_trx_row_t instances doc_ids. */
 UNIV_INLINE
 int
@@ -361,11 +341,11 @@ fts_decode_vlc(
 			incremented by the number of bytes decoded */
 
 /******************************************************************//**
-Duplicate an UTF-8 string. */
+Duplicate a string. */
 UNIV_INLINE
 void
-fts_utf8_string_dup(
-/*================*/
+fts_string_dup(
+/*===========*/
 						/*!< out:
 						< 0 if n1 < n2,
 						0 if n1 == n2,
@@ -397,43 +377,6 @@ fts_encode_int(
 						enough space */
 
 /******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value              1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx          0xxxxxxx
-00000yyy yyxxxxxx          110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx          1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
-						/*!< out: UTF8_ERROR if *ptr
-						did not point to a valid
-						UTF-8 sequence, or the
-						Unicode code point. */
-	const byte**	ptr);			/*!< in/out: pointer to
-						UTF-8 string. The
-						pointer is advanced to
-						the start of the next
-						character. */
-
-/******************************************************************//**
-Lowercase an UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
-	fts_string_t*	str);			/*!< in: string */
-
-/******************************************************************//**
 Get the selected FTS aux INDEX suffix. */
 UNIV_INLINE
 const char*
@@ -441,34 +384,17 @@ fts_get_suffix(
 /*===========*/
 	ulint		selected);		/*!< in: selected index */
 
-/********************************************************************
-Get the number of index selectors. */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void);
-/*=====================*/
-
-/******************************************************************//**
-Select the FTS auxiliary index for the given string.
+/** Select the FTS auxiliary index for the given character.
+@param[in]	cs	charset
+@param[in]	str	string
+@param[in]	len	string length in bytes
 @return the index to use for the string */
 UNIV_INLINE
 ulint
 fts_select_index(
-/*=============*/
-	const CHARSET_INFO*	cs,		/*!< Charset */
-	const byte*		str,		/*!< in: word string */
-	ulint			len);		/*!< in: string length */
-
-/********************************************************************
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */
-UNIV_INLINE
-ulint
-fts_select_next_index(
-/*==================*/
-	const CHARSET_INFO*	cs,		/*!< Charset */
-	const byte*		str,		/*!< in: string */
-	ulint			len);		/*!< in: string length */
+	const CHARSET_INFO*	cs,
+	const byte*		str,
+	ulint			len);
 
 #ifndef UNIV_NONINL
 #include "fts0types.ic"
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index f0dfd023a70..417a1010919 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,23 +26,16 @@ Created 2007-03-27 Sunny Bains
 #ifndef INNOBASE_FTS0TYPES_IC
 #define INNOBASE_FTS0TYPES_IC
 
-#include <ctype.h>
-
 #include "rem0cmp.h"
 #include "ha_prototypes.h"
 
-extern const ulint UTF8_ERROR;
-
-/* Determine if a UTF-8 continuation byte is valid. */
-#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
-
 /******************************************************************//**
-Duplicate an UTF-8 string.
+Duplicate a string.
 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
 UNIV_INLINE
 void
-fts_utf8_string_dup(
-/*================*/
+fts_string_dup(
+/*===========*/
 	fts_string_t*		dst,		/*!< in: dup to here */
 	const fts_string_t*	src,		/*!< in: src string */
 	mem_heap_t*		heap)		/*!< in: heap to use */
@@ -103,183 +96,6 @@ fts_update_doc_id_cmp(
 	return((int)(up1->doc_id - up2->doc_id));
 }
 
-
-/******************************************************************//**
-Lowercase an UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
-	fts_string_t*	str)			/*!< in: string */
-{
-	innobase_casedn_str((char*) str->f_str);
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
-	const void*	p1,			/*!< in: key */
-	const void*	p2)			/*!< in: node */
-{
-	const fts_string_t* s1 = (const fts_string_t*) p1;
-	const fts_string_t* s2 = (const fts_string_t*) p2;
-
-	return(cmp_data_data_slow_varchar(
-		s1->f_str, s1->f_len, s2->f_str, s2->f_len));
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
-	const void*	p1,			/*!< in: key */
-	const void*	p2)			/*!< in: node */
-{
-	int	result;
-	ulint	len;
-
-	const fts_string_t* s1 = (const fts_string_t*) p1;
-	const fts_string_t* s2 = (const fts_string_t*) p2;
-
-	len = ut_min(s1->f_len, s2->f_len);
-
-	result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len);
-
-	if (result) {
-		return(result);
-	}
-
-	if (s1->f_len > s2->f_len) {
-		return(1);
-	}
-
-	return(0);
-}
-
-/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value              1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx          0xxxxxxx
-00000yyy yyxxxxxx          110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx          1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input.
-@return UTF8_ERROR if *ptr did not point to a valid
-UTF-8 sequence, or the Unicode code point. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
-	const byte**	ptr)			/*!< in/out: pointer to
-						UTF-8 string. The
-						pointer is advanced to
-						the start of the next
-						character. */
-{
-	const byte*	p = *ptr;
-	ulint		ch = *p++;
-#ifdef UNIV_DEBUG
-	ulint		min_ch;
-#endif /* UNIV_DEBUG */
-
-	if (UNIV_LIKELY(ch < 0x80)) {
-		/* 0xxxxxxx */
-	} else if (UNIV_UNLIKELY(ch < 0xC0)) {
-		/* A continuation byte cannot start a code. */
-		goto err_exit;
-	} else if (ch < 0xE0) {
-		/* 110yyyyy 10xxxxxx */
-		ch &= 0x1F;
-		ut_d(min_ch = 0x80);
-		goto get1;
-	} else if (ch < 0xF0) {
-		/* 1110zzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x0F;
-		ut_d(min_ch = 0x800);
-		goto get2;
-	} else if (ch < 0xF8) {
-		/* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x07;
-		ut_d(min_ch = 0x10000);
-		goto get3;
-	} else if (ch < 0xFC) {
-		/* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x03;
-		ut_d(min_ch = 0x200000);
-		goto get4;
-	} else if (ch < 0xFE) {
-		/* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		ut_d(min_ch = 0x4000000);
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get4:
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get3:
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get2:
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get1:
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-
-		/* The following is needed in the 6-byte case
-		when ulint is wider than 32 bits. */
-		ch &= 0xFFFFFFFF;
-
-		/* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
-		and U+FFFE and U+FFFF cannot occur in valid UTF-8. */
-
-		if ( (ch >= 0xD800 && ch <= 0xDFFF)
-#ifdef UNIV_DEBUG
-		     || ch < min_ch
-#endif /* UNIV_DEBUG */
-		     || ch == 0xFFFE || ch == 0xFFFF) {
-
-			ch = UTF8_ERROR;
-		}
-	} else {
-err_exit:
-		ch = UTF8_ERROR;
-	}
-
-	*ptr = p;
-
-	return(ch);
-}
-
 /******************************************************************//**
 Get the first character's code position for FTS index partition */
 extern
@@ -290,16 +106,41 @@ innobase_strnxfrm(
         const uchar*		p2,	/*!< in: string */
         const ulint		len2);	/*!< in: string length */
 
-/******************************************************************//**
-Select the FTS auxiliary index for the given character.
-@return the index to use for the string */
+/** Check if fts index charset is cjk
+@param[in]	cs	charset
+@retval	true	if the charset is cjk
+@retval	false	if not. */
+UNIV_INLINE
+bool
+fts_is_charset_cjk(
+	const CHARSET_INFO*	cs)
+{
+	if (strcmp(cs->name, "gb2312_chinese_ci") == 0
+	    || strcmp(cs->name, "gbk_chinese_ci") == 0
+	    || strcmp(cs->name, "big5_chinese_ci") == 0
+	    || strcmp(cs->name, "gb18030_chinese_ci") == 0
+	    || strcmp(cs->name, "ujis_japanese_ci") == 0
+	    || strcmp(cs->name, "sjis_japanese_ci") == 0
+	    || strcmp(cs->name, "cp932_japanese_ci") == 0
+	    || strcmp(cs->name, "eucjpms_japanese_ci") == 0
+	    || strcmp(cs->name, "euckr_korean_ci") == 0) {
+		return(true);
+	} else {
+		return(false);
+	}
+}
+
+/** Select the FTS auxiliary index for the given character by range.
+@param[in]	cs	charset
+@param[in]	str	string
+@param[in]	len	string length
+@return the index to use for the string */
 UNIV_INLINE
 ulint
-fts_select_index(
-/*=============*/
-	const CHARSET_INFO*	cs,	/*!< in: Charset */
-	const byte*		str,	/*!< in: string */
-	ulint			len)	/*!< in: string length */
+fts_select_index_by_range(
+	const CHARSET_INFO*	cs,
+	const byte*		str,
+	ulint			len)
 {
 	ulint			selected = 0;
 	ulint			value = innobase_strnxfrm(cs, str, len);
@@ -323,37 +164,64 @@ fts_select_index(
 	return(selected - 1);
 }
 
-/******************************************************************//**
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */
+/** Select the FTS auxiliary index for the given character by hash.
+@param[in]	cs	charset
+@param[in]	str	string
+@param[in]	len	string length
+@return the index to use for the string */
 UNIV_INLINE
 ulint
-fts_select_next_index(
-/*==================*/
-	const CHARSET_INFO*	cs,	/*!< in: Charset */
-	const byte*		str,	/*!< in: string */
-	ulint			len)	/*!< in: string length */
+fts_select_index_by_hash(
+	const CHARSET_INFO*	cs,
+	const byte*		str,
+	ulint			len)
 {
-	ulint		selected = 0;
-	ulint		value = innobase_strnxfrm(cs, str, len);
+	int	char_len;
+	ulong	nr1 = 1;
+	ulong	nr2 = 4;
 
-	while (fts_index_selector[selected].value != 0) {
+	ut_ad(!(str == NULL && len > 0));
 
-		if (fts_index_selector[selected].value == value) {
+	if (str == NULL || len == 0) {
+		return 0;
+	}
 
-			return(selected + 1);
+	/* Get the first char */
+	/* JAN: TODO: MySQL 5.7 had
+	char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
+				    reinterpret_cast<const char*>(str + len));
+	*/
+	char_len = cs->cset->charlen(cs, str, str+len);
 
-		} else if (fts_index_selector[selected].value > value) {
+	ut_ad(static_cast<ulint>(char_len) <= len);
 
-			return(selected);
-		}
+	/* Get collation hash code */
+	cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
 
-		++selected;
-	}
+	return(nr1 % FTS_NUM_AUX_INDEX);
+}
 
-	ut_ad(selected > 0);
+/** Select the FTS auxiliary index for the given character.
+@param[in]	cs	charset
+@param[in]	str	string
+@param[in]	len	string length in bytes
+@return the index to use for the string */
+UNIV_INLINE
+ulint
+fts_select_index(
+	const CHARSET_INFO*	cs,
+	const byte*		str,
+	ulint			len)
+{
+	ulint	selected;
 
-	return((ulint) selected);
+	if (fts_is_charset_cjk(cs)) {
+		selected = fts_select_index_by_hash(cs, str, len);
+	} else {
+		selected = fts_select_index_by_range(cs, str, len);
+	}
+
+	return(selected);
 }
 
 /******************************************************************//**
@@ -367,22 +235,4 @@ fts_get_suffix(
 	return(fts_index_selector[selected].suffix);
 }
 
-/******************************************************************//**
-Get the number of index selectors.
-@return The number of selectors */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void)
-/*=====================*/
-{
-	ulint	i = 0;
-
-	// FIXME: This is a hack
-	while (fts_index_selector[i].value != 0) {
-		++i;
-	}
-
-	return(i);
-}
-
 #endif /* INNOBASE_FTS0TYPES_IC */
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
index 851cdb44cdf..0b8b8b0e43b 100644
--- a/storage/innobase/include/fut0fut.h
+++ b/storage/innobase/include/fut0fut.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -32,24 +32,28 @@ Created 12/13/1995 Heikki Tuuri
 #include "fil0fil.h"
 #include "mtr0mtr.h"
 
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
+/** Gets a pointer to a file address and latches the page.
+@param[in]	space		space id
+@param[in]	page_size	page size
+@param[in]	addr		file address
+@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH
+@param[in,out]	mtr		mini-transaction
+@param[out]	ptr_block	file page
+@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is
 bufferfixed and latched */
 UNIV_INLINE
 byte*
 fut_get_ptr(
-/*========*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	fil_addr_t	addr,	/*!< in: file address */
-	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
-	mtr_t*		mtr);	/*!< in: mtr handle */
+	ulint			space,
+	const page_size_t&	page_size,
+	fil_addr_t		addr,
+	rw_lock_type_t		rw_latch,
+	mtr_t*			mtr,
+	buf_block_t**		ptr_block = NULL)
+	MY_ATTRIBUTE((warn_unused_result));
 
 #ifndef UNIV_NONINL
 #include "fut0fut.ic"
 #endif
 
-#endif
-
+#endif /* fut0fut_h */
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index b065b10b9ca..6fe031876e6 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,31 +26,43 @@ Created 12/13/1995 Heikki Tuuri
 #include "sync0rw.h"
 #include "buf0buf.h"
 
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
+/** Gets a pointer to a file address and latches the page.
+@param[in]	space		space id
+@param[in]	page_size	page size
+@param[in]	addr		file address
+@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH
+@param[in,out]	mtr		mini-transaction
+@param[out]	ptr_block	file page
+@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is
 bufferfixed and latched */
 UNIV_INLINE
 byte*
 fut_get_ptr(
-/*========*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	fil_addr_t	addr,	/*!< in: file address */
-	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
-	mtr_t*		mtr)	/*!< in: mtr handle */
+	ulint			space,
+	const page_size_t&	page_size,
+	fil_addr_t		addr,
+	rw_lock_type_t		rw_latch,
+	mtr_t*			mtr,
+	buf_block_t**		ptr_block)
 {
 	buf_block_t*	block;
-	byte*		ptr;
+	byte*		ptr = NULL;
 
 	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
-	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+	ut_ad((rw_latch == RW_S_LATCH)
+	      || (rw_latch == RW_X_LATCH)
+	      || (rw_latch == RW_SX_LATCH));
+
+	block = buf_page_get(page_id_t(space, addr.page), page_size,
+			     rw_latch, mtr);
 
-	block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
 	ptr = buf_block_get_frame(block) + addr.boffset;
 
 	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
+	if (ptr_block != NULL) {
+		*ptr_block = block;
+	}
+
 	return(ptr);
 }
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
index 90f9a65d4fa..9c980d1358d 100644
--- a/storage/innobase/include/fut0lst.h
+++ b/storage/innobase/include/fut0lst.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,8 @@ Created 11/28/1995 Heikki Tuuri
 #ifndef fut0lst_h
 #define fut0lst_h
 
+#ifndef UNIV_INNOCHECKSUM
+
 #include "univ.i"
 
 #include "fil0fil.h"
@@ -41,11 +43,12 @@ typedef	byte	flst_node_t;
 
 /* The physical size of a list base node in bytes */
 #define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
+#endif /* !UNIV_INNOCHECKSUM */
 
 /* The physical size of a list node in bytes */
 #define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
 
-#ifndef UNIV_HOTBACKUP
+#if !defined UNIV_HOTBACKUP && !defined UNIV_INNOCHECKSUM
 /********************************************************************//**
 Initializes a list base node. */
 UNIV_INLINE
@@ -56,7 +59,6 @@ flst_init(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Adds a node as the last node in a list. */
-UNIV_INTERN
 void
 flst_add_last(
 /*==========*/
@@ -65,7 +67,6 @@ flst_add_last(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Adds a node as the first node in a list. */
-UNIV_INTERN
 void
 flst_add_first(
 /*===========*/
@@ -74,7 +75,6 @@ flst_add_first(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Inserts a node after another in a list. */
-UNIV_INTERN
 void
 flst_insert_after(
 /*==============*/
@@ -84,7 +84,6 @@ flst_insert_after(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Inserts a node before another in a list. */
-UNIV_INTERN
 void
 flst_insert_before(
 /*===============*/
@@ -94,7 +93,6 @@ flst_insert_before(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Removes a node. */
-UNIV_INTERN
 void
 flst_remove(
 /*========*/
@@ -105,7 +103,6 @@ flst_remove(
 Cuts off the tail of the list, including the node given. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
-UNIV_INTERN
 void
 flst_cut_end(
 /*=========*/
@@ -118,7 +115,6 @@ flst_cut_end(
 Cuts off the tail of the list, not including the given node. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
-UNIV_INTERN
 void
 flst_truncate_end(
 /*==============*/
@@ -126,18 +122,16 @@ flst_truncate_end(
 	flst_node_t*		node2,	/*!< in: first node not to remove */
 	ulint			n_nodes,/*!< in: number of nodes to remove */
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list length.
-@return	length */
+/** Get the length of a list.
+@param[in]	base	base node
+@return length */
 UNIV_INLINE
 ulint
 flst_get_len(
-/*=========*/
-	const flst_base_node_t*	base,	/*!< in: pointer to base node */
-	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+	const flst_base_node_t*	base);
 /********************************************************************//**
 Gets list first node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_first(
@@ -146,7 +140,7 @@ flst_get_first(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Gets list last node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_last(
@@ -155,7 +149,7 @@ flst_get_last(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Gets list next node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_next_addr(
@@ -164,7 +158,7 @@ flst_get_next_addr(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Gets list prev node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_prev_addr(
@@ -182,7 +176,7 @@ flst_write_addr(
 	mtr_t*		mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Reads a file address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_read_addr(
@@ -191,8 +185,7 @@ flst_read_addr(
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 /********************************************************************//**
 Validates a file-based list.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 flst_validate(
 /*==========*/
@@ -200,7 +193,6 @@ flst_validate(
 	mtr_t*			mtr1);	/*!< in: mtr */
 /********************************************************************//**
 Prints info of a file-based list. */
-UNIV_INTERN
 void
 flst_print(
 /*=======*/
@@ -212,6 +204,6 @@ flst_print(
 #include "fut0lst.ic"
 #endif
 
-#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */
 
 #endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index d18cf21378f..128dc77ed92 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -54,7 +54,9 @@ flst_write_addr(
 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
 	ut_ad(faddr && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page_flagged(mtr, faddr,
+					     MTR_MEMO_PAGE_X_FIX
+					     | MTR_MEMO_PAGE_SX_FIX));
 	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
 	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
 
@@ -65,7 +67,7 @@ flst_write_addr(
 
 /********************************************************************//**
 Reads a file address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_read_addr(
@@ -94,29 +96,29 @@ flst_init(
 	flst_base_node_t*	base,	/*!< in: pointer to base node */
 	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
-	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page_flagged(mtr, base,
+					     MTR_MEMO_PAGE_X_FIX
+					     | MTR_MEMO_PAGE_SX_FIX));
 
 	mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
 	flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
 	flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
 }
 
-/********************************************************************//**
-Gets list length.
-@return	length */
+/** Get the length of a list.
+@param[in]	base	base node
+@return length */
 UNIV_INLINE
 ulint
 flst_get_len(
-/*=========*/
-	const flst_base_node_t*	base,	/*!< in: pointer to base node */
-	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+	const flst_base_node_t*	base)
 {
-	return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
+	return(mach_read_from_4(base + FLST_LEN));
 }
 
 /********************************************************************//**
 Gets list first node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_first(
@@ -129,7 +131,7 @@ flst_get_first(
 
 /********************************************************************//**
 Gets list last node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_last(
@@ -142,7 +144,7 @@ flst_get_last(
 
 /********************************************************************//**
 Gets list next node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_next_addr(
@@ -155,7 +157,7 @@ flst_get_next_addr(
 
 /********************************************************************//**
 Gets list prev node address.
-@return	file address */
+@return file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_prev_addr(
diff --git a/storage/innobase/include/gis0geo.h b/storage/innobase/include/gis0geo.h
new file mode 100644
index 00000000000..08895af545e
--- /dev/null
+++ b/storage/innobase/include/gis0geo.h
@@ -0,0 +1,162 @@
+/*****************************************************************************
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+*****************************************************************************/
+
+/**************************************************//**
+@file gis0geo.h
+R-tree definitions taken from MyISAM
+*******************************************************/
+
+#ifndef _gis0geo_h
+#define _gis0geo_h
+
+#include "my_global.h"
+#include "string.h"
+
+#define SPTYPE HA_KEYTYPE_DOUBLE
+#define SPLEN  8
+
+/* The mbr of a point or a linestring has an area of 0. For that case we
+define this weight, which is used when calculating the area increase
+needed to enlarge the mbr. */
+#define LINE_MBR_WEIGHTS	0.001
+
+/* Types of "well-known binary representation" (wkb) format. */
+enum wkbType
+{
+  wkbPoint = 1,
+  wkbLineString = 2,
+  wkbPolygon = 3,
+  wkbMultiPoint = 4,
+  wkbMultiLineString = 5,
+  wkbMultiPolygon = 6,
+  wkbGeometryCollection = 7
+};
+
+/* Byte order of "well-known binary representation" (wkb) format. */
+enum wkbByteOrder
+{
+  wkbXDR = 0,    /* Big Endian    */
+  wkbNDR = 1     /* Little Endian */
+};
+
+/** Get the wkb of default POINT value, which represents POINT(0 0)
+if it's of dimension 2, etc.
+@param[in]	n_dims		dimensions
+@param[out]	wkb		wkb buffer for default POINT
+@param[in]	len		length of wkb buffer
+@return the length of the wkb of the default POINT (non-zero),
+or 0 if the buffer is too small */
+uint
+get_wkb_of_default_point(
+	uint	n_dims,
+	uchar*	wkb,
+	uint	len);
+
+/*************************************************************//**
+Calculate minimal bounding rectangle (mbr) of the spatial object
+stored in "well-known binary representation" (wkb) format.
+@return 0 if ok */
+int
+rtree_mbr_from_wkb(
+/*===============*/
+	uchar*	wkb,		/*!< in: pointer to wkb. */
+	uint	size,		/*!< in: size of wkb. */
+	uint	n_dims,		/*!< in: dimensions. */
+	double*	mbr);		/*!< in/out: mbr. */
+
+/* Rtree split node structure. */
+struct rtr_split_node_t
+{
+	double	square;		/* square of the mbr.*/
+	int	n_node;		/* which group the node is in. */
+	uchar*	key;		/* key. */
+	double* coords;		/* mbr. */
+};
+
+/*************************************************************//**
+Inline function for reserving coords */
+inline
+static
+double*
+reserve_coords(double	**d_buffer,	/*!< in/out: buffer. */
+	       int	n_dim)		/*!< in: dimensions. */
+/*===========*/
+{
+  double *coords = *d_buffer;
+  (*d_buffer) += n_dim * 2;
+  return coords;
+}
+
+/*************************************************************//**
+Split rtree nodes.
+Return which group the first rec is in.  */
+int
+split_rtree_node(
+/*=============*/
+	rtr_split_node_t*	node,		/*!< in: split nodes.*/
+	int			n_entries,	/*!< in: entries number.*/
+	int			all_size,	/*!< in: total key's size.*/
+	int			key_size,	/*!< in: key's size.*/
+	int			min_size,	/*!< in: minimal group size.*/
+	int			size1,		/*!< in: size of group.*/
+	int			size2,		/*!< in: initial group sizes */
+	double**		d_buffer,	/*!< in/out: buffer.*/
+	int			n_dim,		/*!< in: dimensions. */
+	uchar*			first_rec);	/*!< in: the first rec. */
+
+/*************************************************************//**
+Compares two keys a and b depending on the compare mode.
+The mode can request these comparisons:
+   MBR_INTERSECT(a,b)  a overlaps b
+   MBR_CONTAIN(a,b)    a contains b
+   MBR_DISJOINT(a,b)   a disjoint b
+   MBR_WITHIN(a,b)     a within   b
+   MBR_EQUAL(a,b)      All coordinates of MBRs are equal
+   MBR_DATA(a,b)       Data reference is the same
+Returns 0 on success.  */
+int
+rtree_key_cmp(
+/*==========*/
+	page_cur_mode_t	mode,	/*!< in: compare method. */
+	const uchar*	b,	/*!< in: first key. */
+	int		b_len,	/*!< in: first key len. */
+	const uchar*	a,	/*!< in: second key. */
+	int		a_len);	/*!< in: second key len. */
+
+/*************************************************************//**
+Calculates MBR_AREA(a+b) - MBR_AREA(a)
+Note: when 'a' and 'b' objects are far from each other,
+the area increase can be really big, so this function
+can return 'inf' as a result.  */
+double
+rtree_area_increase(
+	const uchar*	a,		/*!< in: first mbr. */
+	const uchar*	b,		/*!< in: second mbr. */
+	int		a_len,		/*!< in: mbr length. */
+	double*		ab_area);	/*!< out: increased area. */
+
+/** Calculates overlapping area
+@param[in]	a	mbr a
+@param[in]	b	mbr b
+@param[in]	mbr_len	mbr length
+@return overlapping area */
+double
+rtree_area_overlapping(
+	const uchar*	a,
+	const uchar*	b,
+	int		mbr_len);
+#endif
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
new file mode 100644
index 00000000000..436374fd6b2
--- /dev/null
+++ b/storage/innobase/include/gis0rtree.h
@@ -0,0 +1,589 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0rtree.h
+R-tree header file
+
+Created 2013/03/27 Jimmy Yang and Allen Lai
+***********************************************************************/
+
+#ifndef gis0rtree_h
+#define gis0rtree_h
+
+#include "univ.i"
+
+#include "data0type.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+#include "page0page.h"
+#include "rem0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+#include "ut0wqueue.h"
+#include "que0types.h"
+#include "gis0geo.h"
+#include "gis0type.h"
+#include "btr0types.h"
+#include "btr0cur.h"
+
+/* True when MBR 'a' contains MBR 'b'; touching edges count as contained. */
+#define	MBR_CONTAIN_CMP(a, b)					\
+	(((a)->xmin <= (b)->xmin) && ((a)->xmax >= (b)->xmax)	\
+	 && ((a)->ymin <= (b)->ymin) && ((a)->ymax >= (b)->ymax))
+
+/* Whether MBR 'a' is equal to 'b' (all four coordinates compare equal) */
+#define	MBR_EQUAL_CMP(a, b)					\
+	((((b)->xmin == (a)->xmin) && ((b)->xmax == (a)->xmax))	\
+	 && (((b)->ymin == (a)->ymin) && ((b)->ymax == (a)->ymax)))
+
+/* Whether MBR 'a' intersects 'b': the ranges must overlap in both x and y */
+#define	MBR_INTERSECT_CMP(a, b)					\
+	((((b)->xmin <= (a)->xmax) && ((b)->xmax >= (a)->xmin))	\
+	 && (((b)->ymin <= (a)->ymax) && ((b)->ymax >= (a)->ymin)))
+
+/* Whether MBR 'a' and 'b' are disjoint (negation of MBR_INTERSECT_CMP) */
+#define	MBR_DISJOINT_CMP(a, b)	(!MBR_INTERSECT_CMP(a, b))
+
+/* Whether MBR 'a' is within 'b' (touching edges count as within) */
+#define	MBR_WITHIN_CMP(a, b)					\
+	((((b)->xmin <= (a)->xmin) && ((b)->xmax >= (a)->xmax))	\
+	 && (((b)->ymin <= (a)->ymin) && ((b)->ymax >= (a)->ymax)))
+
+/* True if mode is in [PAGE_CUR_CONTAIN, PAGE_CUR_RTREE_GET_FATHER]. */
+#define RTREE_SEARCH_MODE(mode)					\
+	(((mode) >= PAGE_CUR_CONTAIN) && ((mode) <= PAGE_CUR_RTREE_GET_FATHER))
+
+/* Geometry data header */
+#define	GEO_DATA_HEADER_SIZE	4
+/**********************************************************************//**
+Builds a Rtree node pointer out of a physical record and a page number.
+@return own: node pointer */
+dtuple_t*
+rtr_index_build_node_ptr(
+/*=====================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rtr_mbr_t*	mbr,	/*!< in: mbr of lower page */
+	const rec_t*		rec,	/*!< in: record for which to build node
+					pointer */
+	ulint			page_no,/*!< in: page number to put in node
+					pointer */
+	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+					created */
+	ulint			level);	/*!< in: level of rec in tree:
+					0 means leaf level */
+
+/*************************************************************//**
+Splits an R-tree index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+@return inserted record */
+rec_t*
+rtr_page_split_and_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in/out: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	ulint**		offsets,/*!< out: offsets on inserted record */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/**************************************************************//**
+Calculates the MBR that encloses all the records on a page. */
+UNIV_INLINE
+void
+rtr_page_cal_mbr(
+/*=============*/
+	const dict_index_t*	index,	/*!< in: index */
+	const buf_block_t*	block,	/*!< in: buffer block */
+	rtr_mbr_t*		mbr,	/*!< out: MBR encapsulates the page */
+	mem_heap_t*		heap);	/*!< in: heap for the memory
+					allocation */
+/*************************************************************//**
+Find the next matching record. This function will first exhaust
+the copied record listed in the rtr_info->matches vector before
+moving to next page
+@return true if there is next qualified record found, otherwise(if
+exhausted) false */
+bool
+rtr_pcur_move_to_next(
+/*==================*/
+	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	page_cur_mode_t	mode,	/*!< in: cursor search mode */
+	btr_pcur_t*	cursor, /*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	ulint		cur_level,
+				/*!< in: current level */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/**************************************************************//**
+Restores the stored position of a persistent cursor bufferfixing the page */
+bool
+rtr_cur_restore_position_func(
+/*==========================*/
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< in: detached persistent cursor */
+	ulint		level,		/*!< in: index level */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr);		/*!< in: mtr */
+#define rtr_cur_restore_position(l,cur,level,mtr)		\
+	rtr_cur_restore_position_func(l,cur,level,__FILE__,__LINE__,mtr)
+
+/****************************************************************//**
+Searches the right position in rtree for a page cursor. */
+bool
+rtr_cur_search_with_match(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	dict_index_t*		index,	/*!< in: index descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	page_cur_mode_t		mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor,	/*!< in/out: page cursor */
+	rtr_info_t*		rtr_info);/*!< in/out: search stack */
+
+/****************************************************************//**
+Calculate the area increased for a new record
+@return area increased */
+double
+rtr_rec_cal_increase(
+/*=================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple to insert, which
+				cause area increase */
+	const rec_t*	rec,	/*!< in: physical record which differs from
+				dtuple in some of the common fields, or which
+				has an equal number or more fields than
+				dtuple */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	double*		area);	/*!< out: increased area */
+
+/****************************************************************//**
+Enlarges the MBR for an insert (NOTE(review): summary inferred from the name).
+@return error code */
+dberr_t
+rtr_ins_enlarge_mbr(
+/*=================*/
+	btr_cur_t*		cursor,	/*!< in: btr cursor */
+	que_thr_t*		thr,	/*!< in: query thread */
+	mtr_t*			mtr);	/*!< in: mtr */
+
+/********************************************************************//**
+Searches for the father node of page_no (summary inferred; TODO confirm). */
+void
+rtr_get_father_node(
+/*================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the tree level of search */
+	const dtuple_t* tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	btr_cur_t*	sea_cur,/*!< in: search cursor */
+	btr_cur_t*	cursor,	/*!< in/out: tree cursor; the cursor page is
+				s- or x-latched */
+	ulint		page_no,/*!< in: current page no */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/**************************************************************//**
+push a nonleaf index node to the search path */
+UNIV_INLINE
+void
+rtr_non_leaf_stack_push(
+/*====================*/
+	rtr_node_path_t*	path,		/*!< in/out: search path */
+	ulint			pageno,		/*!< in: pageno to insert */
+	node_seq_t		seq_no,		/*!< in: Node sequence num */
+	ulint			level,		/*!< in: index level */
+	ulint			child_no,	/*!< in: child page no */
+	btr_pcur_t*		cursor,		/*!< in: position cursor */
+	double			mbr_inc);	/*!< in: MBR needs to be
+						enlarged */
+
+/**************************************************************//**
+push a nonleaf index node to the search path for insertion */
+void
+rtr_non_leaf_insert_stack_push(
+/*===========================*/
+	dict_index_t*		index,		/*!< in: index descriptor */
+	rtr_node_path_t*	path,		/*!< in/out: search path */
+	ulint			level,		/*!< in: index level */
+	const buf_block_t*	block,		/*!< in: block of the page */
+	const rec_t*		rec,		/*!< in: positioned record */
+	double			mbr_inc);	/*!< in: MBR needs to be
+						enlarged */
+
+/*****************************************************************//**
+Allocates a new Split Sequence Number.
+@return new SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_new_ssn_id(
+/*===============*/
+	dict_index_t*	index);		/*!< in/out: the index struct */
+
+/*****************************************************************//**
+Get the current Split Sequence Number.
+@return current SSN id */
+UNIV_INLINE
+node_seq_t
+rtr_get_current_ssn_id(
+/*===================*/
+	dict_index_t*	index);		/*!< in: the index struct */
+
+/********************************************************************//**
+Create a RTree search info structure */
+rtr_info_t*
+rtr_create_rtr_info(
+/******************/
+	bool		need_prdt,	/*!< in: Whether predicate lock is
+					needed */
+	bool		init_matches,	/*!< in: Whether to initiate the
+					"matches" structure for collecting
+					matched leaf records */
+	btr_cur_t*	cursor,		/*!< in: tree search cursor */
+	dict_index_t*	index);		/*!< in: index struct */
+
+/********************************************************************//**
+Update a btr_cur_t with rtr_info */
+void
+rtr_info_update_btr(
+/******************/
+	btr_cur_t*	cursor,		/*!< in/out: tree cursor */
+	rtr_info_t*	rtr_info);	/*!< in: rtr_info to set to the
+					cursor */
+
+/********************************************************************//**
+Initialize (or, when reinit is set, reinitialize) a RTree search info */
+void
+rtr_init_rtr_info(
+/****************/
+	rtr_info_t*	rtr_info,	/*!< in/out: rtr_info to
+					initialize */
+	bool		need_prdt,	/*!< in: Whether predicate lock is
+					needed */
+	btr_cur_t*	cursor,		/*!< in: tree search cursor */
+	dict_index_t*	index,		/*!< in: index structure */
+	bool		reinit);	/*!< in: Whether this is a reinit */
+
+/**************************************************************//**
+Clean up Rtree cursor */
+void
+rtr_clean_rtr_info(
+/*===============*/
+	rtr_info_t*	rtr_info,	/*!< in: RTree search info */
+	bool		free_all);	/*!< in: need to free rtr_info itself */
+
+/****************************************************************//**
+Get the bounding box content from an index record */
+void
+rtr_get_mbr_from_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: index record */
+	const ulint*	offsets,/*!< in: offsets array */
+	rtr_mbr_t*	mbr);	/*!< out: MBR */
+
+/****************************************************************//**
+Get the bounding box content from a data tuple */
+void
+rtr_get_mbr_from_tuple(
+/*===================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	rtr_mbr*	mbr);	/*!< out: mbr to fill */
+
+#define rtr_page_get_father_node_ptr(of,heap,sea,cur,mtr)		\
+	rtr_page_get_father_node_ptr_func(of,heap,sea,cur,__FILE__,__LINE__,mtr)
+
+/** Get the rtree page father.
+@param[in]	index		rtree index
+@param[in]	block		child page in the index
+@param[in]	mtr		mtr
+@param[in]	sea_cur		search cursor, contains information
+				about parent nodes in search
+@param[in,out]	cursor		cursor on node pointer record,
+				its page x-latched (NOTE(review): looks
+				like an output; confirm direction) */
+void
+rtr_page_get_father(
+	dict_index_t*	index,
+	buf_block_t*	block,
+	mtr_t*		mtr,
+	btr_cur_t*	sea_cur,
+	btr_cur_t*	cursor);
+
+/************************************************************//**
+Returns the upper level node pointer to a R-Tree page. It is assumed
+that mtr holds an x-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+ulint*
+rtr_page_get_father_node_ptr_func(
+/*==============================*/
+	ulint*		offsets,/*!< in: work area for the return value */
+	mem_heap_t*	heap,	/*!< in: memory heap to use */
+	btr_cur_t*	sea_cur,/*!< in: search cursor */
+	btr_cur_t*	cursor,	/*!< in: cursor pointing to user record,
+				out: cursor on node pointer record,
+				its page x-latched */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+
+/************************************************************//**
+Returns the father block to a page. It is assumed that mtr holds
+an X or SX latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+ulint*
+rtr_page_get_father_block(
+/*======================*/
+	ulint*		offsets,/*!< in: work area for the return value */
+	mem_heap_t*	heap,	/*!< in: memory heap to use */
+	dict_index_t*	index,	/*!< in: b-tree index */
+	buf_block_t*	block,	/*!< in: child page in the index */
+	mtr_t*		mtr,	/*!< in: mtr */
+	btr_cur_t*	sea_cur,/*!< in: search cursor, contains information
+				about parent nodes in search */
+	btr_cur_t*	cursor);/*!< out: cursor on node pointer record,
+				its page x-latched */
+/**************************************************************//**
+Store the parent path cursor
+@return number of cursor stored */
+ulint
+rtr_store_parent_path(
+/*==================*/
+	const buf_block_t*	block,	/*!< in: block of the page */
+	btr_cur_t*		btr_cur,/*!< in/out: persistent cursor */
+	ulint			latch_mode,
+					/*!< in: latch_mode */
+	ulint			level,	/*!< in: index level */
+	mtr_t*			mtr);	/*!< in: mtr */
+
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+void
+rtr_pcur_open_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level in the btree */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,	/*!< in: memory buffer for persistent cursor */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+#define rtr_pcur_open(i,t,md,l,c,m)			\
+	rtr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
+
+struct btr_cur_t;
+
+/*********************************************************//**
+Returns the R-Tree node stored in the parent search path
+@return pointer to R-Tree cursor component */
+UNIV_INLINE
+node_visit_t*
+rtr_get_parent_node(
+/*================*/
+	btr_cur_t*	btr_cur,	/*!< in: persistent cursor */
+	ulint		level,		/*!< in: index level of buffer page */
+	ulint		is_insert);	/*!< in: whether it is insert */
+
+/*********************************************************//**
+Returns the R-Tree cursor stored in the parent search path
+@return pointer to R-Tree cursor component */
+UNIV_INLINE
+btr_pcur_t*
+rtr_get_parent_cursor(
+/*==================*/
+	btr_cur_t*	btr_cur,	/*!< in: persistent cursor */
+	ulint		level,		/*!< in: index level of buffer page */
+	ulint		is_insert);	/*!< in: whether insert operation */
+
+/*************************************************************//**
+Copy recs from a page to new_block of rtree. */
+void
+rtr_page_copy_rec_list_end_no_locks(
+/*================================*/
+	buf_block_t*	new_block,	/*!< in: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page of rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	rtr_rec_move_t*	rec_move,	/*!< in: recording records moved */
+	ulint		max_move,	/*!< in: num of rec to move */
+	ulint*		num_moved,	/*!< out: num of rec moved */
+	mtr_t*		mtr);		/*!< in: mtr */
+
+/*************************************************************//**
+Copy recs till a specified rec from a page to new_block of rtree. */
+void
+rtr_page_copy_rec_list_start_no_locks(
+/*==================================*/
+	buf_block_t*	new_block,	/*!< in: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page of rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	rtr_rec_move_t*	rec_move,	/*!< in: recording records moved */
+	ulint		max_move,	/*!< in: num of rec to move */
+	ulint*		num_moved,	/*!< out: num of rec moved */
+	mtr_t*		mtr);		/*!< in: mtr */
+
+/****************************************************************//**
+Merge 2 mbrs and update the mbr that cursor is on. */
+dberr_t
+rtr_merge_and_update_mbr(
+/*=====================*/
+	btr_cur_t*		cursor,		/*!< in/out: cursor */
+	btr_cur_t*		cursor2,	/*!< in: the other cursor */
+	ulint*			offsets,	/*!< in: rec offsets */
+	ulint*			offsets2,	/*!< in: rec offsets */
+	page_t*			child_page,	/*!< in: the child page. */
+	buf_block_t*		merge_block,	/*!< in: page to merge */
+	buf_block_t*		block,		/*!< in: page be merged */
+	dict_index_t*		index,		/*!< in: index */
+	mtr_t*			mtr);		/*!< in: mtr */
+
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+void
+rtr_node_ptr_delete(
+/*================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	btr_cur_t*	sea_cur,/*!< in: search cursor, contains information
+				about parent nodes in search */
+	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/****************************************************************//**
+Check two MBRs are identical or need to be merged */
+bool
+rtr_merge_mbr_changed(
+/*==================*/
+	btr_cur_t*	cursor,		/*!< in: cursor */
+	btr_cur_t*	cursor2,	/*!< in: the other cursor */
+	ulint*		offsets,	/*!< in: rec offsets */
+	ulint*		offsets2,	/*!< in: rec offsets */
+	rtr_mbr_t*	new_mbr,	/*!< out: MBR to update */
+	buf_block_t*	merge_block,	/*!< in: page to merge */
+	buf_block_t*	block,		/*!< in: page be merged */
+	dict_index_t*	index);		/*!< in: index */
+
+
+/**************************************************************//**
+Update the mbr field of a spatial index row.
+@return true if successful */
+bool
+rtr_update_mbr_field(
+/*=================*/
+	btr_cur_t*	cursor,		/*!< in: cursor pointed to rec.*/
+	ulint*		offsets,	/*!< in: offsets on rec. */
+	btr_cur_t*	cursor2,	/*!< in/out: cursor pointed to rec
+					that should be deleted.
+					this cursor is for btr_compress to
+					delete the merged page's father rec.*/
+	page_t*		child_page,	/*!< in: child page. */
+	rtr_mbr_t*	new_mbr,	/*!< in: the new mbr. */
+	rec_t*		new_rec,	/*!< in: rec to use */
+	mtr_t*		mtr);		/*!< in: mtr */
+
+/**************************************************************//**
+Check whether a Rtree page is child of a parent page
+@return true if there is child/parent relationship */
+bool
+rtr_check_same_block(
+/*=================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	btr_cur_t*	cur,	/*!< in/out: position at the parent entry
+				pointing to the child if successful */
+	buf_block_t*	parentb,/*!< in: parent page to check */
+	buf_block_t*	childb, /*!< in: child Page */
+	mem_heap_t*	heap);	/*!< in: memory heap */
+
+/*********************************************************************//**
+Writes an MBR into a data buffer. */
+UNIV_INLINE
+void
+rtr_write_mbr(
+/*==========*/
+	byte*			data,	/*!< out: buffer to fill */
+	const rtr_mbr_t*	mbr);	/*!< in: MBR to write */
+
+/*********************************************************************//**
+Reads an MBR from a data buffer. */
+UNIV_INLINE
+void
+rtr_read_mbr(
+/*==========*/
+	const byte*		data,	/*!< in: buffer to read */
+	rtr_mbr_t*		mbr);	/*!< out: MBR */
+
+/**************************************************************//**
+Check whether a discarding page is in anyone's search path */
+void
+rtr_check_discard_page(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard: not on
+				the root page */
+	buf_block_t*	block);	/*!< in: block of page to be discarded */
+
+/********************************************************************//**
+Reinitialize a RTree search info */
+UNIV_INLINE
+void
+rtr_info_reinit_in_cursor(
+/************************/
+	btr_cur_t*	cursor,		/*!< in/out: tree cursor */
+	dict_index_t*	index,		/*!< in: index struct */
+	bool		need_prdt);	/*!< in: Whether predicate lock is
+					needed */
+
+/** Estimates the number of rows in a given area.
+@param[in]	index	index
+@param[in]	tuple	range tuple containing mbr, may also be empty tuple
+@param[in]	mode	search mode
+@return estimated number of rows */
+int64_t
+rtr_estimate_n_rows_in_range(
+	dict_index_t*	index,
+	const dtuple_t*	tuple,
+	page_cur_mode_t	mode);
+
+#ifndef UNIV_NONINL
+#include "gis0rtree.ic"
+#endif
+#endif /*!< gis0rtree.h */
diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic
new file mode 100644
index 00000000000..a30db122273
--- /dev/null
+++ b/storage/innobase/include/gis0rtree.ic
@@ -0,0 +1,274 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0rtree.ic
+R-tree Inline code
+
+Created 2013/03/27 Jimmy Yang and Allen Lai
+***********************************************************************/
+
+/**************************************************************//**
+Calculates the MBR enclosing all records of a page: every coordinate of
+rtr_mbr becomes the min/max found in the first field of the page records. */
+UNIV_INLINE
+void
+rtr_page_cal_mbr(
+/*=============*/
+	const dict_index_t*	index,	/*!< in: index */
+	const buf_block_t*	block,	/*!< in: buffer block */
+	rtr_mbr_t*		rtr_mbr,/*!< out: MBR encapsulates the page */
+	mem_heap_t*		heap)	/*!< in: heap for the memory
+					allocation */
+{
+	page_t*		page;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	ulint*		offsets = NULL;
+	double		bmin, bmax;
+	double*		amin;
+	double*		amax;
+	double*		mbr;
+
+	/* Start from an inverted box so that the first record replaces it. */
+	rtr_mbr->xmin = DBL_MAX;
+	rtr_mbr->ymin = DBL_MAX;
+	rtr_mbr->xmax = -DBL_MAX;
+	rtr_mbr->ymax = -DBL_MAX;
+
+	/* View the MBR as a flat array of (min, max) pairs per dimension. */
+	mbr = reinterpret_cast<double*>(rtr_mbr);
+
+	page = buf_block_get_frame(block);
+
+	/* NOTE(review): offsets are computed for the first record only and
+	reused for every record below; confirm field 0 is fixed-length. */
+	rec = page_rec_get_next(page_get_infimum_rec(page));
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+	do {
+		/* The mbr address is in the first field. */
+		field = rec_get_nth_field(rec, offsets, 0, &len);
+		ut_ad(len == DATA_MBR_LEN);
+
+		for (int i = 0; i < SPDIMS; i++) {
+			/* A dimension is a (min, max) pair of doubles, so
+			the stride is 2, not SPDIMS (equal only in 2D). */
+			const byte*	pair = field + 2 * i * sizeof(double);
+			bmin = mach_double_read(pair);
+			bmax = mach_double_read(pair + sizeof(double));
+			amin = mbr + 2 * i;
+			amax = mbr + 2 * i + 1;
+			if (*amin > bmin) {
+				*amin = bmin;
+			}
+			if (*amax < bmax) {
+				*amax = bmax;
+			}
+		}
+
+		rec = page_rec_get_next(rec);
+	} while (rec != NULL && !page_rec_is_supremum(rec));
+}
+
+/**************************************************************//**
+Appends a visit entry for a non-leaf index page to the search path. */
+UNIV_INLINE
+void
+rtr_non_leaf_stack_push(
+/*====================*/
+	rtr_node_path_t*	path,		/*!< in/out: search path */
+	ulint			pageno,		/*!< in: pageno to insert */
+	node_seq_t		seq_no,		/*!< in: Node sequence num */
+	ulint			level,		/*!< in: index page level */
+	ulint			child_no,	/*!< in: child page no */
+	btr_pcur_t*		cursor,		/*!< in: position cursor */
+	double			mbr_inc)	/*!< in: MBR needs to be
+						enlarged */
+{
+	node_visit_t	visit;
+
+	/* Copy every value handed over by the caller into the entry. */
+	visit.cursor = cursor;
+	visit.mbr_inc = mbr_inc;
+	visit.child_no = child_no;
+	visit.level = level;
+	visit.seq_no = seq_no;
+	visit.page_no = pageno;
+	path->push_back(visit);
+
+#ifdef RTR_SEARCH_DIAGNOSTIC
+	fprintf(stderr, "INNODB_RTR: Push page %d, level %d, seq %d"
+			" to search stack \n",
+		static_cast<int>(pageno), static_cast<int>(level),
+		static_cast<int>(seq_no));
+#endif /* RTR_SEARCH_DIAGNOSTIC */
+}
+
+/*****************************************************************//**
+Increments the index's Split Sequence Number under its mutex.
+@return the SSN value after the increment */
+UNIV_INLINE
+node_seq_t
+rtr_get_new_ssn_id(
+/*===============*/
+	dict_index_t*	index)	/*!< in/out: the index struct */
+{
+	mutex_enter(&index->rtr_ssn.mutex);
+
+	/* Pre-increment: the value handed out is the already bumped one. */
+	const node_seq_t	new_ssn = ++index->rtr_ssn.seq_no;
+
+	mutex_exit(&index->rtr_ssn.mutex);
+	return(new_ssn);
+}
+/*****************************************************************//**
+Reads the index's Split Sequence Number under its mutex, without changing it.
+@return the SSN value currently stored in the index */
+UNIV_INLINE
+node_seq_t
+rtr_get_current_ssn_id(
+/*===================*/
+	dict_index_t*	index)	/*!< in: index struct */
+{
+	mutex_enter(&index->rtr_ssn.mutex);
+
+	/* Snapshot the counter while the mutex is held. */
+	const node_seq_t	cur_ssn = index->rtr_ssn.seq_no;
+
+	mutex_exit(&index->rtr_ssn.mutex);
+	return(cur_ssn);
+}
+
+/*********************************************************************//**
+Serializes an MBR into a buffer as SPDIMS * 2 consecutive doubles. */
+UNIV_INLINE
+void
+rtr_write_mbr(
+/*==========*/
+	byte*			data,	/*!< out: data */
+	const rtr_mbr_t*	mbr)	/*!< in: data */
+{
+	const double*	src = reinterpret_cast<const double*>(mbr);
+
+	for (int n = SPDIMS * 2; n > 0; n--, src++, data += sizeof(double)) {
+		mach_double_write(data, *src);
+	}
+}
+
+/*********************************************************************//**
+Fills an MBR from a buffer holding SPDIMS * 2 consecutive doubles. */
+UNIV_INLINE
+void
+rtr_read_mbr(
+/*==========*/
+	const byte*	data,	/*!< in: data */
+	rtr_mbr_t*	mbr)	/*!< out: MBR */
+{
+	double*	dst = reinterpret_cast<double*>(mbr);
+
+	for (int n = SPDIMS * 2; n > 0; n--, dst++, data += sizeof(double)) {
+		*dst = mach_double_read(data);
+	}
+}
+
+/*********************************************************//**
+Returns the R-Tree node stored in the parent search path for a level.
+@return pointer into the parent path (used after rtr_path_mutex is released),
+NULL if the path has no entry for the level or level >= tree height */
+UNIV_INLINE
+node_visit_t*
+rtr_get_parent_node(
+/*================*/
+	btr_cur_t*	btr_cur,	/*!< in: persistent cursor */
+	ulint		level,		/*!< in: index level of buffer page */
+	ulint		is_insert)	/*!< in: whether it is insert */
+{
+	ulint			num;
+	ulint			tree_height = btr_cur->tree_height;
+	node_visit_t*		found_node = NULL;
+
+	if (level >= tree_height) {
+		return(NULL);
+	}
+
+	mutex_enter(&btr_cur->rtr_info->rtr_path_mutex);
+
+	num = btr_cur->rtr_info->parent_path->size();
+
+	if (is_insert) {
+		/* Path entry 0 holds the topmost level (tree_height - 1). */
+		ulint	idx = tree_height - level - 1;
+		ut_ad(!num || idx < num);
+
+		/* Never index past the end when assertions are compiled
+		out; report "not found" as documented instead. */
+		if (idx < num) {
+			found_node = &(*btr_cur->rtr_info->parent_path)[idx];
+		}
+	} else {
+		/* Scan from the last entry backwards and take the first
+		one recorded for the requested level. */
+		while (num > 0) {
+			node_visit_t*	node
+				= &(*btr_cur->rtr_info->parent_path)[num - 1];
+
+			if (node->level == level) {
+				found_node = node;
+				break;
+			}
+			num--;
+		}
+	}
+
+	mutex_exit(&btr_cur->rtr_info->rtr_path_mutex);
+	return(found_node);
+}
+
+/*********************************************************//**
+Looks up the parent path entry for a level and hands out its cursor.
+@return the cursor of the entry found by rtr_get_parent_node(), or NULL */
+UNIV_INLINE
+btr_pcur_t*
+rtr_get_parent_cursor(
+/*==================*/
+	btr_cur_t*	btr_cur,	/*!< in: persistent cursor */
+	ulint		level,		/*!< in: index level of buffer page */
+	ulint		is_insert)	/*!< in: whether insert operation */
+{
+	node_visit_t*	visit = rtr_get_parent_node(btr_cur, level, is_insert);
+
+	/* No matching path entry means there is no cursor to hand out. */
+	return(visit != NULL ? visit->cursor : NULL);
+}
+
+/********************************************************************//**
+Cleans cursor->rtr_info without freeing it, then initializes it again. */
+UNIV_INLINE
+void
+rtr_info_reinit_in_cursor(
+/************************/
+	btr_cur_t*	cursor,		/*!< in/out: tree cursor */
+	dict_index_t*	index,		/*!< in: index struct */
+	bool		need_prdt)	/*!< in: Whether predicate lock is
+					needed */
+{
+	rtr_clean_rtr_info(cursor->rtr_info, false);
+	rtr_init_rtr_info(cursor->rtr_info, need_prdt, cursor, index, true);
+}
diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h
new file mode 100644
index 00000000000..14c098f9608
--- /dev/null
+++ b/storage/innobase/include/gis0type.h
@@ -0,0 +1,168 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/gis0type.h
+R-tree header file
+
+Created 2013/03/27 Jimmy Yang
+***********************************************************************/
+
+#ifndef gis0type_h
+#define gis0type_h
+
+#include "univ.i"
+
+#include "buf0buf.h"
+#include "data0type.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+#include "ut0wqueue.h"
+#include "que0types.h"
+#include "gis0geo.h"
+#include "ut0new.h"
+
+#include <vector>
+#include <list>
+
+/* Node Sequence Number. Only updated when page splits */
+typedef ib_uint32_t     node_seq_t;
+
+/* RTree internal non-leaf Nodes to be searched, from root to leaf */
+typedef	struct node_visit {
+	ulint		page_no;	/*!< the page number */
+	node_seq_t	seq_no;		/*!< the SSN (split sequence number) */
+	ulint		level;		/*!< the page's index level */
+	ulint		child_no;	/*!< child page number, if used for
+					parent recording */
+	btr_pcur_t*	cursor;		/*!< cursor structure if we positioned
+					FIXME: there is no need to use whole
+					btr_pcur_t, just the position related
+					members */
+	double		mbr_inc;	/*!< whether this node needs to be
+					enlarged for insertion */
+} node_visit_t;
+
+typedef std::vector<node_visit_t, ut_allocator<node_visit_t> >	rtr_node_path_t;
+
+typedef	struct rtr_rec {
+		rec_t*	r_rec;		/*!< matched record */
+		bool	locked;		/*!< whether the record is locked */
+} rtr_rec_t;
+
+typedef std::vector<rtr_rec_t, ut_allocator<rtr_rec_t> >	rtr_rec_vector;
+
+/* Structure for matched records on the leaf page */
+typedef	struct matched_rec {
+	byte*		bufp;		/*!< aligned buffer pointer */
+	byte		rec_buf[UNIV_PAGE_SIZE_MAX * 2];
+					/*!< buffer used to copy matching rec */
+	buf_block_t	block;		/*!< the shadow buffer block */
+	ulint		used;		/*!< memory used */
+	rtr_rec_vector*	matched_recs;	/*!< vector holding the matching rec */
+	ib_mutex_t	rtr_match_mutex;/*!< mutex protecting the matched_recs
+					vector */
+	bool		valid;		/*!< whether result in matched_recs
+					or this search is valid (page not
+					dropped) */
+	bool		locked;		/*!< whether these recs are locked */
+} matched_rec_t;
+
+/* In-memory representation of a minimum bounding rectangle (MBR) */
+typedef struct rtr_mbr {
+	double	xmin;			/*!< minimum on x */
+	double	xmax;			/*!< maximum on x */
+	double	ymin;			/*!< minimum on y */
+	double	ymax;			/*!< maximum on y */
+} rtr_mbr_t;
+
+/* Maximum index level for R-Tree, this is consistent with BTR_MAX_LEVELS */
+#define RTR_MAX_LEVELS		100
+
+/* Number of pages we latch at leaf level when there is possible Tree
+modification (split, shrink), we always latch left, current
+and right pages */
+#define RTR_LEAF_LATCH_NUM	3
+
+/** Vectors holding the matching internal pages/nodes and leaf records */
+typedef	struct rtr_info{
+	rtr_node_path_t*path;	/*!< vector holding matching pages */
+	rtr_node_path_t*parent_path;
+				/*!< vector holding parent pages during
+				search */
+	matched_rec_t*	matches;/*!< struct holding matching leaf records */
+	ib_mutex_t	rtr_path_mutex;
+				/*!< mutex protecting "path" and "parent_path" */
+	buf_block_t*	tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
+				/*!< tracking pages that would be locked
+				at leaf level, for future free */
+        ulint		tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
+				/*!< savepoint used to release latches/blocks
+				on each level and leaf level */
+	rtr_mbr_t	mbr;	/*!< the search MBR */
+	que_thr_t*      thr;	/*!< the search thread */
+	mem_heap_t*	heap;	/*!< memory heap */
+	btr_cur_t*	cursor;	/*!< cursor used for search */
+	dict_index_t*	index;	/*!< index it is searching */
+	bool		need_prdt_lock;
+				/*!< whether a predicate lock on the tree
+				is needed */
+	bool		need_page_lock;
+				/*!< whether a predicate page lock on the
+				tree is needed */
+	bool		allocated;/*!< whether this structure is allocated or
+				on stack */
+	bool		mbr_adj;/*!< whether mbr will need to be enlarged
+				for an insertion operation */
+	bool		fd_del;	/*!< found deleted row */
+	const dtuple_t*	search_tuple;
+				/*!< search tuple being used */
+	page_cur_mode_t	search_mode;
+				/*!< current search mode */
+} rtr_info_t;
+
+typedef std::list<rtr_info_t*, ut_allocator<rtr_info_t*> >	rtr_info_active;
+
+/* Tracking structure for all ongoing searches for an index */
+typedef struct	rtr_info_track {
+	rtr_info_active*	rtr_active;	/*!< Active search info */
+	ib_mutex_t		rtr_active_mutex;
+						/*!< mutex to protect
+						rtr_active */
+} rtr_info_track_t;
+
+/* Node Sequence Number and the mutex that protects it. */
+typedef struct rtree_ssn {
+        ib_mutex_t      mutex;          /*!< mutex protecting the seq num */
+        node_seq_t      seq_no;         /*!< the SSN (node sequence number) */
+} rtr_ssn_t;
+
+/* This records the movement of a record between pages. Used for the
+corresponding lock movement */
+typedef struct rtr_rec_move {
+	rec_t*		old_rec;	/*!< record being moved in old page */
+	rec_t*		new_rec;	/*!< new record location */
+	bool		moved;		/*!< whether locks are moved too */
+} rtr_rec_move_t;
+#endif /* gis0type_h */
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index 07ab20ab995..15a99ddf683 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -47,7 +47,6 @@ ha_search_and_get_data(
 Looks for an element when we know the pointer to the data and updates
 the pointer to data if found.
 @return TRUE if found */
-UNIV_INTERN
 ibool
 ha_search_and_update_if_found_func(
 /*===============================*/
@@ -62,66 +61,55 @@ ha_search_and_update_if_found_func(
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /** Looks for an element when we know the pointer to the data and
 updates the pointer to data if found.
-@param table		in/out: hash table
-@param fold		in: folded value of the searched data
-@param data		in: pointer to the data
-@param new_block	in: block containing new_data
-@param new_data		in: new pointer to the data */
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block in: block containing new_data
+@param new_data in: new pointer to the data */
 # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
 	ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
 #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 /** Looks for an element when we know the pointer to the data and
 updates the pointer to data if found.
-@param table		in/out: hash table
-@param fold		in: folded value of the searched data
-@param data		in: pointer to the data
-@param new_block	ignored: block containing new_data
-@param new_data		in: new pointer to the data */
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block ignored: block containing new_data
+@param new_data in: new pointer to the data */
 # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
 	ha_search_and_update_if_found_func(table,fold,data,new_data)
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
 /*************************************************************//**
 Creates a hash table with at least n array cells.  The actual number
 of cells is chosen to be a prime number slightly bigger than n.
-@return	own: created table */
-UNIV_INTERN
+@return own: created table */
 hash_table_t*
-ha_create_func(
-/*===========*/
-	ulint	n,		/*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
-	ulint	mutex_level,	/*!< in: level of the mutexes in the latching
-				order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
-	ulint	n_mutexes,	/*!< in: number of mutexes to protect the
+ib_create(
+/*======*/
+	ulint		n,	/*!< in: number of array cells */
+	latch_id_t	id,	/*!< in: latch ID */
+	ulint		n_mutexes,/*!< in: number of mutexes to protect the
 				hash table: must be a power of 2, or 0 */
-	ulint	type);		/*!< in: type of datastructure for which
+	ulint		type);	/*!< in: type of datastructure for which
 				the memory heap is going to be used e.g.:
 				MEM_HEAP_FOR_BTR_SEARCH or
 				MEM_HEAP_FOR_PAGE_HASH */
-#ifdef UNIV_SYNC_DEBUG
-/** Creates a hash table.
-@return		own: created table
-@param n_c	in: number of array cells.  The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level	in: level of the mutexes in the latching order
-@param n_m	in: number of mutexes to protect the hash table;
-		must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type)
-#else /* UNIV_SYNC_DEBUG */
-/** Creates a hash table.
-@return		own: created table
-@param n_c	in: number of array cells.  The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level	in: level of the mutexes in the latching order
-@param n_m	in: number of mutexes to protect the hash table;
-		must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type)
-#endif /* UNIV_SYNC_DEBUG */
+
+/** Recreate a hash table with at least n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+The new cells are all cleared. The heaps are recreated.
+The sync objects are reused.
+@param[in,out]	table	hash table to be resized (to be freed later)
+@param[in]	n	number of array cells
+@return	resized new table */
+hash_table_t*
+ib_recreate(
+	hash_table_t*	table,
+	ulint		n);
 
 /*************************************************************//**
 Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
 void
 ha_clear(
 /*=====*/
@@ -131,8 +119,7 @@ ha_clear(
 Inserts an entry into a hash table. If an entry with the same fold number
 is found, its node is updated to point to the new data, and no new node
 is inserted.
-@return	TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
+@return TRUE if succeed, FALSE if no more memory could be allocated */
 ibool
 ha_insert_for_fold_func(
 /*====================*/
@@ -151,11 +138,11 @@ ha_insert_for_fold_func(
 Inserts an entry into a hash table. If an entry with the same fold number
 is found, its node is updated to point to the new data, and no new node
 is inserted.
-@return	TRUE if succeed, FALSE if no more memory could be allocated
-@param t	in: hash table
-@param f	in: folded value of data
-@param b	in: buffer block containing the data
-@param d	in: data, must not be NULL */
+@return TRUE if succeed, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b in: buffer block containing the data
+@param d in: data, must not be NULL */
 # define ha_insert_for_fold(t,f,b,d) 	do {		\
 	ha_insert_for_fold_func(t,f,b,d);		\
 	MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);	\
@@ -165,11 +152,11 @@ is inserted.
 Inserts an entry into a hash table. If an entry with the same fold number
 is found, its node is updated to point to the new data, and no new node
 is inserted.
-@return	TRUE if succeed, FALSE if no more memory could be allocated
-@param t	in: hash table
-@param f	in: folded value of data
-@param b	ignored: buffer block containing the data
-@param d	in: data, must not be NULL */
+@return TRUE if succeed, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b ignored: buffer block containing the data
+@param d in: data, must not be NULL */
 # define ha_insert_for_fold(t,f,b,d)	do {		\
 	ha_insert_for_fold_func(t,f,d);			\
 	MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);	\
@@ -179,7 +166,7 @@ is inserted.
 /*********************************************************//**
 Looks for an element when we know the pointer to the data and deletes
 it from the hash table if found.
-@return	TRUE if found */
+@return TRUE if found */
 UNIV_INLINE
 ibool
 ha_search_and_delete_if_found(
@@ -191,7 +178,6 @@ ha_search_and_delete_if_found(
 /*****************************************************************//**
 Removes from the chain determined by fold all nodes whose data pointer
 points to the page given. */
-UNIV_INTERN
 void
 ha_remove_all_nodes_to_page(
 /*========================*/
@@ -201,8 +187,7 @@ ha_remove_all_nodes_to_page(
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /*************************************************************//**
 Validates a given range of the cells in hash table.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 ha_validate(
 /*========*/
@@ -212,7 +197,6 @@ ha_validate(
 #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
 /*************************************************************//**
 Prints info of a hash table. */
-UNIV_INTERN
 void
 ha_print_info(
 /*==========*/
@@ -222,12 +206,12 @@ ha_print_info(
 
 /** The hash table external chain node */
 struct ha_node_t {
+	ulint		fold;	/*!< fold value for the data */
 	ha_node_t*	next;	/*!< next chain node or NULL if none */
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	buf_block_t*	block;	/*!< buffer block containing the data, or NULL */
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-        const rec_t*	data;	/*!< pointer to the data */
-	ulint		fold;	/*!< fold value for the data */
+	const rec_t*	data;	/*!< pointer to the data */
 };
 
 #ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
index c478ff54303..854ff9bc046 100644
--- a/storage/innobase/include/ha0ha.ic
+++ b/storage/innobase/include/ha0ha.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,6 @@ Created 8/18/1994 Heikki Tuuri
 
 /***********************************************************//**
 Deletes a hash node. */
-UNIV_INTERN
 void
 ha_delete_hash_node(
 /*================*/
@@ -38,7 +37,7 @@ ha_delete_hash_node(
 
 /******************************************************************//**
 Gets a hash node data.
-@return	pointer to the data */
+@return pointer to the data */
 UNIV_INLINE
 const rec_t*
 ha_node_get_data(
@@ -68,33 +67,33 @@ ha_node_set_data_func(
 
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /** Sets hash node data.
-@param n	in: hash chain node
-@param b	in: buffer block containing the data
-@param d	in: pointer to the data */
+@param n in: hash chain node
+@param b in: buffer block containing the data
+@param d in: pointer to the data */
 # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
 #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 /** Sets hash node data.
-@param n	in: hash chain node
-@param b	in: buffer block containing the data
-@param d	in: pointer to the data */
+@param n in: hash chain node
+@param b in: buffer block containing the data
+@param d in: pointer to the data */
 # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 
 /******************************************************************//**
 Gets the next node in a hash chain.
-@return	next node, NULL if none */
+@return next node, NULL if none */
 UNIV_INLINE
 ha_node_t*
 ha_chain_get_next(
 /*==============*/
-	ha_node_t*	node)	/*!< in: hash chain node */
+	const ha_node_t*	node)	/*!< in: hash chain node */
 {
 	return(node->next);
 }
 
 /******************************************************************//**
 Gets the first node in a hash chain.
-@return	first node, NULL if none */
+@return first node, NULL if none */
 UNIV_INLINE
 ha_node_t*
 ha_chain_get_first(
@@ -122,9 +121,9 @@ hash_assert_can_modify(
 	if (table->type == HASH_TABLE_SYNC_MUTEX) {
 		ut_ad(mutex_own(hash_get_mutex(table, fold)));
 	} else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 		rw_lock_t* lock = hash_get_lock(table, fold);
-		ut_ad(rw_lock_own(lock, RW_LOCK_EX));
+		ut_ad(rw_lock_own(lock, RW_LOCK_X));
 # endif
 	} else {
 		ut_ad(table->type == HASH_TABLE_SYNC_NONE);
@@ -145,10 +144,10 @@ hash_assert_can_search(
 	if (table->type == HASH_TABLE_SYNC_MUTEX) {
 		ut_ad(mutex_own(hash_get_mutex(table, fold)));
 	} else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 		rw_lock_t* lock = hash_get_lock(table, fold);
-		ut_ad(rw_lock_own(lock, RW_LOCK_EX)
-		      || rw_lock_own(lock, RW_LOCK_SHARED));
+		ut_ad(rw_lock_own(lock, RW_LOCK_X)
+		      || rw_lock_own(lock, RW_LOCK_S));
 # endif
 	} else {
 		ut_ad(table->type == HASH_TABLE_SYNC_NONE);
@@ -167,20 +166,17 @@ ha_search_and_get_data(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		fold)	/*!< in: folded value of the searched data */
 {
-	ha_node_t*	node;
-
 	hash_assert_can_search(table, fold);
 	ut_ad(btr_search_enabled);
 
-	node = ha_chain_get_first(table, fold);
+	for (const ha_node_t* node = ha_chain_get_first(table, fold);
+	     node != NULL;
+	     node = ha_chain_get_next(node)) {
 
-	while (node) {
 		if (node->fold == fold) {
 
 			return(node->data);
 		}
-
-		node = ha_chain_get_next(node);
 	}
 
 	return(NULL);
@@ -188,7 +184,7 @@ ha_search_and_get_data(
 
 /*********************************************************//**
 Looks for an element when we know the pointer to the data.
-@return	pointer to the hash table node, NULL if not found in the table */
+@return pointer to the hash table node, NULL if not found in the table */
 UNIV_INLINE
 ha_node_t*
 ha_search_with_data(
@@ -220,7 +216,7 @@ ha_search_with_data(
 /*********************************************************//**
 Looks for an element when we know the pointer to the data, and deletes
 it from the hash table, if found.
-@return	TRUE if found */
+@return TRUE if found */
 UNIV_INLINE
 ibool
 ha_search_and_delete_if_found(
diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h
index 0073930b502..7dd6d26bad0 100644
--- a/storage/innobase/include/ha0storage.h
+++ b/storage/innobase/include/ha0storage.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -44,7 +44,7 @@ struct ha_storage_t;
 /*******************************************************************//**
 Creates a hash storage. If any of the parameters is 0, then a default
 value is used.
-@return	own: hash storage */
+@return own: hash storage */
 UNIV_INLINE
 ha_storage_t*
 ha_storage_create(
@@ -62,8 +62,7 @@ data_len bytes need to be allocated) and the size of storage is going to
 become more than "memlim" then "data" is not added and NULL is returned.
 To disable this behavior "memlim" can be set to 0, which stands for
 "no limit".
-@return	pointer to the copy */
-UNIV_INTERN
+@return pointer to the copy */
 const void*
 ha_storage_put_memlim(
 /*==================*/
@@ -74,10 +73,10 @@ ha_storage_put_memlim(
 
 /*******************************************************************//**
 Same as ha_storage_put_memlim() but without memory limit.
-@param storage	in/out: hash storage
-@param data	in: data to store
-@param data_len	in: data length
-@return		pointer to the copy of the string */
+@param storage in/out: hash storage
+@param data in: data to store
+@param data_len in: data length
+@return pointer to the copy of the string */
 #define ha_storage_put(storage, data, data_len)	\
 	ha_storage_put_memlim((storage), (data), (data_len), 0)
 
@@ -85,9 +84,9 @@ Same as ha_storage_put_memlim() but without memory limit.
 Copies string into the storage and returns a pointer to the copy. If the
 same string is already present, then pointer to it is returned.
 Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage	in/out: hash storage
-@param str	in: string to put
-@return		pointer to the copy of the string */
+@param storage in/out: hash storage
+@param str in: string to put
+@return pointer to the copy of the string */
 #define ha_storage_put_str(storage, str)	\
 	((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
 
@@ -96,10 +95,10 @@ Copies string into the storage and returns a pointer to the copy obeying
 a memory limit.
 If the same string is already present, then pointer to it is returned.
 Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage	in/out: hash storage
-@param str	in: string to put
-@param memlim	in: memory limit to obey
-@return		pointer to the copy of the string */
+@param storage in/out: hash storage
+@param str in: string to put
+@param memlim in: memory limit to obey
+@return pointer to the copy of the string */
 #define ha_storage_put_str_memlim(storage, str, memlim)	\
 	((const char*) ha_storage_put_memlim((storage), (str),	\
 					     strlen(str) + 1, (memlim)))
@@ -126,7 +125,7 @@ ha_storage_free(
 
 /*******************************************************************//**
 Gets the size of the memory used by a storage.
-@return	bytes used */
+@return bytes used */
 UNIV_INLINE
 ulint
 ha_storage_get_size(
diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
index 7150ca045ec..e841925d320 100644
--- a/storage/innobase/include/ha0storage.ic
+++ b/storage/innobase/include/ha0storage.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -48,7 +48,7 @@ struct ha_storage_node_t {
 /*******************************************************************//**
 Creates a hash storage. If any of the parameters is 0, then a default
 value is used.
-@return	own: hash storage */
+@return own: hash storage */
 UNIV_INLINE
 ha_storage_t*
 ha_storage_create(
@@ -127,7 +127,7 @@ ha_storage_free(
 
 /*******************************************************************//**
 Gets the size of the memory used by a storage.
-@return	bytes used */
+@return bytes used */
 UNIV_INLINE
 ulint
 ha_storage_get_size(
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 12453099ef7..116ca781726 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -19,33 +19,43 @@ this program; if not, write to the Free Software Foundation, Inc.,
 /*******************************************************************//**
 @file include/ha_prototypes.h
 Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB C code
+InnoDB C code.
 
-Created 5/11/2006 Osku Salerma
+NOTE: This header is intended to insulate InnoDB from SQL names and functions.
+Do not include any headers other than univ.i into this unless they are very
+simple headers.
 ************************************************************************/
 
 #ifndef HA_INNODB_PROTOTYPES_H
 #define HA_INNODB_PROTOTYPES_H
 
-#include "my_dbug.h"
-#include "mysqld_error.h"
-#include "my_compare.h"
-#include "my_sys.h"
-#include "m_string.h"
-#include "debug_sync.h"
-#include "my_base.h"
+#include "univ.i"
 
-#include "trx0types.h"
-#include "m_ctype.h" /* CHARSET_INFO */
+#if !defined UNIV_HOTBACKUP && !defined UNIV_INNOCHECKSUM
 
-// Forward declarations
+/* Forward declarations */
+class THD;
 class Field;
 struct fts_string_t;
+//typedef struct charset_info_st CHARSET_INFO;
+
+// JAN: TODO missing features:
+#undef MYSQL_57_SELECT_COUNT_OPTIMIZATION
+#undef MYSQL_COMPRESSION
+#undef MYSQL_ENCRYPTION
+#undef MYSQL_FT_INIT_EXT
+#undef MYSQL_INNODB_API_CB
+#undef MYSQL_INNODB_PARTITIONING
+#undef MYSQL_PFS
+#undef MYSQL_RENAME_INDEX
+#undef MYSQL_REPLACE_TRX_IN_THD
+#undef MYSQL_SPATIAL_INDEX
+#undef MYSQL_STORE_FTS_DOC_ID
+#undef MYSQL_TABLESPACES
 
 /*********************************************************************//**
 Wrapper around MySQL's copy_and_convert function.
-@return	number of bytes copied to 'to' */
-UNIV_INTERN
+@return number of bytes copied to 'to' */
 ulint
 innobase_convert_string(
 /*====================*/
@@ -68,8 +78,7 @@ Not more than "buf_size" bytes are written to "buf".
 The result is always NUL-terminated (provided buf_size > 0) and the
 number of bytes that were written to "buf" is returned (including the
 terminating NUL).
-@return	number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
 ulint
 innobase_raw_format(
 /*================*/
@@ -83,35 +92,50 @@ innobase_raw_format(
 
 /*****************************************************************//**
 Invalidates the MySQL query cache for the table. */
-UNIV_INTERN
 void
 innobase_invalidate_query_cache(
 /*============================*/
 	trx_t*		trx,		/*!< in: transaction which
 					modifies the table */
 	const char*	full_name,	/*!< in: concatenation of
-					database name, null char NUL,
+					database name, path separator,
 					table name, null char NUL;
 					NOTE that in Windows this is
 					always in LOWER CASE! */
 	ulint		full_name_len);	/*!< in: full name length where
 					also the null chars count */
 
+/** Quote a standard SQL identifier like tablespace, index or column name.
+@param[in]	file	output stream
+@param[in]	trx	InnoDB transaction, or NULL
+@param[in]	id	identifier to quote */
+void
+innobase_quote_identifier(
+	FILE*		file,
+	trx_t*		trx,
+	const char*	id);
+
+/** Quote a standard SQL identifier like tablespace, index or column name.
+Return the string as a std::string object.
+@param[in]	trx	InnoDB transaction, or NULL
+@param[in]	id	identifier to quote
+@return a std::string with id properly quoted. */
+std::string
+innobase_quote_identifier(
+	trx_t*		trx,
+	const char*	id);
+
 /*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return	pointer to the end of buf */
-UNIV_INTERN
+Convert a table name to the MySQL system_charset_info (UTF-8).
+@return pointer to the end of buf */
 char*
 innobase_convert_name(
 /*==================*/
 	char*		buf,	/*!< out: buffer for converted identifier */
 	ulint		buflen,	/*!< in: length of buf, in bytes */
-	const char*	id,	/*!< in: identifier to convert */
+	const char*	id,	/*!< in: table name to convert */
 	ulint		idlen,	/*!< in: length of id, in bytes */
-	THD*		thd,	/*!< in: MySQL connection thread, or NULL */
-	ibool		table_id);/*!< in: TRUE=id is a table or database name;
-				FALSE=id is an index name */
+	THD*		thd);	/*!< in: MySQL connection thread, or NULL */
 
 /******************************************************************//**
 Returns true if the thread is the replication thread on the slave
@@ -119,32 +143,18 @@ server. Used in srv_conc_enter_innodb() to determine if the thread
 should be allowed to enter InnoDB - the replication thread is treated
 differently than other threads. Also used in
 srv_conc_force_exit_innodb().
-@return	true if thd is the replication thread */
-UNIV_INTERN
+@return true if thd is the replication thread */
 ibool
 thd_is_replication_slave_thread(
 /*============================*/
 	THD*	thd);	/*!< in: thread handle */
 
 /******************************************************************//**
-Gets information on the durability property requested by thread.
-Used when writing either a prepare or commit record to the log
-buffer.
-@return the durability property. */
-UNIV_INTERN
-enum durability_properties
-thd_requested_durability(
-/*=====================*/
-	const THD* thd)	/*!< in: thread handle */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************************//**
 Returns true if the transaction this thread is processing has edited
 non-transactional tables. Used by the deadlock detector when deciding
 which transaction to rollback in case of a deadlock - we try to avoid
 rolling back transactions that have edited non-transactional tables.
-@return	true if non-transactional tables have been edited */
-UNIV_INTERN
+@return true if non-transactional tables have been edited */
 ibool
 thd_has_edited_nontrans_tables(
 /*===========================*/
@@ -152,7 +162,6 @@ thd_has_edited_nontrans_tables(
 
 /*************************************************************//**
 Prints info of a THD object (== user session thread) to the given file. */
-UNIV_INTERN
 void
 innobase_mysql_print_thd(
 /*=====================*/
@@ -161,24 +170,6 @@ innobase_mysql_print_thd(
 	uint	max_query_len);	/*!< in: max query length to print, or 0 to
 				   use the default max length */
 
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them.
-@return	1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
-	int		mysql_type,	/*!< in: MySQL type */
-	uint		charset_number,	/*!< in: number of the charset */
-	const unsigned char* a,		/*!< in: data field */
-	unsigned int	a_length,	/*!< in: data field length,
-					not UNIV_SQL_NULL */
-	const unsigned char* b,		/*!< in: data field */
-	unsigned int	b_length)	/*!< in: data field length,
-					not UNIV_SQL_NULL */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
 /*****************************************************************//**
 Log code calls this whenever log has been written and/or flushed up
 to a new position. We use this to notify upper layer of a new commit
@@ -186,30 +177,24 @@ checkpoint when necessary.*/
 UNIV_INTERN
 void
 innobase_mysql_log_notify(
-/*===============*/
+/*======================*/
 	ib_uint64_t	write_lsn,	/*!< in: LSN written to log file */
 	ib_uint64_t	flush_lsn);	/*!< in: LSN flushed to disk */
 
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
+/** Converts a MySQL type to an InnoDB type. Note that this function returns
 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-@return	DATA_BINARY, DATA_VARCHAR, ... */
-UNIV_INTERN
+@param[out]	unsigned_flag		DATA_UNSIGNED if an 'unsigned type';
+at least ENUM and SET, and unsigned integer types are 'unsigned types'
+@param[in]	field			MySQL Field
+@return DATA_BINARY, DATA_VARCHAR, ... */
 ulint
 get_innobase_type_from_mysql_type(
-/*==============================*/
-	ulint*		unsigned_flag,	/*!< out: DATA_UNSIGNED if an
-					'unsigned type';
-					at least ENUM and SET,
-					and unsigned integer
-					types are 'unsigned types' */
-	const void*	field)		/*!< in: MySQL Field */
-	MY_ATTRIBUTE((nonnull));
+	ulint*			unsigned_flag,
+	const void*		field);
 
 /******************************************************************//**
 Get the variable length bounds of the given character set. */
-UNIV_INTERN
 void
 innobase_get_cset_width(
 /*====================*/
@@ -219,8 +204,7 @@ innobase_get_cset_width(
 
 /******************************************************************//**
 Compares NUL-terminated UTF-8 strings case insensitively.
-@return	0 if a=b, <0 if a<b, >1 if a>b */
-UNIV_INTERN
+@return 0 if a=b, <0 if a<b, >0 if a>b */
 int
 innobase_strcasecmp(
 /*================*/
@@ -231,26 +215,22 @@ innobase_strcasecmp(
 Compares NUL-terminated UTF-8 strings case insensitively. The
 second string contains wildcards.
 @return 0 if a match is found, 1 if not */
-UNIV_INTERN
 int
 innobase_wildcasecmp(
 /*=================*/
 	const char*	a,	/*!< in: string to compare */
 	const char*	b);	/*!< in: wildcard string to compare */
 
-/******************************************************************//**
-Strip dir name from a full path name and return only its file name.
+/** Strip dir name from a full path name and return only the file name
+@param[in]	path_name	full path name
 @return file name or "null" if no file name */
-UNIV_INTERN
 const char*
 innobase_basename(
-/*==============*/
-	const char*	path_name);	/*!< in: full path name */
+	const char*	path_name);
 
 /******************************************************************//**
 Returns true if the thread is executing a SELECT statement.
-@return	true if thd is executing SELECT */
-UNIV_INTERN
+@return true if thd is executing SELECT */
 ibool
 thd_is_select(
 /*==========*/
@@ -258,29 +238,26 @@ thd_is_select(
 
 /******************************************************************//**
 Converts an identifier to a table name. */
-UNIV_INTERN
 void
 innobase_convert_from_table_id(
 /*===========================*/
-	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
-	char*			to,	/*!< out: converted identifier */
-	const char*		from,	/*!< in: identifier to convert */
-	ulint			len);	/*!< in: length of 'to', in bytes; should
-					be at least 5 * strlen(to) + 1 */
+	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
+	char*		to,	/*!< out: converted identifier */
+	const char*	from,	/*!< in: identifier to convert */
+	ulint		len);	/*!< in: length of 'to', in bytes; should
+				be at least 5 * strlen(to) + 1 */
 /******************************************************************//**
 Converts an identifier to UTF-8. */
-UNIV_INTERN
 void
 innobase_convert_from_id(
 /*=====================*/
-	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
-	char*			to,	/*!< out: converted identifier */
-	const char*		from,	/*!< in: identifier to convert */
-	ulint			len);	/*!< in: length of 'to', in bytes;
-					should be at least 3 * strlen(to) + 1 */
+	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
+	char*		to,	/*!< out: converted identifier */
+	const char*	from,	/*!< in: identifier to convert */
+	ulint		len);	/*!< in: length of 'to', in bytes;
+				should be at least 3 * strlen(to) + 1 */
 /******************************************************************//**
 Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
 void
 innobase_casedn_str(
 /*================*/
@@ -300,29 +277,41 @@ int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
 
 /**********************************************************************//**
 Determines the connection character set.
-@return	connection character set */
-UNIV_INTERN
-struct charset_info_st*
+@return connection character set */
+CHARSET_INFO*
 innobase_get_charset(
 /*=================*/
 	THD*	thd);	/*!< in: MySQL thread handle */
-/**********************************************************************//**
-Determines the current SQL statement.
-@return	SQL statement string */
-UNIV_INTERN
+
+/** Determines the current SQL statement.
+Thread unsafe, can only be called from the thread owning the THD.
+@param[in]	thd	MySQL thread handle
+@param[out]	length	Length of the SQL statement
+@return			SQL statement string */
 const char*
-innobase_get_stmt(
-/*==============*/
-	THD*	thd,		/*!< in: MySQL thread handle */
-	size_t*	length)		/*!< out: length of the SQL statement */
-	MY_ATTRIBUTE((nonnull));
+innobase_get_stmt_unsafe(
+	THD*	thd,
+	size_t*	length);
+
+/** Determines the current SQL statement.
+Thread safe, can be called from any thread as the string is copied
+into the provided buffer.
+@param[in]	thd	MySQL thread handle
+@param[out]	buf	Buffer containing SQL statement
+@param[in]	buflen	Length of provided buffer
+@return			Length of the SQL statement */
+size_t
+innobase_get_stmt_safe(
+	THD*	thd,
+	char*	buf,
+	size_t	buflen);
+
 /******************************************************************//**
 This function is used to find the storage length in bytes of the first n
 characters for prefix indexes using a multibyte character set. The function
 finds charset information and returns length of prefix_len characters in the
 index field in bytes.
-@return	number of bytes occupied by the first n characters */
-UNIV_INTERN
+@return number of bytes occupied by the first n characters */
 ulint
 innobase_get_at_most_n_mbchars(
 /*===========================*/
@@ -333,20 +322,10 @@ innobase_get_at_most_n_mbchars(
 	ulint data_len,		/*!< in: length of the string in bytes */
 	const char* str);	/*!< in: character string */
 
-/*************************************************************//**
-InnoDB index push-down condition check
-@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
-UNIV_INTERN
-enum icp_result
-innobase_index_cond(
-/*================*/
-	void*	file)	/*!< in/out: pointer to ha_innobase */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /******************************************************************//**
 Returns true if the thread supports XA,
 global value of innodb_supports_xa if thd is NULL.
-@return	true if thd supports XA */
-UNIV_INTERN
+@return true if thd supports XA */
 ibool
 thd_supports_xa(
 /*============*/
@@ -364,8 +343,7 @@ thd_innodb_tmpdir(
 
 /******************************************************************//**
 Returns the lock wait timeout for the current connection.
-@return	the lock wait timeout, in seconds */
-UNIV_INTERN
+@return the lock wait timeout, in seconds */
 ulong
 thd_lock_wait_timeout(
 /*==================*/
@@ -373,19 +351,25 @@ thd_lock_wait_timeout(
 			the global innodb_lock_wait_timeout */
 /******************************************************************//**
 Add up the time waited for the lock for the current query. */
-UNIV_INTERN
 void
 thd_set_lock_wait_time(
 /*===================*/
 	THD*	thd,	/*!< in/out: thread handle */
 	ulint	value);	/*!< in: time waited for the lock */
 
+/** Get status of innodb_tmpdir.
+@param[in]	thd	thread handle, or NULL to query
+			the global innodb_tmpdir.
+@retval NULL if innodb_tmpdir="" */
+const char*
+thd_innodb_tmpdir(
+	THD*	thd);
+
 /**********************************************************************//**
 Get the current setting of the table_cache_size global parameter. We do
 a dirty read because for one there is no synchronization object and
 secondly there is little harm in doing so even if we get a torn read.
-@return	SQL statement string */
-UNIV_INTERN
+@return the current value of table_cache_size */
 ulint
 innobase_get_table_cache_size(void);
 /*===============================*/
@@ -395,39 +379,13 @@ Get the current setting of the lower_case_table_names global parameter from
 mysqld.cc. We do a dirty read because for one there is no synchronization
 object and secondly there is little harm in doing so even if we get a torn
 read.
-@return	value of lower_case_table_names */
-UNIV_INTERN
+@return value of lower_case_table_names */
 ulint
 innobase_get_lower_case_table_names(void);
 /*=====================================*/
 
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-UNIV_INTERN
-int
-innobase_close_thd(
-/*===============*/
-	THD*	thd);		/*!< in: MySQL thread handle for
-				which to close the connection */
-/*************************************************************//**
-Get the next token from the given string and store it in *token. */
-UNIV_INTERN
-ulint
-innobase_mysql_fts_get_token(
-/*=========================*/
-	CHARSET_INFO*	charset,	/*!< in: Character set */
-	const byte*	start,		/*!< in: start of text */
-	const byte*	end,		/*!< in: one character past end of
-					text */
-	fts_string_t*	token,		/*!< out: token's text */
-	ulint*		offset);	/*!< out: offset to token,
-					measured as characters from
-					'start' */
-
 /******************************************************************//**
 compare two character string case insensitively according to their charset. */
-UNIV_INTERN
 int
 innobase_fts_text_case_cmp(
 /*=======================*/
@@ -435,49 +393,62 @@ innobase_fts_text_case_cmp(
 	const void*	p1,		/*!< in: key */
 	const void*	p2);		/*!< in: node */
 
-/****************************************************************//**
-Get FTS field charset info from the field's prtype
-@return charset info */
-UNIV_INTERN
-CHARSET_INFO*
-innobase_get_fts_charset(
-/*=====================*/
-	int		mysql_type,	/*!< in: MySQL type */
-	uint		charset_number);/*!< in: number of the charset */
 /******************************************************************//**
 Returns true if transaction should be flagged as read-only.
-@return	true if the thd is marked as read-only */
-UNIV_INTERN
-ibool
+@return true if the thd is marked as read-only */
+bool
 thd_trx_is_read_only(
 /*=================*/
 	THD*	thd);	/*!< in/out: thread handle */
 
+#if 0
+/**
+Check if the transaction can be rolled back
+@param[in] requestor	Session requesting the lock
+@param[in] holder	Session that holds the lock
+@return the session that will be rolled back, or NULL if it does not matter */
+
+THD*
+thd_trx_arbitrate(THD* requestor, THD* holder);
+
+/**
+@param[in] thd		Session to check
+@return the priority */
+
+int
+thd_trx_priority(THD* thd);
+
+#else
+static inline THD* thd_trx_arbitrate(THD*, THD*) { return NULL; }
+static inline int thd_trx_priority(THD*) { return 0; }
+#endif
 /******************************************************************//**
 Check if the transaction is an auto-commit transaction. TRUE also
 implies that it is a SELECT (read-only) transaction.
-@return	true if the transaction is an auto commit read-only transaction. */
-UNIV_INTERN
+@return true if the transaction is an auto commit read-only transaction. */
 ibool
 thd_trx_is_auto_commit(
 /*===================*/
 	THD*	thd);	/*!< in: thread handle, or NULL */
 
+/******************************************************************//**
+Get the thread start time.
+@return the thread start time in seconds since the epoch. */
+ulint
+thd_start_time_in_secs(
+/*===================*/
+	THD*	thd);	/*!< in: thread handle, or NULL */
+
 /*****************************************************************//**
-A wrapper function of innobase_convert_name(), convert a table or
-index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
-@return	pointer to the end of buf */
-UNIV_INTERN
+A wrapper function of innobase_convert_name(), convert a table name
+to the MySQL system_charset_info (UTF-8) and quote it if needed.
+The converted name is stored in buf; there is no return value. */
 void
 innobase_format_name(
 /*==================*/
-	char*		buf,		/*!< out: buffer for converted
-					identifier */
-	ulint		buflen,		/*!< in: length of buf, in bytes */
-	const char*	name,		/*!< in: index or table name
-					to format */
-	ibool		is_index_name)	/*!< in: index name */
-	MY_ATTRIBUTE((nonnull));
+	char*		buf,	/*!< out: buffer for converted identifier */
+	ulint		buflen,	/*!< in: length of buf, in bytes */
+	const char*	name);	/*!< in: table name to format */
 
 /** Corresponds to Sql_condition:enum_warning_level. */
 enum ib_log_level_t {
@@ -498,7 +469,6 @@ void push_warning_printf(
 	THD *thd, Sql_condition::enum_warning_level level,
 	uint code, const char *format, ...);
 */
-UNIV_INTERN
 void
 ib_errf(
 /*====*/
@@ -519,7 +489,6 @@ void push_warning_printf(
 	THD *thd, Sql_condition::enum_warning_level level,
 	uint code, const char *format, ...);
 */
-UNIV_INTERN
 void
 ib_senderrf(
 /*========*/
@@ -528,22 +497,19 @@ ib_senderrf(
 	ib_uint32_t	code,		/*!< MySQL error code */
 	...);				/*!< Args */
 
-/******************************************************************//**
-Write a message to the MySQL log, prefixed with "InnoDB: ".
-Wrapper around sql_print_information() */
-UNIV_INTERN
-void
-ib_logf(
-/*====*/
-	ib_log_level_t	level,		/*!< in: warning level */
-	const char*	format,		/*!< printf format */
-	...)				/*!< Args */
-	MY_ATTRIBUTE((format(printf, 2, 3)));
+extern const char* 	TROUBLESHOOTING_MSG;
+extern const char* 	TROUBLESHOOT_DATADICT_MSG;
+extern const char* 	BUG_REPORT_MSG;
+extern const char* 	FORCE_RECOVERY_MSG;
+extern const char*      ERROR_CREATING_MSG;
+extern const char*      OPERATING_SYSTEM_ERROR_MSG;
+extern const char*      FOREIGN_KEY_CONSTRAINTS_MSG;
+extern const char*      SET_TRANSACTION_MSG;
+extern const char*      INNODB_PARAMETERS_MSG;
 
 /******************************************************************//**
 Returns the NUL terminated value of glob_hostname.
-@return	pointer to glob_hostname. */
-UNIV_INTERN
+@return pointer to glob_hostname. */
 const char*
 server_get_hostname();
 /*=================*/
@@ -551,7 +517,6 @@ server_get_hostname();
 /******************************************************************//**
 Get the error message format string.
 @return the format string or 0 if not found. */
-UNIV_INTERN
 const char*
 innobase_get_err_msg(
 /*=================*/
@@ -574,8 +539,7 @@ values we want to reserve for multi-value inserts e.g.,
 innobase_next_autoinc() will be called with increment set to 3 where
 autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
 the multi-value INSERT above.
-@return	the next value */
-UNIV_INTERN
+@return the next value */
 ulonglong
 innobase_next_autoinc(
 /*==================*/
@@ -586,16 +550,6 @@ innobase_next_autoinc(
 	ulonglong	max_value)	/*!< in: max value for type */
 	MY_ATTRIBUTE((pure, warn_unused_result));
 
-/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type.
-@return maximum allowed value for the field */
-UNIV_INTERN
-ulonglong
-innobase_get_int_col_max_value(
-/*===========================*/
-	const Field*	field)	/*!< in: MySQL field */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
 /**********************************************************************
 Converts an identifier from my_charset_filename to UTF-8 charset. */
 uint
@@ -605,12 +559,10 @@ innobase_convert_to_system_charset(
 	const char*     from,		/* in: identifier to convert */
 	ulint           len,		/* in: length of 'to', in bytes */
 	uint*		errors);	/* out: error return */
-
 /**********************************************************************
 Check if the length of the identifier exceeds the maximum allowed.
 The input to this function is an identifier in charset my_charset_filename.
 return true when length of identifier is too long. */
-UNIV_INTERN
 my_bool
 innobase_check_identifier_length(
 /*=============================*/
@@ -622,9 +574,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */
 uint
 innobase_convert_to_system_charset(
 /*===============================*/
-	char*           to,		/* out: converted identifier */
-	const char*     from,		/* in: identifier to convert */
-	ulint           len,		/* in: length of 'to', in bytes */
+	char*		to,		/* out: converted identifier */
+	const char*	from,		/* in: identifier to convert */
+	ulint		len,		/* in: length of 'to', in bytes */
 	uint*		errors);	/* out: error return */
 
 /**********************************************************************
@@ -632,9 +584,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */
 uint
 innobase_convert_to_filename_charset(
 /*=================================*/
-	char*           to,     /* out: converted identifier */
-	const char*     from,   /* in: identifier to convert */
-	ulint           len);   /* in: length of 'to', in bytes */
+	char*		to,	/* out: converted identifier */
+	const char*	from,	/* in: identifier to convert */
+	ulint		len);	/* in: length of 'to', in bytes */
 
 /********************************************************************//**
 Helper function to push warnings from InnoDB internals to SQL-layer. */
@@ -662,12 +614,59 @@ database name catenated to '/' and table name. An example:
 test/mytable. On Windows normalization puts both the database name and the
 table name always to lower case if "set_lower_case" is set to TRUE. */
 void
-normalize_table_name_low(
-/*=====================*/
+normalize_table_name_c_low(
+/*=======================*/
 	char*		norm_name,	/*!< out: normalized name as a
 					null-terminated string */
 	const char*	name,		/*!< in: table name string */
 	ibool		set_lower_case); /*!< in: TRUE if we want to set
 					name to lower case */
+/*************************************************************//**
+InnoDB index push-down condition check defined in ha_innodb.cc
+@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
+
+#include <my_compare.h>
+
+ICP_RESULT
+innobase_index_cond(
+/*================*/
+	void*	file)	/*!< in/out: pointer to ha_innobase */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+
+#include <dur_prop.h>
+
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+	const THD* thd)	/*!< in: thread handle */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Update the system variable with the given value of the InnoDB
+buffer pool size.
+@param[in]	buf_pool_size	given value of buffer pool size.*/
+void
+innodb_set_buf_pool_size(ulonglong buf_pool_size);
+
+/** Create a MYSQL_THD for background purge threads and mark it as such.
+@returns new MYSQL_THD */
+MYSQL_THD
+innobase_create_background_thd();
+
+/** Destroy a background purge thread THD.
+@param[in]	thd	MYSQL_THD to destroy */
+void
+innobase_destroy_background_thd(MYSQL_THD);
+
+/** Close opened tables, free memory, delete items for a MYSQL_THD.
+@param[in]	thd	MYSQL_THD to reset */
+void
+innobase_reset_background_thd(MYSQL_THD);
 
+#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */
 #endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 3dd6c99eb6d..1c690839449 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -23,7 +23,6 @@ Smart ALTER TABLE
 
 /*************************************************************//**
 Copies an InnoDB record to table->record[0]. */
-UNIV_INTERN
 void
 innobase_rec_to_mysql(
 /*==================*/
@@ -36,7 +35,6 @@ innobase_rec_to_mysql(
 
 /*************************************************************//**
 Copies an InnoDB index entry to table->record[0]. */
-UNIV_INTERN
 void
 innobase_fields_to_mysql(
 /*=====================*/
@@ -47,7 +45,6 @@ innobase_fields_to_mysql(
 
 /*************************************************************//**
 Copies an InnoDB row to table->record[0]. */
-UNIV_INTERN
 void
 innobase_row_to_mysql(
 /*==================*/
@@ -58,7 +55,6 @@ innobase_row_to_mysql(
 
 /*************************************************************//**
 Resets table->record[0]. */
-UNIV_INTERN
 void
 innobase_rec_reset(
 /*===============*/
@@ -70,13 +66,12 @@ auto_increment_increment and auto_increment_offset variables. */
 struct ib_sequence_t {
 
 	/**
-	@param thd - the session
-	@param start_value - the lower bound
-	@param max_value - the upper bound (inclusive) */
+	@param thd the session
+	@param start_value the lower bound
+	@param max_value the upper bound (inclusive) */
 	ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
 
-	/**
-	Postfix increment
+	/** Postfix increment
 	@return the value to insert */
 	ulonglong operator++(int) UNIV_NOTHROW;
 
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index 9a4077befb1..a7bcee1185b 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,6 @@ Created 5/20/1997 Heikki Tuuri
 #include "univ.i"
 #include "mem0mem.h"
 #ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
 # include "sync0rw.h"
 #endif /* !UNIV_HOTBACKUP */
 
@@ -56,8 +55,7 @@ enum hash_table_sync_t {
 /*************************************************************//**
 Creates a hash table with >= n array cells. The actual number
 of cells is chosen to be a prime number slightly bigger than n.
-@return	own: created table */
-UNIV_INTERN
+@return own: created table */
 hash_table_t*
 hash_create(
 /*========*/
@@ -67,39 +65,26 @@ hash_create(
 Creates a sync object array array to protect a hash table.
 ::sync_obj can be mutexes or rw_locks depening on the type of
 hash table. */
-UNIV_INTERN
 void
-hash_create_sync_obj_func(
-/*======================*/
+hash_create_sync_obj(
+/*=================*/
 	hash_table_t*		table,	/*!< in: hash table */
-	enum hash_table_sync_t	type,	/*!< in: HASH_TABLE_SYNC_MUTEX
+	hash_table_sync_t	type,	/*!< in: HASH_TABLE_SYNC_MUTEX
 					or HASH_TABLE_SYNC_RW_LOCK */
-#ifdef UNIV_SYNC_DEBUG
-	ulint			sync_level,/*!< in: latching order level
-					of the mutexes: used in the
-					debug version */
-#endif /* UNIV_SYNC_DEBUG */
+	latch_id_t		id,	/*!< in: mutex/rw_lock ID */
 	ulint			n_sync_obj);/*!< in: number of sync objects,
 					must be a power of 2 */
-#ifdef UNIV_SYNC_DEBUG
-# define hash_create_sync_obj(t, s, n, level)			\
-			hash_create_sync_obj_func(t, s, level, n)
-#else /* UNIV_SYNC_DEBUG */
-# define hash_create_sync_obj(t, s, n, level)			\
-			hash_create_sync_obj_func(t, s, n)
-#endif /* UNIV_SYNC_DEBUG */
 #endif /* !UNIV_HOTBACKUP */
 
 /*************************************************************//**
 Frees a hash table. */
-UNIV_INTERN
 void
 hash_table_free(
 /*============*/
 	hash_table_t*	table);	/*!< in, own: hash table */
 /**************************************************************//**
 Calculates the hash value from a folded value.
-@return	hashed value */
+@return hashed value */
 UNIV_INLINE
 ulint
 hash_calc_hash(
@@ -268,7 +253,7 @@ do {									\
 
 /************************************************************//**
 Gets the nth cell in a hash table.
-@return	pointer to cell */
+@return pointer to cell */
 UNIV_INLINE
 hash_cell_t*
 hash_get_nth_cell(
@@ -286,7 +271,7 @@ hash_table_clear(
 
 /*************************************************************//**
 Returns the number of cells in a hash table.
-@return	number of cells */
+@return number of cells */
 UNIV_INLINE
 ulint
 hash_get_n_cells(
@@ -364,10 +349,12 @@ do {\
 	cell_count2222 = hash_get_n_cells(OLD_TABLE);\
 \
 	for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
-		NODE_TYPE*	node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\
+		NODE_TYPE*	node2222 = static_cast<NODE_TYPE*>(\
+			HASH_GET_FIRST((OLD_TABLE), i2222));\
 \
 		while (node2222) {\
-			NODE_TYPE*	next2222 = node2222->PTR_NAME;\
+			NODE_TYPE*	next2222 = static_cast<NODE_TYPE*>(\
+				node2222->PTR_NAME);\
 			ulint		fold2222 = FOLD_FUNC(node2222);\
 \
 			HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
@@ -380,7 +367,7 @@ do {\
 
 /************************************************************//**
 Gets the sync object index for a fold value in a hash table.
-@return	index */
+@return index */
 UNIV_INLINE
 ulint
 hash_get_sync_obj_index(
@@ -389,7 +376,7 @@ hash_get_sync_obj_index(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Gets the nth heap in a hash table.
-@return	mem heap */
+@return mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_nth_heap(
@@ -398,7 +385,7 @@ hash_get_nth_heap(
 	ulint		i);	/*!< in: index of the heap */
 /************************************************************//**
 Gets the heap for a fold value in a hash table.
-@return	mem heap */
+@return mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_heap(
@@ -407,7 +394,7 @@ hash_get_heap(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Gets the nth mutex in a hash table.
-@return	mutex */
+@return mutex */
 UNIV_INLINE
 ib_mutex_t*
 hash_get_nth_mutex(
@@ -416,7 +403,7 @@ hash_get_nth_mutex(
 	ulint		i);	/*!< in: index of the mutex */
 /************************************************************//**
 Gets the nth rw_lock in a hash table.
-@return	rw_lock */
+@return rw_lock */
 UNIV_INLINE
 rw_lock_t*
 hash_get_nth_lock(
@@ -425,7 +412,7 @@ hash_get_nth_lock(
 	ulint		i);	/*!< in: index of the rw_lock */
 /************************************************************//**
 Gets the mutex for a fold value in a hash table.
-@return	mutex */
+@return mutex */
 UNIV_INLINE
 ib_mutex_t*
 hash_get_mutex(
@@ -434,16 +421,42 @@ hash_get_mutex(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Gets the rw_lock for a fold value in a hash table.
-@return	rw_lock */
+@return rw_lock */
 UNIV_INLINE
 rw_lock_t*
 hash_get_lock(
 /*==========*/
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		fold);	/*!< in: fold */
+
+/** If the given rw_lock is not the appropriate one for a fold value in a
+hash table, relock: S-lock another rw_lock until it is the appropriate one.
+@param[in]	hash_lock	latched rw_lock to be confirmed
+@param[in]	table		hash table
+@param[in]	fold		fold value
+@return	latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_s_confirm(
+	rw_lock_t*	hash_lock,
+	hash_table_t*	table,
+	ulint		fold);
+
+/** If the given rw_lock is not the appropriate one for a fold value in a
+hash table, relock: X-lock another rw_lock until it is the appropriate one.
+@param[in]	hash_lock	latched rw_lock to be confirmed
+@param[in]	table		hash table
+@param[in]	fold		fold value
+@return	latched rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_x_confirm(
+	rw_lock_t*	hash_lock,
+	hash_table_t*	table,
+	ulint		fold);
+
 /************************************************************//**
 Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_mutex_enter(
 /*=============*/
@@ -451,7 +464,6 @@ hash_mutex_enter(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_mutex_exit(
 /*============*/
@@ -459,21 +471,18 @@ hash_mutex_exit(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
 void
 hash_mutex_enter_all(
 /*=================*/
 	hash_table_t*	table);	/*!< in: hash table */
 /************************************************************//**
 Releases all the mutexes of a hash table. */
-UNIV_INTERN
 void
 hash_mutex_exit_all(
 /*================*/
 	hash_table_t*	table);	/*!< in: hash table */
 /************************************************************//**
 Releases all but the passed in mutex of a hash table. */
-UNIV_INTERN
 void
 hash_mutex_exit_all_but(
 /*====================*/
@@ -481,7 +490,6 @@ hash_mutex_exit_all_but(
 	ib_mutex_t*	keep_mutex);	/*!< in: mutex to keep */
 /************************************************************//**
 s-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_lock_s(
 /*========*/
@@ -489,7 +497,6 @@ hash_lock_s(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 x-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_lock_x(
 /*========*/
@@ -497,7 +504,6 @@ hash_lock_x(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 unlock an s-lock for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_unlock_s(
 /*==========*/
@@ -506,7 +512,6 @@ hash_unlock_s(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 unlock x-lock for a fold value in a hash table. */
-UNIV_INTERN
 void
 hash_unlock_x(
 /*==========*/
@@ -514,21 +519,18 @@ hash_unlock_x(
 	ulint		fold);	/*!< in: fold */
 /************************************************************//**
 Reserves all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
 void
 hash_lock_x_all(
 /*============*/
 	hash_table_t*	table);	/*!< in: hash table */
 /************************************************************//**
 Releases all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
 void
 hash_unlock_x_all(
 /*==============*/
 	hash_table_t*	table);	/*!< in: hash table */
 /************************************************************//**
 Releases all but passed in lock of a hash table, */
-UNIV_INTERN
 void
 hash_unlock_x_all_but(
 /*==================*/
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
index 254f3f82e5d..b99ac1eb501 100644
--- a/storage/innobase/include/hash0hash.ic
+++ b/storage/innobase/include/hash0hash.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 5/20/1997 Heikki Tuuri
 
 /************************************************************//**
 Gets the nth cell in a hash table.
-@return	pointer to cell */
+@return pointer to cell */
 UNIV_INLINE
 hash_cell_t*
 hash_get_nth_cell(
@@ -58,7 +58,7 @@ hash_table_clear(
 
 /*************************************************************//**
 Returns the number of cells in a hash table.
-@return	number of cells */
+@return number of cells */
 UNIV_INLINE
 ulint
 hash_get_n_cells(
@@ -72,7 +72,7 @@ hash_get_n_cells(
 
 /**************************************************************//**
 Calculates the hash value from a folded value.
-@return	hashed value */
+@return hashed value */
 UNIV_INLINE
 ulint
 hash_calc_hash(
@@ -88,7 +88,7 @@ hash_calc_hash(
 #ifndef UNIV_HOTBACKUP
 /************************************************************//**
 Gets the sync object index for a fold value in a hash table.
-@return	index */
+@return index */
 UNIV_INLINE
 ulint
 hash_get_sync_obj_index(
@@ -106,7 +106,7 @@ hash_get_sync_obj_index(
 
 /************************************************************//**
 Gets the nth heap in a hash table.
-@return	mem heap */
+@return mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_nth_heap(
@@ -124,7 +124,7 @@ hash_get_nth_heap(
 
 /************************************************************//**
 Gets the heap for a fold value in a hash table.
-@return	mem heap */
+@return mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_heap(
@@ -148,7 +148,7 @@ hash_get_heap(
 
 /************************************************************//**
 Gets the nth mutex in a hash table.
-@return	mutex */
+@return mutex */
 UNIV_INLINE
 ib_mutex_t*
 hash_get_nth_mutex(
@@ -166,7 +166,7 @@ hash_get_nth_mutex(
 
 /************************************************************//**
 Gets the mutex for a fold value in a hash table.
-@return	mutex */
+@return mutex */
 UNIV_INLINE
 ib_mutex_t*
 hash_get_mutex(
@@ -186,7 +186,7 @@ hash_get_mutex(
 
 /************************************************************//**
 Gets the nth rw_lock in a hash table.
-@return	rw_lock */
+@return rw_lock */
 UNIV_INLINE
 rw_lock_t*
 hash_get_nth_lock(
@@ -204,7 +204,7 @@ hash_get_nth_lock(
 
 /************************************************************//**
 Gets the rw_lock for a fold value in a hash table.
-@return	rw_lock */
+@return rw_lock */
 UNIV_INLINE
 rw_lock_t*
 hash_get_lock(
@@ -222,4 +222,58 @@ hash_get_lock(
 
 	return(hash_get_nth_lock(table, i));
 }
+
+/** Confirms that the S-latched rw_lock is the one hash_get_lock() returns for
+the fold value. If not, S-unlocks it, S-locks the returned one and rechecks.
+@param[in]	hash_lock	latched rw_lock to be confirmed
+@param[in]	table		hash table
+@param[in]	fold		fold value
+@return the rw_lock that is S-latched on return */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_s_confirm(
+	rw_lock_t*	hash_lock,
+	hash_table_t*	table,
+	ulint		fold)
+{
+	ut_ad(rw_lock_own(hash_lock, RW_LOCK_S));
+
+	rw_lock_t*	hash_lock_tmp = hash_get_lock(table, fold);
+
+	while (hash_lock_tmp != hash_lock) {
+		rw_lock_s_unlock(hash_lock);
+		hash_lock = hash_lock_tmp;
+		rw_lock_s_lock(hash_lock);
+		hash_lock_tmp = hash_get_lock(table, fold);
+	}
+
+	return(hash_lock);
+}
+
+/** Confirms that the X-latched rw_lock is the one hash_get_lock() returns for
+the fold value. If not, X-unlocks it, X-locks the returned one and rechecks.
+@param[in]	hash_lock	latched rw_lock to be confirmed
+@param[in]	table		hash table
+@param[in]	fold		fold value
+@return the rw_lock that is X-latched on return */
+UNIV_INLINE
+rw_lock_t*
+hash_lock_x_confirm(
+	rw_lock_t*	hash_lock,
+	hash_table_t*	table,
+	ulint		fold)
+{
+	ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+
+	rw_lock_t*	hash_lock_tmp = hash_get_lock(table, fold);
+
+	while (hash_lock_tmp != hash_lock) {
+		rw_lock_x_unlock(hash_lock);
+		hash_lock = hash_lock_tmp;
+		rw_lock_x_lock(hash_lock);
+		hash_lock_tmp = hash_get_lock(table, fold);
+	}
+
+	return(hash_lock);
+}
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
new file mode 100644
index 00000000000..9f4ad8424f3
--- /dev/null
+++ b/storage/innobase/include/ib0mutex.h
@@ -0,0 +1,815 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ib0mutex.h
+Policy based mutexes.
+
+Created 2013-03-26 Sunny Bains.
+***********************************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+#ifndef ib0mutex_h
+#define ib0mutex_h
+
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "os0event.h"
+#include "sync0arr.h"
+
+/** OS mutex for tracking lock/unlock for debugging */
+template <template <typename> class Policy = NoPolicy>
+struct OSTrackMutex {
+
+	typedef Policy<OSTrackMutex> MutexPolicy;
+
+	explicit OSTrackMutex(bool destroy_mutex_at_exit = true)
+		UNIV_NOTHROW
+	{
+		ut_d(m_freed = true);
+		ut_d(m_locked = false);
+		ut_d(m_destroy_at_exit = destroy_mutex_at_exit);
+	}
+
+	~OSTrackMutex() UNIV_NOTHROW
+	{
+		ut_ad(!m_destroy_at_exit || !m_locked);
+	}
+
+	/** Initialise the mutex.
+	@param[in]	id              Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		latch_id_t	id,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		ut_ad(m_freed);
+		ut_ad(!m_locked);
+
+		m_mutex.init();
+
+		ut_d(m_freed = false);
+	}
+
+	/** Destroy the mutex */
+	void destroy() UNIV_NOTHROW
+	{
+		ut_ad(!m_locked);
+		ut_ad(innodb_calling_exit || !m_freed);
+
+		m_mutex.destroy();
+
+		ut_d(m_freed = true);
+	}
+
+	/** Release the mutex. */
+	void exit() UNIV_NOTHROW
+	{
+		ut_ad(m_locked);
+		ut_d(m_locked = false);
+		ut_ad(innodb_calling_exit || !m_freed);
+
+		m_mutex.exit();
+	}
+
+	/** Acquire the mutex.
+	@param[in]	max_spins	max number of spins
+	@param[in]	max_delay	max delay per spin
+	@param[in]	filename	from where called
+	@param[in]	line		within filename */
+	void enter(
+		uint32_t	max_spins,
+		uint32_t	max_delay,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+
+		m_mutex.enter();
+
+		ut_ad(!m_locked);
+		ut_d(m_locked = true);
+	}
+
+	/** @return true if locking succeeded */
+	bool try_lock() UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+
+		bool	locked = m_mutex.try_lock();
+
+		if (locked) {
+			ut_ad(!m_locked);
+			ut_d(m_locked = locked);
+		}
+
+		return(locked);
+	}
+
+	/** @return non-const version of the policy */
+	MutexPolicy& policy()
+		UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+	/** @return the const version of the policy */
+	const MutexPolicy& policy() const
+		UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+private:
+#ifdef UNIV_DEBUG
+	/** true if the mutex has not been initialized or was destroyed */
+	bool			m_freed;
+
+	/** true if the mutex has been locked. */
+	bool			m_locked;
+
+	/** Do/Don't destroy mutex at exit; gates the destructor's lock check */
+	bool			m_destroy_at_exit;
+#endif /* UNIV_DEBUG */
+
+	/** OS Mutex instance */
+	OSMutex			m_mutex;
+
+	/** Policy data */
+	MutexPolicy		m_policy;
+};
+
+
+#ifdef HAVE_IB_LINUX_FUTEX
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+/** Mutex implementation that uses the Linux futex. */
+template <template <typename> class Policy = NoPolicy>
+struct TTASFutexMutex {
+
+	typedef Policy<TTASFutexMutex> MutexPolicy;
+
+	TTASFutexMutex() UNIV_NOTHROW
+		:
+		m_lock_word(MUTEX_STATE_UNLOCKED)
+	{
+		/* Check that lock_word is aligned. */
+		ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+	}
+
+	~TTASFutexMutex()
+	{
+		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	id		Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		latch_id_t	id,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Destroy the mutex. */
+	void destroy() UNIV_NOTHROW
+	{
+		/* The destructor can be called at shutdown. */
+		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Acquire the mutex.
+	@param[in]	max_spins	max number of spins
+	@param[in]	max_delay	max delay per spin
+	@param[in]	filename	from where called
+	@param[in]	line		within filename */
+	void enter(
+		uint32_t	max_spins,
+		uint32_t	max_delay,
+		const char*	filename,
+		uint32_t	line) UNIV_NOTHROW
+	{
+		uint32_t n_spins, n_waits;
+
+		for (n_spins= 0; n_spins < max_spins; n_spins++) {
+			if (try_lock()) {
+				m_policy.add(n_spins, 0);
+				return;
+			}
+
+			ut_delay(ut_rnd_interval(0, max_delay));
+		}
+
+		for (n_waits= 0;; n_waits++) {
+			if (my_atomic_fas32_explicit(&m_lock_word,
+						     MUTEX_STATE_WAITERS,
+						     MY_MEMORY_ORDER_ACQUIRE)
+			    == MUTEX_STATE_UNLOCKED) {
+				break;
+			}
+
+			syscall(SYS_futex, &m_lock_word,
+				FUTEX_WAIT_PRIVATE, MUTEX_STATE_WAITERS,
+				0, 0, 0);
+		}
+
+		m_policy.add(n_spins, n_waits);
+	}
+
+	/** Release the mutex. */
+	void exit() UNIV_NOTHROW
+	{
+		if (my_atomic_fas32_explicit(&m_lock_word,
+					     MUTEX_STATE_UNLOCKED,
+					     MY_MEMORY_ORDER_RELEASE)
+		    == MUTEX_STATE_WAITERS) {
+			syscall(SYS_futex, &m_lock_word, FUTEX_WAKE_PRIVATE,
+				1, 0, 0, 0);
+		}
+	}
+
+	/** Try and lock the mutex.
+	@return true if successful */
+	bool try_lock() UNIV_NOTHROW
+	{
+		int32 oldval = MUTEX_STATE_UNLOCKED;
+		return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+						       MUTEX_STATE_LOCKED,
+						       MY_MEMORY_ORDER_ACQUIRE,
+						       MY_MEMORY_ORDER_RELAXED));
+	}
+
+	/** @return non-const version of the policy */
+	MutexPolicy& policy() UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+	/** @return const version of the policy */
+	const MutexPolicy& policy() const UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+private:
+	/** Policy data */
+	MutexPolicy		m_policy;
+
+	/** lock_word is the target of the atomic test-and-set instruction
+	when atomic operations are enabled. */
+	int32			m_lock_word;
+};
+
+#endif /* HAVE_IB_LINUX_FUTEX */
+
+template <template <typename> class Policy = NoPolicy>
+struct TTASMutex {
+
+	typedef Policy<TTASMutex> MutexPolicy;
+
+	TTASMutex() UNIV_NOTHROW
+		:
+		m_lock_word(MUTEX_STATE_UNLOCKED)
+	{
+		/* Check that lock_word is aligned. */
+		ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+	}
+
+	~TTASMutex()
+	{
+		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	id		Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		latch_id_t	id,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Destroy the mutex. */
+	void destroy() UNIV_NOTHROW
+	{
+		/* The destructor can be called at shutdown. */
+		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Try and lock the mutex.
+	@return true on success */
+	bool try_lock() UNIV_NOTHROW
+	{
+		int32 oldval = MUTEX_STATE_UNLOCKED;
+		return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+						       MUTEX_STATE_LOCKED,
+						       MY_MEMORY_ORDER_ACQUIRE,
+						       MY_MEMORY_ORDER_RELAXED));
+	}
+
+	/** Release the mutex. */
+	void exit() UNIV_NOTHROW
+	{
+		ut_ad(m_lock_word == MUTEX_STATE_LOCKED);
+		my_atomic_store32_explicit(&m_lock_word, MUTEX_STATE_UNLOCKED,
+					   MY_MEMORY_ORDER_RELEASE);
+	}
+
+	/** Acquire the mutex.
+	@param[in]	max_spins	max number of spins
+	@param[in]	max_delay	max delay per spin
+	@param[in]	filename	from where called
+	@param[in]	line		within filename */
+	void enter(
+		uint32_t	max_spins,
+		uint32_t	max_delay,
+		const char*	filename,
+		uint32_t	line) UNIV_NOTHROW
+	{
+		const uint32_t	step = max_spins;
+		uint32_t n_spins = 0;
+
+		while (!try_lock()) {
+			ut_delay(ut_rnd_interval(0, max_delay));
+			if (++n_spins == max_spins) {
+				os_thread_yield();
+				max_spins+= step;
+			}
+		}
+
+		m_policy.add(n_spins, 0);
+	}
+
+	/** @return non-const version of the policy */
+	MutexPolicy& policy() UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+	/** @return const version of the policy */
+	const MutexPolicy& policy() const UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+private:
+	// Disable copying
+	TTASMutex(const TTASMutex&);
+	TTASMutex& operator=(const TTASMutex&);
+
+	/** Policy data */
+	MutexPolicy		m_policy;
+
+	/** lock_word is the target of the atomic test-and-set instruction
+	when atomic operations are enabled. */
+	int32			m_lock_word;
+};
+
+template <template <typename> class Policy = NoPolicy>
+struct TTASEventMutex {
+
+	typedef Policy<TTASEventMutex> MutexPolicy;
+
+	TTASEventMutex()
+		UNIV_NOTHROW
+		:
+		m_lock_word(MUTEX_STATE_UNLOCKED),
+		m_event()
+	{
+		/* Check that lock_word is aligned. */
+		ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
+	}
+
+	~TTASEventMutex()
+		UNIV_NOTHROW
+	{
+		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+	}
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	id		Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		latch_id_t	id,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		ut_a(m_event == 0);
+		ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
+
+		m_event = os_event_create(sync_latch_get_name(id));
+	}
+
+	/** This is the real destructor. This mutex can be created in BSS and
+	its destructor will be called on exit(). We can't call
+	os_event_destroy() at that stage. */
+	void destroy()
+		UNIV_NOTHROW
+	{
+		ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
+
+		/* We have to free the event before InnoDB shuts down. */
+		os_event_destroy(m_event);
+		m_event = 0;
+	}
+
+	/** Try and lock the mutex. Note: POSIX returns 0 on success.
+	@return true on success */
+	bool try_lock()
+		UNIV_NOTHROW
+	{
+		int32 oldval = MUTEX_STATE_UNLOCKED;
+		return(my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+						       MUTEX_STATE_LOCKED,
+						       MY_MEMORY_ORDER_ACQUIRE,
+						       MY_MEMORY_ORDER_RELAXED));
+	}
+
+	/** Release the mutex. */
+	void exit()
+		UNIV_NOTHROW
+	{
+		if (my_atomic_fas32_explicit(&m_lock_word,
+					     MUTEX_STATE_UNLOCKED,
+					     MY_MEMORY_ORDER_RELEASE)
+		    == MUTEX_STATE_WAITERS) {
+			os_event_set(m_event);
+			sync_array_object_signalled();
+		}
+	}
+
+	/** Acquire the mutex.
+	@param[in]	max_spins	max number of spins
+	@param[in]	max_delay	max delay per spin
+	@param[in]	filename	from where called
+	@param[in]	line		within filename */
+	void enter(
+		uint32_t	max_spins,
+		uint32_t	max_delay,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+		uint32_t	n_spins = 0;
+		uint32_t	n_waits = 0;
+		const uint32_t	step = max_spins;
+
+		while (!try_lock()) {
+			if (n_spins++ == max_spins) {
+				max_spins += step;
+				n_waits++;
+				os_thread_yield();
+
+				sync_cell_t*	cell;
+				sync_array_t *sync_arr = sync_array_get_and_reserve_cell(
+					this,
+					(m_policy.get_id() == LATCH_ID_BUF_BLOCK_MUTEX
+					 || m_policy.get_id() == LATCH_ID_BUF_POOL_ZIP)
+					? SYNC_BUF_BLOCK
+					: SYNC_MUTEX,
+					filename, line, &cell);
+
+				int32 oldval = MUTEX_STATE_LOCKED;
+				my_atomic_cas32_strong_explicit(&m_lock_word, &oldval,
+								MUTEX_STATE_WAITERS,
+								MY_MEMORY_ORDER_RELAXED,
+								MY_MEMORY_ORDER_RELAXED);
+
+				if (oldval == MUTEX_STATE_UNLOCKED) {
+					sync_array_free_cell(sync_arr, cell);
+				} else {
+					sync_array_wait_event(sync_arr, cell);
+				}
+			} else {
+				ut_delay(ut_rnd_interval(0, max_delay));
+			}
+		}
+
+		m_policy.add(n_spins, n_waits);
+	}
+
+	/** @return the lock state. */
+	int32 state() const
+		UNIV_NOTHROW
+	{
+		return(m_lock_word);
+	}
+
+	/** The event that the mutex will wait in sync0arr.cc
+	@return event instance */
+	os_event_t event()
+		UNIV_NOTHROW
+	{
+		return(m_event);
+	}
+
+	/** @return non-const version of the policy */
+	MutexPolicy& policy()
+		UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+	/** @return const version of the policy */
+	const MutexPolicy& policy() const
+		UNIV_NOTHROW
+	{
+		return(m_policy);
+	}
+
+private:
+	/** Disable copying */
+	TTASEventMutex(const TTASEventMutex&);
+	TTASEventMutex& operator=(const TTASEventMutex&);
+
+	/** lock_word is the target of the atomic test-and-set instruction
+	when atomic operations are enabled. */
+	int32			m_lock_word;
+
+	/** Used by sync0arr.cc for the wait queue */
+	os_event_t		m_event;
+
+	/** Policy data */
+	MutexPolicy		m_policy;
+};
+
+/** Mutex interface for all policy mutexes. This class handles the interfacing
+with the Performance Schema instrumentation. */
+template <typename MutexImpl>
+struct PolicyMutex
+{
+	typedef MutexImpl MutexType;
+	typedef typename MutexImpl::MutexPolicy Policy;
+
+	PolicyMutex() UNIV_NOTHROW : m_impl()
+	{
+#ifdef UNIV_PFS_MUTEX
+		m_ptr = 0;
+#endif /* UNIV_PFS_MUTEX */
+	}
+
+	~PolicyMutex() { }
+
+	/** @return non-const version of the policy */
+	Policy& policy() UNIV_NOTHROW
+	{
+		return(m_impl.policy());
+	}
+
+	/** @return const version of the policy */
+	const Policy& policy() const UNIV_NOTHROW
+	{
+		return(m_impl.policy());
+	}
+
+	/** Release the mutex. */
+	void exit() UNIV_NOTHROW
+	{
+#ifdef UNIV_PFS_MUTEX
+		pfs_exit();
+#endif /* UNIV_PFS_MUTEX */
+
+		policy().release(m_impl);
+
+		m_impl.exit();
+	}
+
+	/** Acquire the mutex.
+	@param n_spins	max number of spins
+	@param n_delay	max delay per spin
+	@param name	filename where locked
+	@param line	line number where locked */
+	void enter(
+		uint32_t	n_spins,
+		uint32_t	n_delay,
+		const char*	name,
+		uint32_t	line) UNIV_NOTHROW
+	{
+#ifdef UNIV_PFS_MUTEX
+		/* Note: locker is really an alias for state. That's why
+		it has to be in the same scope during pfs_end(). */
+
+		PSI_mutex_locker_state	state;
+		PSI_mutex_locker*	locker;
+
+		locker = pfs_begin_lock(&state, name, line);
+#endif /* UNIV_PFS_MUTEX */
+
+		policy().enter(m_impl, name, line);
+
+		m_impl.enter(n_spins, n_delay, name, line);
+
+		policy().locked(m_impl, name, line);
+#ifdef UNIV_PFS_MUTEX
+		pfs_end(locker, 0);
+#endif /* UNIV_PFS_MUTEX */
+	}
+
+	/** Try to lock the mutex.
+	@param name	filename where locked
+	@param line	line number where locked
+	@return 0 on success and 1 otherwise */
+	int trylock(const char* name, uint32_t line) UNIV_NOTHROW
+	{
+#ifdef UNIV_PFS_MUTEX
+		/* Note: locker is really an alias for state. That's why
+		it has to be in the same scope during pfs_end(). */
+
+		PSI_mutex_locker_state	state;
+		PSI_mutex_locker*	locker;
+
+		locker = pfs_begin_trylock(&state, name, line);
+#endif /* UNIV_PFS_MUTEX */
+
+		/* There is a subtlety here, we check the mutex ordering
+		after locking here. This is only done to avoid add and
+		then remove if the trylock was unsuccessful. */
+
+		int ret = m_impl.try_lock() ? 0 : 1;
+
+		if (ret == 0) {
+			policy().enter(m_impl, name, line);
+
+			policy().locked(m_impl, name, line);
+		}
+		/* pfs_end() takes 0 for success, 1 for failure: pass ret. */
+#ifdef UNIV_PFS_MUTEX
+		pfs_end(locker, ret);
+#endif /* UNIV_PFS_MUTEX */
+
+		return(ret);
+	}
+
+#ifdef UNIV_DEBUG
+	/** @return true if the thread owns the mutex. */
+	bool is_owned() const UNIV_NOTHROW
+	{
+		return(policy().is_owned());
+	}
+#endif /* UNIV_DEBUG */
+
+	/**
+	Initialise the mutex.
+
+	@param[in]	id              Mutex ID
+	@param[in]	filename	file where created
+	@param[in]	line		line number in file where created */
+	void init(
+		latch_id_t      id,
+		const char*	filename,
+		uint32_t	line)
+		UNIV_NOTHROW
+	{
+#ifdef UNIV_PFS_MUTEX
+		pfs_add(sync_latch_get_pfs_key(id));
+#endif /* UNIV_PFS_MUTEX */
+
+		m_impl.init(id, filename, line);
+		policy().init(m_impl, id, filename, line);
+	}
+
+	/** Free resources (if any) */
+	void destroy() UNIV_NOTHROW
+	{
+#ifdef UNIV_PFS_MUTEX
+		pfs_del();
+#endif /* UNIV_PFS_MUTEX */
+		m_impl.destroy();
+		policy().destroy();
+	}
+
+	/** Required for os_event_t */
+	operator sys_mutex_t*() UNIV_NOTHROW
+	{
+		return(m_impl.operator sys_mutex_t*());
+	}
+
+#ifdef UNIV_PFS_MUTEX
+	/** Performance schema monitoring - register mutex with PFS.
+
+	Note: This is public only because we want to get around an issue
+	with registering a subset of buffer pool pages with PFS when
+	PFS_GROUP_BUFFER_SYNC is defined. Therefore this has to then
+	be called by external code (see buf0buf.cc).
+
+	@param key - Performance Schema key. */
+	void pfs_add(mysql_pfs_key_t key) UNIV_NOTHROW
+	{
+		ut_ad(m_ptr == 0);
+		m_ptr = PSI_MUTEX_CALL(init_mutex)(key, this);
+	}
+
+private:
+
+	/** Performance schema monitoring.
+	@param state - PFS locker state
+	@param name - file name where locked
+	@param line - line number in file where locked */
+	PSI_mutex_locker* pfs_begin_lock(
+		PSI_mutex_locker_state*	state,
+		const char*		name,
+		uint32_t		line) UNIV_NOTHROW
+	{
+		if (m_ptr != 0) {
+			return(PSI_MUTEX_CALL(start_mutex_wait)(
+					state, m_ptr,
+					PSI_MUTEX_LOCK, name, (uint) line));
+		}
+
+		return(0);
+	}
+
+	/** Performance schema monitoring.
+	@param state - PFS locker state
+	@param name - file name where locked
+	@param line - line number in file where locked */
+	PSI_mutex_locker* pfs_begin_trylock(
+		PSI_mutex_locker_state*	state,
+		const char*		name,
+		uint32_t		line) UNIV_NOTHROW
+	{
+		if (m_ptr != 0) {
+			return(PSI_MUTEX_CALL(start_mutex_wait)(
+					state, m_ptr,
+					PSI_MUTEX_TRYLOCK, name, (uint) line));
+		}
+
+		return(0);
+	}
+
+	/** Performance schema monitoring
+	@param locker - PFS identifier
+	@param ret - 0 for success and 1 for failure */
+	void pfs_end(PSI_mutex_locker* locker, int ret) UNIV_NOTHROW
+	{
+		if (locker != 0) {
+			PSI_MUTEX_CALL(end_mutex_wait)(locker, ret);
+		}
+	}
+
+	/** Performance schema monitoring - register mutex release */
+	void pfs_exit()
+	{
+		if (m_ptr != 0) {
+			PSI_MUTEX_CALL(unlock_mutex)(m_ptr);
+		}
+	}
+
+	/** Performance schema monitoring - deregister */
+	void pfs_del()
+	{
+		if (m_ptr != 0) {
+			PSI_MUTEX_CALL(destroy_mutex)(m_ptr);
+			m_ptr = 0;
+		}
+	}
+#endif /* UNIV_PFS_MUTEX */
+
+private:
+	/** The mutex implementation */
+	MutexImpl		m_impl;
+
+#ifdef UNIV_PFS_MUTEX
+	/** The performance schema instrumentation hook. */
+	PSI_mutex*		m_ptr;
+#endif /* UNIV_PFS_MUTEX */
+
+};
+
+#endif /* ib0mutex_h */
+
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 09c48822b9f..f3fd5e9a364 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -94,13 +94,11 @@ free bits could momentarily be set too high. */
 /******************************************************************//**
 Creates the insert buffer data structure at a database startup.
 @return DB_SUCCESS or failure */
-UNIV_INTERN
 dberr_t
 ibuf_init_at_db_start(void);
 /*=======================*/
 /*********************************************************************//**
 Updates the max_size value for ibuf. */
-UNIV_INTERN
 void
 ibuf_max_size_update(
 /*=================*/
@@ -109,7 +107,6 @@ ibuf_max_size_update(
 /*********************************************************************//**
 Reads the biggest tablespace id from the high end of the insert buffer
 tree and updates the counter in fil_system. */
-UNIV_INTERN
 void
 ibuf_update_max_tablespace_id(void);
 /*===============================*/
@@ -131,7 +128,6 @@ ibuf_mtr_commit(
 	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Initializes an ibuf bitmap page. */
-UNIV_INTERN
 void
 ibuf_bitmap_page_init(
 /*==================*/
@@ -146,7 +142,6 @@ buffer bitmap must never exceed the free space on a page.  It is safe
 to decrement or reset the bits in the bitmap in a mini-transaction
 that is committed before the mini-transaction that affects the free
 space. */
-UNIV_INTERN
 void
 ibuf_reset_free_bits(
 /*=================*/
@@ -189,7 +184,6 @@ thread until mtr is committed.  NOTE: The free bits in the insert
 buffer bitmap must never exceed the free space on a page.  It is safe
 to set the free bits in the same mini-transaction that updated the
 page. */
-UNIV_INTERN
 void
 ibuf_update_free_bits_low(
 /*======================*/
@@ -208,7 +202,6 @@ thread until mtr is committed.  NOTE: The free bits in the insert
 buffer bitmap must never exceed the free space on a page.  It is safe
 to set the free bits in the same mini-transaction that updated the
 page. */
-UNIV_INTERN
 void
 ibuf_update_free_bits_zip(
 /*======================*/
@@ -221,12 +214,9 @@ virtually prevent any further operations until mtr is committed.
 NOTE: The free bits in the insert buffer bitmap must never exceed the
 free space on a page.  It is safe to set the free bits in the same
 mini-transaction that updated the pages. */
-UNIV_INTERN
 void
 ibuf_update_free_bits_for_two_pages_low(
 /*====================================*/
-	ulint		zip_size,/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
 	buf_block_t*	block1,	/*!< in: index page */
 	buf_block_t*	block2,	/*!< in: index page */
 	mtr_t*		mtr);	/*!< in: mtr */
@@ -254,114 +244,116 @@ ibool
 ibuf_inside(
 /*========*/
 	const mtr_t*	mtr)	/*!< in: mini-transaction */
-	MY_ATTRIBUTE((nonnull, pure));
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page (level 3 page) address.
-@return	TRUE if a bitmap page */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return TRUE if a bitmap page */
 UNIV_INLINE
 ibool
 ibuf_bitmap_page(
-/*=============*/
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	page_no);/*!< in: page number */
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return	TRUE if level 2 or level 3 page */
-UNIV_INTERN
+	const page_id_t&	page_id,
+	const page_size_t&	page_size);
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	x_latch		FALSE if relaxed check (avoid latching the
+bitmap page)
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in,out]	mtr		mtr which will contain an x-latch to the
+bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
+in which case a new transaction is created.
+@return TRUE if level 2 or level 3 page */
 ibool
 ibuf_page_low(
-/*==========*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint		page_no,/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
 #ifdef UNIV_DEBUG
-	ibool		x_latch,/*!< in: FALSE if relaxed check
-				(avoid latching the bitmap page) */
+	ibool			x_latch,
 #endif /* UNIV_DEBUG */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mtr which will contain an
-				x-latch to the bitmap page if the page
-				is not one of the fixed address ibuf
-				pages, or NULL, in which case a new
-				transaction is created. */
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr)
 	MY_ATTRIBUTE((warn_unused_result));
+
 #ifdef UNIV_DEBUG
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages.  Must not be called when recv_no_ibuf_operations==TRUE.
-@param space	tablespace identifier
-@param zip_size	compressed page size in bytes, or 0
-@param page_no	page number
-@param mtr	mini-transaction or NULL
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in]	page_id		tablespace/page identifier
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction or NULL
 @return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr)			\
-	ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr)
+# define ibuf_page(page_id, page_size, mtr)	\
+	ibuf_page_low(page_id, page_size, TRUE, __FILE__, __LINE__, mtr)
+
 #else /* UVIV_DEBUG */
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages.  Must not be called when recv_no_ibuf_operations==TRUE.
-@param space	tablespace identifier
-@param zip_size	compressed page size in bytes, or 0
-@param page_no	page number
-@param mtr	mini-transaction or NULL
+
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==true.
+@param[in]	page_id		tablespace/page identifier
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction or NULL
 @return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr)			\
-	ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr)
+# define ibuf_page(page_id, page_size, mtr)	\
+	ibuf_page_low(page_id, page_size, __FILE__, __LINE__, mtr)
+
 #endif /* UVIV_DEBUG */
 /***********************************************************************//**
 Frees excess pages from the ibuf free list. This function is called when an OS
 thread calls fsp services to allocate a new file segment, or a new page to a
 file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
 void
 ibuf_free_excess_pages(void);
 /*========================*/
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
+
+/** Buffer an operation in the insert/delete buffer, instead of doing it
 directly to the disk page, if this is possible. Does not do it if the index
 is clustered or unique.
-@return	TRUE if success */
-UNIV_INTERN
+@param[in]	op		operation type
+@param[in]	entry		index entry to insert
+@param[in,out]	index		index where to insert
+@param[in]	page_id		page id where to insert
+@param[in]	page_size	page size
+@param[in,out]	thr		query thread
+@return TRUE if success */
 ibool
 ibuf_insert(
-/*========*/
-	ibuf_op_t	op,	/*!< in: operation type */
-	const dtuple_t*	entry,	/*!< in: index entry to insert */
-	dict_index_t*	index,	/*!< in: index where to insert */
-	ulint		space,	/*!< in: space id where to insert */
-	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint		page_no,/*!< in: page number where to insert */
-	que_thr_t*	thr);	/*!< in: query thread */
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
+	ibuf_op_t		op,
+	const dtuple_t*		entry,
+	dict_index_t*		index,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	que_thr_t*		thr);
+
+/** When an index page is read from a disk to the buffer pool, this function
 applies any buffered operations to the page and deletes the entries from the
 insert buffer. If the page is not read, but created in the buffer pool, this
 function deletes its buffered entries from the insert buffer; there can
 exist entries for such a page if the page belonged to an index which
-subsequently was dropped. */
-UNIV_INTERN
+subsequently was dropped.
+@param[in,out]	block			if page has been read from disk,
+pointer to the page x-latched, else NULL
+@param[in]	page_id			page id of the index page
+@param[in]	page_size		pointer to the page size
+@param[in]	update_ibuf_bitmap	normally TRUE; FALSE when the tablespace
+has been or is being deleted, to avoid updating a non-existent bitmap page */
 void
 ibuf_merge_or_delete_for_page(
-/*==========================*/
-	buf_block_t*	block,	/*!< in: if page has been read from
-				disk, pointer to the page x-latched,
-				else NULL */
-	ulint		space,	/*!< in: space id of the index page */
-	ulint		page_no,/*!< in: page number of the index page */
-	ulint		zip_size,/*!< in: compressed page size in bytes,
-				or 0 */
-	ibool		update_ibuf_bitmap);/*!< in: normally this is set
-				to TRUE, but if we have deleted or are
-				deleting the tablespace, then we
-				naturally do not want to update a
-				non-existent bitmap page */
+	buf_block_t*		block,
+	const page_id_t&	page_id,
+	const page_size_t*	page_size,
+	ibool			update_ibuf_bitmap);
+
 /*********************************************************************//**
 Deletes all entries in the insert buffer for a given space id. This is used
 in DISCARD TABLESPACE and IMPORT TABLESPACE.
 NOTE: this does not update the page free bitmaps in the space. The space will
 become CORRUPT when you call this function! */
-UNIV_INTERN
 void
 ibuf_delete_for_discarded_space(
 /*============================*/
@@ -373,19 +365,13 @@ based on the current size of the change buffer.
 @return a lower limit for the combined size in bytes of entries which
 will be merged from ibuf trees to the pages read, 0 if ibuf is
 empty */
-UNIV_INTERN
 ulint
 ibuf_merge_in_background(
-	bool	full);	/*!< in: TRUE if the caller wants to
-			do a full contract based on PCT_IO(100).
-			If FALSE then the size of contract
-			batch is determined based on the
-			current size of the ibuf tree. */
+	bool	full);
 
 /** Contracts insert buffer trees by reading pages referring to space_id
 to the buffer pool.
 @returns number of pages merged.*/
-UNIV_INTERN
 ulint
 ibuf_merge_space(
 /*=============*/
@@ -394,8 +380,7 @@ ibuf_merge_space(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Parses a redo log record of an ibuf bitmap page init.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 ibuf_parse_bitmap_init(
 /*===================*/
@@ -405,27 +390,24 @@ ibuf_parse_bitmap_init(
 	mtr_t*		mtr);	/*!< in: mtr or NULL */
 #ifndef UNIV_HOTBACKUP
 #ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
+
+/** Gets the ibuf count for a given page.
+@param[in]	page_id	page id
 @return number of entries in the insert buffer currently buffered for
 this page */
-UNIV_INTERN
 ulint
 ibuf_count_get(
-/*===========*/
-	ulint	space,	/*!< in: space id */
-	ulint	page_no);/*!< in: page number */
+	const page_id_t&	page_id);
+
 #endif
 /******************************************************************//**
 Looks if the insert buffer is empty.
-@return	true if empty */
-UNIV_INTERN
+@return true if empty */
 bool
 ibuf_is_empty(void);
 /*===============*/
 /******************************************************************//**
 Prints info of ibuf. */
-UNIV_INTERN
 void
 ibuf_print(
 /*=======*/
@@ -433,15 +415,13 @@ ibuf_print(
 /********************************************************************
 Read the first two bytes from a record's fourth field (counter field in new
 records; something else in older records).
-@return	"counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
-UNIV_INTERN
+@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
 ulint
 ibuf_rec_get_counter(
 /*=================*/
 	const rec_t*	rec);	/*!< in: ibuf record */
 /******************************************************************//**
 Closes insert buffer and frees the data structures. */
-UNIV_INTERN
 void
 ibuf_close(void);
 /*============*/
@@ -449,7 +429,6 @@ ibuf_close(void);
 /******************************************************************//**
 Checks the insert buffer bitmaps on IMPORT TABLESPACE.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 ibuf_check_bitmap_on_import(
 /*========================*/
@@ -457,6 +436,14 @@ ibuf_check_bitmap_on_import(
 	ulint		space_id)	/*!< in: tablespace identifier */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
+/** Updates free bits and buffered bits for bulk loaded page.
+@param[in]      block   index page
+@param[in]      reset   flag if reset free val */
+void
+ibuf_set_bitmap_for_bulk_load(
+	buf_block_t*    block,
+	bool		reset);
+
 #define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
 #define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
 
@@ -468,7 +455,7 @@ for the file segment from which the pages for the ibuf tree are allocated */
 #define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
 
 /* The insert buffer tree itself is always located in space 0. */
-#define IBUF_SPACE_ID		0
+#define IBUF_SPACE_ID		static_cast<ulint>(0)
 
 #ifndef UNIV_NONINL
 #include "ibuf0ibuf.ic"
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index a5df9f7b6b4..de39592ae6b 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -25,6 +25,7 @@ Created 7/19/1997 Heikki Tuuri
 
 #include "page0page.h"
 #include "page0zip.h"
+#include "fsp0types.h"
 #ifndef UNIV_HOTBACKUP
 #include "buf0lru.h"
 
@@ -43,7 +44,7 @@ ibuf_mtr_start(
 	mtr_t*	mtr)	/*!< out: mini-transaction */
 {
 	mtr_start(mtr);
-	mtr->inside_ibuf = TRUE;
+	mtr->enter_ibuf();
 }
 /***************************************************************//**
 Commits an insert buffer mini-transaction. */
@@ -53,8 +54,9 @@ ibuf_mtr_commit(
 /*============*/
 	mtr_t*	mtr)	/*!< in/out: mini-transaction */
 {
-	ut_ad(mtr->inside_ibuf);
-	ut_d(mtr->inside_ibuf = FALSE);
+	ut_ad(mtr->is_inside_ibuf());
+	ut_d(mtr->exit_ibuf());
+
 	mtr_commit(mtr);
 }
 
@@ -93,7 +95,6 @@ Sets the free bit of the page in the ibuf bitmap. This is done in a separate
 mini-transaction, hence this operation does not restrict further work to only
 ibuf bitmap operations, which would result if the latch to the bitmap page
 were kept. */
-UNIV_INTERN
 void
 ibuf_set_free_bits_func(
 /*====================*/
@@ -127,6 +128,7 @@ ibuf_should_try(
 	return(ibuf_use != IBUF_USE_NONE
 	       && ibuf->max_size != 0
 	       && !dict_index_is_clust(index)
+	       && !dict_index_is_spatial(index)
 	       && index->table->quiesce == QUIESCE_NONE
 	       && (ignore_sec_unique || !dict_index_is_unique(index))
 	       && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE);
@@ -145,54 +147,39 @@ ibuf_inside(
 /*========*/
 	const mtr_t*	mtr)	/*!< in: mini-transaction */
 {
-	return(mtr->inside_ibuf);
+	return(mtr->is_inside_ibuf());
 }
 
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page address.
-@return	TRUE if a bitmap page */
+/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return TRUE if a bitmap page */
 UNIV_INLINE
 ibool
 ibuf_bitmap_page(
-/*=============*/
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	page_no)/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size)
 {
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (!zip_size) {
-		return((page_no & (UNIV_PAGE_SIZE - 1))
-			== FSP_IBUF_BITMAP_OFFSET);
-	}
-
-	return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET);
+	return((page_id.page_no() & (page_size.physical() - 1))
+	       == FSP_IBUF_BITMAP_OFFSET);
 }
 
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return	value for ibuf bitmap bits */
+/** Translates the free space on a page to a value in the ibuf bitmap.
+@param[in]	page_size	page size in bytes
+@param[in]	max_ins_size	maximum insert size after reorganize for
+the page
+@return value for ibuf bitmap bits */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_bits(
-/*===========================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	max_ins_size)	/*!< in: maximum insert size after reorganize
-				for the page */
+	ulint	page_size,
+	ulint	max_ins_size)
 {
 	ulint	n;
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+	ut_ad(ut_is_2pow(page_size));
+	ut_ad(page_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
 
-	if (zip_size) {
-		n = max_ins_size
-			/ (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	} else {
-		n = max_ins_size
-			/ (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	}
+	n = max_ins_size / (page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
 
 	if (n == 3) {
 		n = 2;
@@ -205,54 +192,43 @@ ibuf_index_page_calc_free_bits(
 	return(n);
 }
 
-/*********************************************************************//**
-Translates the ibuf free bits to the free space on a page in bytes.
-@return	maximum insert size after reorganize for the page */
+/** Translates the ibuf free bits to the free space on a page in bytes.
+@param[in]	page_size	page size
+@param[in]	bits		value for ibuf bitmap bits
+@return maximum insert size after reorganize for the page */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_from_bits(
-/*================================*/
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	bits)	/*!< in: value for ibuf bitmap bits */
+	const page_size_t&	page_size,
+	ulint			bits)
 {
 	ut_ad(bits < 4);
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
-	if (zip_size) {
-		if (bits == 3) {
-			return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-		}
-
-		return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	}
+	ut_ad(!page_size.is_compressed()
+	      || page_size.physical() > IBUF_PAGE_SIZE_PER_FREE_SPACE);
 
 	if (bits == 3) {
-		return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+		return(4 * page_size.physical()
+		       / IBUF_PAGE_SIZE_PER_FREE_SPACE);
 	}
 
-	return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
+	return(bits * (page_size.physical()
+		       / IBUF_PAGE_SIZE_PER_FREE_SPACE));
 }
 
 /*********************************************************************//**
 Translates the free space on a compressed page to a value in the ibuf bitmap.
-@return	value for ibuf bitmap bits */
+@return value for ibuf bitmap bits */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free_zip(
 /*==========================*/
-	ulint			zip_size,
-					/*!< in: compressed page size in bytes */
 	const buf_block_t*	block)	/*!< in: buffer block */
 {
 	ulint			max_ins_size;
 	const page_zip_des_t*	page_zip;
 	lint			zip_max_ins;
 
-	ut_ad(zip_size == buf_block_get_zip_size(block));
-	ut_ad(zip_size);
+	ut_ad(block->page.size.is_compressed());
 
 	/* Consider the maximum insert size on the uncompressed page
 	without reorganizing the page. We must not assume anything
@@ -275,31 +251,29 @@ ibuf_index_page_calc_free_zip(
 		max_ins_size = (ulint) zip_max_ins;
 	}
 
-	return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
+	return(ibuf_index_page_calc_free_bits(block->page.size.physical(),
+					      max_ins_size));
 }
 
 /*********************************************************************//**
 Translates the free space on a page to a value in the ibuf bitmap.
-@return	value for ibuf bitmap bits */
+@return value for ibuf bitmap bits */
 UNIV_INLINE
 ulint
 ibuf_index_page_calc_free(
 /*======================*/
-	ulint			zip_size,/*!< in: compressed page size in bytes;
-					0 for uncompressed pages */
 	const buf_block_t*	block)	/*!< in: buffer block */
 {
-	ut_ad(zip_size == buf_block_get_zip_size(block));
-
-	if (!zip_size) {
+	if (!block->page.size.is_compressed()) {
 		ulint	max_ins_size;
 
 		max_ins_size = page_get_max_insert_size_after_reorganize(
 			buf_block_get_frame(block), 1);
 
-		return(ibuf_index_page_calc_free_bits(0, max_ins_size));
+		return(ibuf_index_page_calc_free_bits(
+				block->page.size.physical(), max_ins_size));
 	} else {
-		return(ibuf_index_page_calc_free_zip(zip_size, block));
+		return(ibuf_index_page_calc_free_zip(block));
 	}
 }
 
@@ -335,21 +309,22 @@ ibuf_update_free_bits_if_full(
 	ulint	before;
 	ulint	after;
 
-	ut_ad(!buf_block_get_page_zip(block));
+	ut_ad(buf_block_get_page_zip(block) == NULL);
 
-	before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+	before = ibuf_index_page_calc_free_bits(
+		block->page.size.physical(), max_ins_size);
 
 	if (max_ins_size >= increase) {
 #if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
 # error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
 #endif
-		after = ibuf_index_page_calc_free_bits(0, max_ins_size
-						       - increase);
+		after = ibuf_index_page_calc_free_bits(
+			block->page.size.physical(), max_ins_size - increase);
 #ifdef UNIV_IBUF_DEBUG
-		ut_a(after <= ibuf_index_page_calc_free(0, block));
+		ut_a(after <= ibuf_index_page_calc_free(block));
 #endif
 	} else {
-		after = ibuf_index_page_calc_free(0, block);
+		after = ibuf_index_page_calc_free(block);
 	}
 
 	if (after == 0) {
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
index 0054850b526..ca97d22556a 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innobase/include/lock0iter.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,6 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
    bit_no is calculated in this function by using
    lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
    of a wait lock. */
-UNIV_INTERN
 void
 lock_queue_iterator_reset(
 /*======================*/
@@ -59,8 +58,7 @@ lock_queue_iterator_reset(
 Gets the previous lock in the lock queue, returns NULL if there are no
 more locks (i.e. the current lock is the first one). The iterator is
 receded (if not-NULL is returned).
-@return	previous lock or NULL */
-
+@return previous lock or NULL */
 const lock_t*
 lock_queue_iterator_get_prev(
 /*=========================*/
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index c00b5450036..f6bbd571907 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -34,47 +34,52 @@ Created 5/7/1996 Heikki Tuuri
 #include "dict0types.h"
 #include "que0types.h"
 #include "lock0types.h"
-#include "read0types.h"
 #include "hash0hash.h"
 #include "srv0srv.h"
 #include "ut0vec.h"
-
-#ifdef UNIV_DEBUG
-extern ibool	lock_print_waits;
-#endif /* UNIV_DEBUG */
+#include "gis0rtree.h"
+#include "lock0prdt.h"
 
 /** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
 	setting innodb_lock_schedule_algorithm. */
 enum innodb_lock_schedule_algorithm_t {
-	INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,		/*!< First Come First Served */
-	INNODB_LOCK_SCHEDULE_ALGORITHM_VATS			/*!< Variance-Aware-Transaction-Scheduling */
+	/** First Come First Served */
+	INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
+	/** Variance-Aware-Transaction-Scheduling */
+	INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
 };
 
 extern ulong innodb_lock_schedule_algorithm;
 
+// Forward declaration
+class ReadView;
+
 /*********************************************************************//**
 Gets the size of a lock struct.
-@return	size in bytes */
-UNIV_INTERN
+@return size in bytes */
 ulint
 lock_get_size(void);
 /*===============*/
 /*********************************************************************//**
 Creates the lock system at database start. */
-UNIV_INTERN
 void
 lock_sys_create(
 /*============*/
 	ulint	n_cells);	/*!< in: number of slots in lock hash table */
+/** Resize the lock hash table.
+@param[in]	n_cells	number of slots in lock hash table */
+void
+lock_sys_resize(
+	ulint	n_cells);
+
 /*********************************************************************//**
 Closes the lock system at database shutdown. */
-UNIV_INTERN
 void
 lock_sys_close(void);
 /*================*/
 /*********************************************************************//**
 Gets the heap_no of the smallest user record on a page.
-@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
 UNIV_INLINE
 ulint
 lock_get_min_heap_no(
@@ -85,7 +90,6 @@ Updates the lock table when we have reorganized a page. NOTE: we copy
 also the locks set on the infimum of the page; the infimum may carry
 locks if an update of a record is occurring on the page, and its locks
 were temporarily stored on the infimum. */
-UNIV_INTERN
 void
 lock_move_reorganize_page(
 /*======================*/
@@ -96,7 +100,6 @@ lock_move_reorganize_page(
 /*************************************************************//**
 Moves the explicit locks on user records to another page if a record
 list end is moved to another page. */
-UNIV_INTERN
 void
 lock_move_rec_list_end(
 /*===================*/
@@ -107,7 +110,6 @@ lock_move_rec_list_end(
 /*************************************************************//**
 Moves the explicit locks on user records to another page if a record
 list start is moved to another page. */
-UNIV_INTERN
 void
 lock_move_rec_list_start(
 /*=====================*/
@@ -123,7 +125,6 @@ lock_move_rec_list_start(
 						were copied */
 /*************************************************************//**
 Updates the lock table when a page is split to the right. */
-UNIV_INTERN
 void
 lock_update_split_right(
 /*====================*/
@@ -131,7 +132,6 @@ lock_update_split_right(
 	const buf_block_t*	left_block);	/*!< in: left page */
 /*************************************************************//**
 Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
 void
 lock_update_merge_right(
 /*====================*/
@@ -151,7 +151,6 @@ root page, even though they do not make sense on other than leaf
 pages: the reason is that in a pessimistic update the infimum record
 of the root page will act as a dummy carrier of the locks of the record
 to be updated. */
-UNIV_INTERN
 void
 lock_update_root_raise(
 /*===================*/
@@ -160,7 +159,6 @@ lock_update_root_raise(
 /*************************************************************//**
 Updates the lock table when a page is copied to another and the original page
 is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
 void
 lock_update_copy_and_discard(
 /*=========================*/
@@ -170,7 +168,6 @@ lock_update_copy_and_discard(
 						NOT the root! */
 /*************************************************************//**
 Updates the lock table when a page is split to the left. */
-UNIV_INTERN
 void
 lock_update_split_left(
 /*===================*/
@@ -178,7 +175,6 @@ lock_update_split_left(
 	const buf_block_t*	left_block);	/*!< in: left page */
 /*************************************************************//**
 Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
 void
 lock_update_merge_left(
 /*===================*/
@@ -202,7 +198,6 @@ lock_update_split_and_merge(
 /*************************************************************//**
 Resets the original locks on heir and replaces them with gap type locks
 inherited from rec. */
-UNIV_INTERN
 void
 lock_rec_reset_and_inherit_gap_locks(
 /*=================================*/
@@ -218,7 +213,6 @@ lock_rec_reset_and_inherit_gap_locks(
 						donating record */
 /*************************************************************//**
 Updates the lock table when a page is discarded. */
-UNIV_INTERN
 void
 lock_update_discard(
 /*================*/
@@ -230,7 +224,6 @@ lock_update_discard(
 						which will be discarded */
 /*************************************************************//**
 Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
 void
 lock_update_insert(
 /*===============*/
@@ -238,7 +231,6 @@ lock_update_insert(
 	const rec_t*		rec);	/*!< in: the inserted record */
 /*************************************************************//**
 Updates the lock table when a record is removed. */
-UNIV_INTERN
 void
 lock_update_delete(
 /*===============*/
@@ -251,7 +243,6 @@ updated and the size of the record changes in the update. The record
 is in such an update moved, perhaps to another page. The infimum record
 acts as a dummy carrier record, taking care of lock releases while the
 actual record is being moved. */
-UNIV_INTERN
 void
 lock_rec_store_on_page_infimum(
 /*===========================*/
@@ -264,7 +255,6 @@ lock_rec_store_on_page_infimum(
 /*********************************************************************//**
 Restores the state of explicit lock requests on a single record, where the
 state was stored on the infimum of the page. */
-UNIV_INTERN
 void
 lock_rec_restore_from_page_infimum(
 /*===============================*/
@@ -278,8 +268,7 @@ lock_rec_restore_from_page_infimum(
 					the infimum */
 /*********************************************************************//**
 Determines if there are explicit record locks on a page.
-@return	an explicit record lock on the page, or NULL if there are none */
-UNIV_INTERN
+@return an explicit record lock on the page, or NULL if there are none */
 lock_t*
 lock_rec_expl_exist_on_page(
 /*========================*/
@@ -292,8 +281,7 @@ a record. If they do, first tests if the query thread should anyway
 be suspended for some reason; if not, then puts the transaction and
 the query thread to the lock wait state and inserts a waiting request
 for a gap x-lock to the lock queue.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
 dberr_t
 lock_rec_insert_check_and_lock(
 /*===========================*/
@@ -308,7 +296,83 @@ lock_rec_insert_check_and_lock(
 				inserted record maybe should inherit
 				LOCK_GAP type locks from the successor
 				record */
-	MY_ATTRIBUTE((nonnull(2,3,4,6,7), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Enqueues a waiting request for a lock which cannot be granted immediately.
+Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
+there was a deadlock, but another transaction was chosen as a victim,
+and we got the lock immediately: no need to wait then */
+dberr_t
+lock_rec_enqueue_waiting(
+/*=====================*/
+	ulint			type_mode,/*!< in: lock mode this
+					transaction is requesting:
+					LOCK_S or LOCK_X, possibly
+					ORed with LOCK_GAP or
+					LOCK_REC_NOT_GAP, ORed with
+					LOCK_INSERT_INTENTION if this
+					waiting lock request is set
+					when performing an insert of
+					an index record */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	que_thr_t*		thr,	/*!< in: query thread */
+	lock_prdt_t*		prdt);	/*!< in: Minimum Bounding Box */
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue and
+grants locks to other transactions in the queue if they now are entitled
+to a lock. NOTE: all record locks contained in in_lock are removed. */
+void
+lock_rec_dequeue_from_page(
+/*=======================*/
+        lock_t*         in_lock);        /*!< in: record lock object: all
+                                        record locks which are contained in
+                                        this lock object are removed;
+                                        transactions waiting behind will
+                                        get their lock requests granted,
+                                        if they are now qualified to it */
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+UNIV_INLINE
+void
+lock_rec_move(
+/*==========*/
+        const buf_block_t*      receiver,       /*!< in: buffer block containing
+                                                the receiving record */
+        const buf_block_t*      donator,        /*!< in: buffer block containing
+                                                the donating record */
+        ulint                   receiver_heap_no,/*!< in: heap_no of the record
+                                                which gets the locks; there
+                                                must be no lock requests
+                                                on it! */
+        ulint                   donator_heap_no);/*!< in: heap_no of the record
+                                                which gives the locks */
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+void
+lock_rec_move_low(
+/*==============*/
+	hash_table_t*		lock_hash,	/*!< in: hash table to use */
+        const buf_block_t*      receiver,       /*!< in: buffer block containing
+                                                the receiving record */
+        const buf_block_t*      donator,        /*!< in: buffer block containing
+                                                the donating record */
+        ulint                   receiver_heap_no,/*!< in: heap_no of the record
+                                                which gets the locks; there
+                                                must be no lock requests
+                                                on it! */
+        ulint                   donator_heap_no);/*!< in: heap_no of the record
+                                                which gives the locks */
 /*********************************************************************//**
 Checks if locks of other transactions prevent an immediate modify (update,
 delete mark, or delete unmark) of a clustered index record. If they do,
@@ -316,8 +380,7 @@ first tests if the query thread should anyway be suspended for some
 reason; if not, then puts the transaction and the query thread to the
 lock wait state and inserts a waiting request for a record x-lock to the
 lock queue.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
 dberr_t
 lock_clust_rec_modify_check_and_lock(
 /*=================================*/
@@ -329,12 +392,11 @@ lock_clust_rec_modify_check_and_lock(
 	dict_index_t*		index,	/*!< in: clustered index */
 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
 	que_thr_t*		thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((warn_unused_result, nonnull));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Checks if locks of other transactions prevent an immediate modify
 (delete mark or delete unmark) of a secondary index record.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
 dberr_t
 lock_sec_rec_modify_check_and_lock(
 /*===============================*/
@@ -350,13 +412,12 @@ lock_sec_rec_modify_check_and_lock(
 	que_thr_t*	thr,	/*!< in: query thread
 				(can be NULL if BTR_NO_LOCKING_FLAG) */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,4,6)));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Like lock_clust_rec_read_check_and_lock(), but reads a
 secondary index record.
-@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
 or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
 dberr_t
 lock_sec_rec_read_check_and_lock(
 /*=============================*/
@@ -369,7 +430,7 @@ lock_sec_rec_read_check_and_lock(
 					read cursor */
 	dict_index_t*		index,	/*!< in: secondary index */
 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
+	lock_mode		mode,	/*!< in: mode of the lock which
 					the read cursor should set on
 					records: LOCK_S or LOCK_X; the
 					latter is possible in
@@ -384,9 +445,8 @@ if the query thread should anyway be suspended for some reason; if not, then
 puts the transaction and the query thread to the lock wait state and inserts a
 waiting request for a record lock to the lock queue. Sets the requested mode
 lock on the record.
-@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
 or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
 dberr_t
 lock_clust_rec_read_check_and_lock(
 /*===============================*/
@@ -399,7 +459,7 @@ lock_clust_rec_read_check_and_lock(
 					read cursor */
 	dict_index_t*		index,	/*!< in: clustered index */
 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
+	lock_mode		mode,	/*!< in: mode of the lock which
 					the read cursor should set on
 					records: LOCK_S or LOCK_X; the
 					latter is possible in
@@ -416,8 +476,7 @@ waiting request for a record lock to the lock queue. Sets the requested mode
 lock on the record. This is an alternative version of
 lock_clust_rec_read_check_and_lock() that does not require the parameter
 "offsets".
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
 dberr_t
 lock_clust_rec_read_check_and_lock_alt(
 /*===================================*/
@@ -429,7 +488,7 @@ lock_clust_rec_read_check_and_lock_alt(
 					be read or passed over by a
 					read cursor */
 	dict_index_t*		index,	/*!< in: clustered index */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
+	lock_mode		mode,	/*!< in: mode of the lock which
 					the read cursor should set on
 					records: LOCK_S or LOCK_X; the
 					latter is possible in
@@ -437,12 +496,11 @@ lock_clust_rec_read_check_and_lock_alt(
 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
 					LOCK_REC_NOT_GAP */
 	que_thr_t*		thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Checks that a record is seen in a consistent read.
 @return true if sees, or false if an earlier version of the record
 should be retrieved */
-UNIV_INTERN
 bool
 lock_clust_rec_cons_read_sees(
 /*==========================*/
@@ -450,7 +508,7 @@ lock_clust_rec_cons_read_sees(
 				passed over by a read cursor */
 	dict_index_t*	index,	/*!< in: clustered index */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	read_view_t*	view);	/*!< in: consistent read view */
+	ReadView*	view);	/*!< in: consistent read view */
 /*********************************************************************//**
 Checks that a non-clustered index record is seen in a consistent read.
 
@@ -461,20 +519,19 @@ record.
 
 @return true if certainly sees, or false if an earlier version of the
 clustered index record might be needed */
-UNIV_INTERN
 bool
 lock_sec_rec_cons_read_sees(
 /*========================*/
 	const rec_t*		rec,	/*!< in: user record which
 					should be read or passed over
 					by a read cursor */
-	const read_view_t*	view)	/*!< in: consistent read view */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const dict_index_t*     index,  /*!< in: index */
+	const ReadView*	view)	/*!< in: consistent read view */
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Locks the specified database table in the mode given. If the lock cannot
 be granted immediately, the query thread is put to wait.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
 dberr_t
 lock_table(
 /*=======*/
@@ -482,22 +539,33 @@ lock_table(
 				does nothing */
 	dict_table_t*	table,	/*!< in/out: database table
 				in dictionary cache */
-	enum lock_mode	mode,	/*!< in: lock mode */
+	lock_mode	mode,	/*!< in: lock mode */
 	que_thr_t*	thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Creates a table IX lock object for a resurrected transaction. */
-UNIV_INTERN
 void
 lock_table_ix_resurrect(
 /*====================*/
 	dict_table_t*	table,	/*!< in/out: table */
 	trx_t*		trx);	/*!< in/out: transaction */
+
+/** Sets a lock on a table based on the given mode.
+@param[in]	table	table to lock
+@param[in,out]	trx	transaction
+@param[in]	mode	LOCK_X or LOCK_S
+@return error code or DB_SUCCESS. */
+dberr_t
+lock_table_for_trx(
+	dict_table_t*	table,
+	trx_t*		trx,
+	enum lock_mode	mode)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /*************************************************************//**
 Removes a granted record lock of a transaction from the queue and grants
 locks to other transactions waiting in the queue if they now are entitled
 to a lock. */
-UNIV_INTERN
 void
 lock_rec_unlock(
 /*============*/
@@ -505,12 +573,11 @@ lock_rec_unlock(
 					set a record lock */
 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
 	const rec_t*		rec,	/*!< in: record */
-	enum lock_mode		lock_mode);/*!< in: LOCK_S or LOCK_X */
+	lock_mode		lock_mode);/*!< in: LOCK_S or LOCK_X */
 /*********************************************************************//**
 Releases a transaction's locks, and releases possible other transactions
 waiting because of these locks. Change the state of the transaction to
 TRX_STATE_COMMITTED_IN_MEMORY. */
-UNIV_INTERN
 void
 lock_trx_release_locks(
 /*===================*/
@@ -520,7 +587,6 @@ Removes locks on a table to be dropped or truncated.
 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
 also removed in addition to other table-level and record-level locks.
 No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
 void
 lock_remove_all_on_table(
 /*=====================*/
@@ -532,7 +598,7 @@ lock_remove_all_on_table(
 /*********************************************************************//**
 Calculates the fold value of a page file address: used in inserting or
 searching for a lock in the hash table.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 lock_rec_fold(
@@ -543,7 +609,7 @@ lock_rec_fold(
 /*********************************************************************//**
 Calculates the hash value of a page file address: used in inserting or
 searching for a lock in the hash table.
-@return	hashed value */
+@return hashed value */
 UNIV_INLINE
 ulint
 lock_rec_hash(
@@ -551,12 +617,19 @@ lock_rec_hash(
 	ulint	space,	/*!< in: space */
 	ulint	page_no);/*!< in: page number */
 
+/** Get the lock hash table selected by the LOCK_PREDICATE and
+LOCK_PRDT_PAGE bits of mode; the record lock hash if neither is set. */
+UNIV_INLINE
+hash_table_t*
+lock_hash_get(
+/*==========*/
+	ulint	mode);	/*!< in: lock mode */
+
 /**********************************************************************//**
 Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
 if none found.
 @return bit index == heap number of the record, or ULINT_UNDEFINED if
 none found */
-UNIV_INTERN
 ulint
 lock_rec_find_set_bit(
 /*==================*/
@@ -570,30 +643,26 @@ covered by an IX or IS table lock.
 IS table lock; dest if there is no source table, and NULL if the
 transaction is locking more than two tables or an inconsistency is
 found */
-UNIV_INTERN
 dict_table_t*
 lock_get_src_table(
 /*===============*/
 	trx_t*		trx,	/*!< in: transaction */
 	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
-	enum lock_mode*	mode);	/*!< out: lock mode of the source table */
+	lock_mode*	mode);	/*!< out: lock mode of the source table */
 /*********************************************************************//**
 Determine if the given table is exclusively "owned" by the given
 transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
 on the table.
 @return TRUE if table is only locked by trx, with LOCK_IX, and
 possibly LOCK_AUTO_INC */
-UNIV_INTERN
 ibool
 lock_is_table_exclusive(
 /*====================*/
 	const dict_table_t*	table,	/*!< in: table */
-	const trx_t*		trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull));
+	const trx_t*		trx);	/*!< in: transaction */
 /*********************************************************************//**
 Checks if a lock request lock1 has to wait for request lock2.
-@return	TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
+@return TRUE if lock1 has to wait for lock2 to be removed */
 ibool
 lock_has_to_wait(
 /*=============*/
@@ -604,7 +673,6 @@ lock_has_to_wait(
 				locks are record locks */
 /*********************************************************************//**
 Reports that a transaction id is insensible, i.e., in the future. */
-UNIV_INTERN
 void
 lock_report_trx_id_insanity(
 /*========================*/
@@ -612,11 +680,9 @@ lock_report_trx_id_insanity(
 	const rec_t*	rec,		/*!< in: user record */
 	dict_index_t*	index,		/*!< in: index */
 	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
-	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
-	MY_ATTRIBUTE((nonnull));
+	trx_id_t	max_trx_id);	/*!< in: trx_sys_get_max_trx_id() */
 /*********************************************************************//**
 Prints info of a table lock. */
-UNIV_INTERN
 void
 lock_table_print(
 /*=============*/
@@ -624,7 +690,6 @@ lock_table_print(
 	const lock_t*	lock);	/*!< in: table type lock */
 /*********************************************************************//**
 Prints info of a record lock. */
-UNIV_INTERN
 void
 lock_rec_print(
 /*===========*/
@@ -634,18 +699,25 @@ lock_rec_print(
 Prints info of locks for all transactions.
 @return FALSE if not able to obtain lock mutex and exits without
 printing info */
-UNIV_INTERN
 ibool
 lock_print_info_summary(
 /*====================*/
 	FILE*	file,	/*!< in: file where to print */
 	ibool   nowait)	/*!< in: whether to wait for the lock mutex */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Prints transaction lock wait and MVCC state.
+@param[in,out]	file	file where to print
+@param[in]	trx	transaction */
+void
+lock_trx_print_wait_and_mvcc_state(
+	FILE*		file,
+	const trx_t*	trx);
+
 /*********************************************************************//**
 Prints info of locks for each transaction. This function assumes that the
 caller holds the lock mutex and more importantly it will release the lock
 mutex on behalf of the caller. (This should be fixed in the future). */
-UNIV_INTERN
 void
 lock_print_info_all_transactions(
 /*=============================*/
@@ -655,18 +727,25 @@ Return approximate number or record locks (bits set in the bitmap) for
 this transaction. Since delete-marked records may be removed, the
 record count will not be precise.
 The caller must be holding lock_sys->mutex. */
-UNIV_INTERN
 ulint
 lock_number_of_rows_locked(
 /*=======================*/
 	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Return the number of table locks for a transaction.
+The caller must be holding lock_sys->mutex. */
+ulint
+lock_number_of_tables_locked(
+/*=========================*/
+	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*******************************************************************//**
 Gets the type of a lock. Non-inline version for using outside of the
 lock module.
-@return	LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
+@return LOCK_TABLE or LOCK_REC */
 ulint
 lock_get_type(
 /*==========*/
@@ -684,8 +763,7 @@ lock_get_trx(
 
 /*******************************************************************//**
 Gets the id of the transaction owning a lock.
-@return	transaction id */
-UNIV_INTERN
+@return transaction id */
 trx_id_t
 lock_get_trx_id(
 /*============*/
@@ -694,8 +772,7 @@ lock_get_trx_id(
 /*******************************************************************//**
 Gets the mode of a lock in a human readable string.
 The string should not be free()'d or modified.
-@return	lock mode */
-UNIV_INTERN
+@return lock mode */
 const char*
 lock_get_mode_str(
 /*==============*/
@@ -704,8 +781,7 @@ lock_get_mode_str(
 /*******************************************************************//**
 Gets the type of a lock in a human readable string.
 The string should not be free()'d or modified.
-@return	lock type */
-UNIV_INTERN
+@return lock type */
 const char*
 lock_get_type_str(
 /*==============*/
@@ -713,27 +789,22 @@ lock_get_type_str(
 
 /*******************************************************************//**
 Gets the id of the table on which the lock is.
-@return	id of the table */
-UNIV_INTERN
+@return id of the table */
 table_id_t
 lock_get_table_id(
 /*==============*/
 	const lock_t*	lock);	/*!< in: lock */
 
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return	name of the table */
-UNIV_INTERN
-const char*
+/** Determine which table a lock is associated with.
+@param[in]	lock	the lock
+@return name of the table */
+const table_name_t&
 lock_get_table_name(
-/*================*/
-	const lock_t*	lock);	/*!< in: lock */
+	const lock_t*	lock);
 
 /*******************************************************************//**
 For a record lock, gets the index on which the lock is.
-@return	index */
-UNIV_INTERN
+@return index */
 const dict_index_t*
 lock_rec_get_index(
 /*===============*/
@@ -742,8 +813,7 @@ lock_rec_get_index(
 /*******************************************************************//**
 For a record lock, gets the name of the index on which the lock is.
 The string should not be free()'d or modified.
-@return	name of the index */
-UNIV_INTERN
+@return name of the index */
 const char*
 lock_rec_get_index_name(
 /*====================*/
@@ -751,8 +821,7 @@ lock_rec_get_index_name(
 
 /*******************************************************************//**
 For a record lock, gets the tablespace number on which the lock is.
-@return	tablespace number */
-UNIV_INTERN
+@return tablespace number */
 ulint
 lock_rec_get_space_id(
 /*==================*/
@@ -760,17 +829,15 @@ lock_rec_get_space_id(
 
 /*******************************************************************//**
 For a record lock, gets the page number on which the lock is.
-@return	page number */
-UNIV_INTERN
+@return page number */
 ulint
 lock_rec_get_page_no(
 /*=================*/
 	const lock_t*	lock);	/*!< in: lock */
 /*******************************************************************//**
 Check if there are any locks (table or rec) against table.
-@return	TRUE if locks exist */
-UNIV_INTERN
-ibool
+@return TRUE if locks exist */
+bool
 lock_table_has_locks(
 /*=================*/
 	const dict_table_t*	table);	/*!< in: check if there are any locks
@@ -779,8 +846,8 @@ lock_table_has_locks(
 
 /*********************************************************************//**
 A thread which wakes up threads whose lock wait may have lasted too long.
-@return	a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
 os_thread_ret_t
 DECLARE_THREAD(lock_wait_timeout_thread)(
 /*=====================================*/
@@ -790,7 +857,6 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
 /********************************************************************//**
 Releases a user OS thread waiting for a lock to be released, if the
 thread is already suspended. */
-UNIV_INTERN
 void
 lock_wait_release_thread_if_suspended(
 /*==================================*/
@@ -803,7 +869,6 @@ occurs during the wait trx->error_state associated with thr is
 != DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
 are possible errors. DB_DEADLOCK is returned if selective deadlock
 resolution chose this transaction as a victim. */
-UNIV_INTERN
 void
 lock_wait_suspend_thread(
 /*=====================*/
@@ -813,7 +878,6 @@ lock_wait_suspend_thread(
 Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
 function should be called at the the end of an SQL statement, by the
 connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
 void
 lock_unlock_table_autoinc(
 /*======================*/
@@ -823,26 +887,36 @@ Check whether the transaction has already been rolled back because it
 was selected as a deadlock victim, or if it has to wait then cancel
 the wait lock.
 @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 lock_trx_handle_wait(
 /*=================*/
-	trx_t*	trx)	/*!< in/out: trx lock state */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*	trx,	/*!< in/out: trx lock state */
+	bool	lock_mutex_taken,
+	bool	trx_mutex_taken)
+	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 /*********************************************************************//**
 Get the number of locks on a table.
 @return number of locks */
-UNIV_INTERN
 ulint
 lock_table_get_n_locks(
 /*===================*/
-	const dict_table_t*	table)	/*!< in: table */
-	MY_ATTRIBUTE((nonnull));
+	const dict_table_t*	table);	/*!< in: table */
+/*******************************************************************//**
+Initialise the trx lock list. */
+void
+lock_trx_lock_list_init(
+/*====================*/
+	trx_lock_list_t*	lock_list);	/*!< List to initialise */
+
+/*******************************************************************//**
+Set the lock system timeout event. */
+void
+lock_set_timeout_event();
+/*====================*/
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Checks that a transaction id is sensible, i.e., not in the future.
-@return	true if ok */
-UNIV_INTERN
+@return true if ok */
 bool
 lock_check_trx_id_sanity(
 /*=====================*/
@@ -850,12 +924,11 @@ lock_check_trx_id_sanity(
 	const rec_t*	rec,		/*!< in: user record */
 	dict_index_t*	index,		/*!< in: index */
 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************************//**
 Check if the transaction holds any locks on the sys tables
 or its records.
-@return	the strongest lock found on any sys table or 0 for none */
-UNIV_INTERN
+@return the strongest lock found on any sys table or 0 for none */
 const lock_t*
 lock_trx_has_sys_table_locks(
 /*=========================*/
@@ -864,8 +937,7 @@ lock_trx_has_sys_table_locks(
 
 /*******************************************************************//**
 Check if the transaction holds an exclusive lock on a record.
-@return	whether the locks are held */
-UNIV_INTERN
+@return whether the locks are held */
 bool
 lock_trx_has_rec_x_lock(
 /*====================*/
@@ -873,9 +945,15 @@ lock_trx_has_rec_x_lock(
 	const dict_table_t*	table,	/*!< in: table to check */
 	const buf_block_t*	block,	/*!< in: buffer block of the record */
 	ulint			heap_no)/*!< in: record heap number */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* UNIV_DEBUG */
 
+/**
+Allocate cached locks for the transaction.
+@param trx		allocate cached record locks for this transaction */
+void
+lock_trx_alloc_locks(trx_t* trx);
+
 /** Lock modes and types */
 /* @{ */
 #define LOCK_MODE_MASK	0xFUL	/*!< mask used to extract mode from the
@@ -921,11 +999,14 @@ lock_trx_has_rec_x_lock(
 				remains set when the waiting lock is granted,
 				or if the lock is inherited to a neighboring
 				record */
+#define LOCK_PREDICATE	8192	/*!< Predicate lock */
+#define LOCK_PRDT_PAGE	16384	/*!< Page lock */
+
 
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_MODE_MASK
 # error
 #endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_TYPE_MASK
 # error
 #endif
 /* @} */
@@ -933,16 +1014,28 @@ lock_trx_has_rec_x_lock(
 /** Lock operation struct */
 struct lock_op_t{
 	dict_table_t*	table;	/*!< table to be locked */
-	enum lock_mode	mode;	/*!< lock mode */
+	lock_mode	mode;	/*!< lock mode */
 };
 
+typedef ib_mutex_t LockMutex;
+
 /** The lock system struct */
 struct lock_sys_t{
-	ib_mutex_t	mutex;			/*!< Mutex protecting the
+	char		pad1[CACHE_LINE_SIZE];	/*!< padding to prevent other
+						memory update hotspots from
+						residing on the same memory
+						cache line */
+	LockMutex	mutex;			/*!< Mutex protecting the
 						locks */
 	hash_table_t*	rec_hash;		/*!< hash table of the record
 						locks */
-	ib_mutex_t	wait_mutex;		/*!< Mutex protecting the
+	hash_table_t*	prdt_hash;		/*!< hash table of the predicate
+						lock */
+	hash_table_t*	prdt_page_hash;		/*!< hash table of the page
+						lock */
+
+	char		pad2[CACHE_LINE_SIZE];	/*!< Padding */
+	LockMutex	wait_mutex;		/*!< Mutex protecting the
 						next two fields */
 	srv_slot_t*	waiting_threads;	/*!< Array  of user threads
 						suspended while waiting for
@@ -969,14 +1062,56 @@ struct lock_sys_t{
 						is running */
 };
 
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue. */
+void
+lock_rec_discard(
+/*=============*/
+	lock_t*		in_lock);	/*!< in: record lock object: all
+					record locks which are contained
+					in this lock object are removed */
+
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+void
+lock_rtr_move_rec_list(
+/*===================*/
+	const buf_block_t*	new_block,	/*!< in: index page to
+						move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	rtr_rec_move_t*		rec_move,	/*!< in: recording records
+						moved */
+	ulint			num_move);	/*!< in: num of rec to move */
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function does not move locks, or check for waiting locks, therefore the
+lock bitmaps must already be reset when this function is called. */
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+	const buf_block_t*	block);		/*!< in: page to be discarded */
+
+/** Reset the nth bit of a record lock.
+@param[in,out]	lock	record lock
+@param[in]	i	index of the bit that will be reset
+@param[in]	type	whether the lock is in wait mode */
+void
+lock_rec_trx_wait(
+	lock_t*		lock,
+	ulint		i,
+	ulint		type);
+
 /** The lock system */
 extern lock_sys_t*	lock_sys;
 
 /** Test if lock_sys->mutex can be acquired without waiting. */
-#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex)
+#define lock_mutex_enter_nowait() 		\
+	(lock_sys->mutex.trylock(__FILE__, __LINE__))
 
 /** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() mutex_own(&lock_sys->mutex)
+#define lock_mutex_own() (lock_sys->mutex.is_owned())
 
 /** Acquire the lock_sys->mutex. */
 #define lock_mutex_enter() do {			\
@@ -985,11 +1120,11 @@ extern lock_sys_t*	lock_sys;
 
 /** Release the lock_sys->mutex. */
 #define lock_mutex_exit() do {			\
-	mutex_exit(&lock_sys->mutex);		\
+	lock_sys->mutex.exit();			\
 } while (0)
 
 /** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex)
+#define lock_wait_mutex_own() (lock_sys->wait_mutex.is_owned())
 
 /** Acquire the lock_sys->wait_mutex. */
 #define lock_wait_mutex_enter() do {		\
@@ -998,7 +1133,7 @@ extern lock_sys_t*	lock_sys;
 
 /** Release the lock_sys->wait_mutex. */
 #define lock_wait_mutex_exit() do {		\
-	mutex_exit(&lock_sys->wait_mutex);	\
+	lock_sys->wait_mutex.exit();		\
 } while (0)
 
 #ifdef WITH_WSREP
@@ -1018,7 +1153,19 @@ std::string
 lock_get_info(
 	const lock_t*);
 
+/*************************************************************//**
+Updates the lock table when a page is split and merged to
+two pages. */
+UNIV_INTERN
+void
+lock_update_split_and_merge(
+	const buf_block_t* left_block,	/*!< in: left page to which merged */
+	const rec_t* orig_pred,		/*!< in: original predecessor of
+					supremum on the left page before merge*/
+	const buf_block_t* right_block);/*!< in: right page from which merged */
+
 #endif /* WITH_WSREP */
+
 #ifndef UNIV_NONINL
 #include "lock0lock.ic"
 #endif
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index 736936954cb..ba2311c02ea 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,7 +23,6 @@ The transaction lock system
 Created 5/7/1996 Heikki Tuuri
 *******************************************************/
 
-#include "sync0sync.h"
 #include "srv0srv.h"
 #include "dict0dict.h"
 #include "row0row.h"
@@ -41,7 +40,7 @@ Created 5/7/1996 Heikki Tuuri
 /*********************************************************************//**
 Calculates the fold value of a page file address: used in inserting or
 searching for a lock in the hash table.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 lock_rec_fold(
@@ -55,7 +54,7 @@ lock_rec_fold(
 /*********************************************************************//**
 Calculates the hash value of a page file address: used in inserting or
 searching for a lock in the hash table.
-@return	hashed value */
+@return hashed value */
 UNIV_INLINE
 ulint
 lock_rec_hash(
@@ -69,7 +68,7 @@ lock_rec_hash(
 
 /*********************************************************************//**
 Gets the heap_no of the smallest user record on a page.
-@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
 UNIV_INLINE
 ulint
 lock_get_min_heap_no(
@@ -90,3 +89,43 @@ lock_get_min_heap_no(
 						   FALSE)));
 	}
 }
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record; works on lock_sys->rec_hash via lock_rec_move_low(). */
+UNIV_INLINE
+void
+lock_rec_move(
+/*==========*/
+	const buf_block_t*	receiver,       /*!< in: buffer block containing
+						the receiving record */
+	const buf_block_t*	donator,        /*!< in: buffer block containing
+						the donating record */
+	ulint			receiver_heap_no,/*!< in: heap_no of the record
+						which gets the locks; there
+						must be no lock requests
+						on it! */
+	ulint			donator_heap_no)/*!< in: heap_no of the record
+						which gives the locks */
+{
+	lock_rec_move_low(lock_sys->rec_hash, receiver, donator,
+			  receiver_heap_no, donator_heap_no);
+}
+
+/** Get the lock hash table: prdt_hash if LOCK_PREDICATE is set in mode,
+else prdt_page_hash if LOCK_PRDT_PAGE is set, otherwise rec_hash. */
+UNIV_INLINE
+hash_table_t*
+lock_hash_get(
+/*==========*/
+	ulint	mode)	/*!< in: lock mode bit flags */
+{
+	if (mode & LOCK_PREDICATE) {
+		return(lock_sys->prdt_hash);
+	} else if (mode & LOCK_PRDT_PAGE) {
+		return(lock_sys->prdt_page_hash);
+	} else {
+		return(lock_sys->rec_hash);
+	}
+}
+
diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h
new file mode 100644
index 00000000000..6c61f07a4e8
--- /dev/null
+++ b/storage/innobase/include/lock0prdt.h
@@ -0,0 +1,227 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0prdt.h
+The predicate lock system
+
+Created 9/7/2013 Jimmy Yang
+*******************************************************/
+#ifndef lock0prdt_h
+#define lock0prdt_h
+
+#include "univ.i"
+#include "lock0lock.h"
+
+/* Predicate lock data */
+typedef struct lock_prdt {
+	void*		data;		/* Predicate data */
+	uint16		op;		/* Predicate operator */
+} lock_prdt_t;
+
+/*********************************************************************//**
+Acquire a predicate lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+dberr_t
+lock_prdt_lock(
+/*===========*/
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	lock_prdt_t*	prdt,	/*!< in: Predicate for the lock */
+	dict_index_t*	index,	/*!< in: secondary index */
+	enum lock_mode	mode,	/*!< in: mode of the lock which
+				the read cursor should set on
+				records: LOCK_S or LOCK_X; the
+				latter is possible in
+				SELECT FOR UPDATE */
+	ulint		type_mode,
+				/*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
+	que_thr_t*	thr,	/*!< in: query thread
+				(can be NULL if BTR_NO_LOCKING_FLAG) */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+
+/*********************************************************************//**
+Acquire a "Page" lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+dberr_t
+lock_place_prdt_page_lock(
+/*======================*/
+	ulint		space,	/*!< in: space for the page to lock */
+	ulint		pageno,	/*!< in: page number */
+	dict_index_t*	index,	/*!< in: secondary index */
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/*********************************************************************//**
+Checks whether two predicate locks are compatible with each other.
+@return true if conflicts (NOTE(review): name implies the opposite; verify) */
+bool
+lock_prdt_consistent(
+/*=================*/
+	lock_prdt_t*	prdt1,	/*!< in: Predicate for the lock */
+	lock_prdt_t*	prdt2,	/*!< in: Predicate for the lock */
+	ulint		op);	/*!< in: Predicate comparison operator */
+
+/*********************************************************************//**
+Initialize a predicate lock from an MBR (minimum bounding rectangle). */
+void
+lock_init_prdt_from_mbr(
+/*====================*/
+	lock_prdt_t*	prdt,	/*!< in/out: predicate to be initialized */
+	rtr_mbr_t*	mbr,	/*!< in: Minimum Bounding Rectangle */
+	ulint		mode,	/*!< in: Search mode */
+	mem_heap_t*	heap);	/*!< in: heap for allocating memory */
+
+/*********************************************************************//**
+Get predicate lock's minimum bounding box
+@return the minimum bounding box*/
+lock_prdt_t*
+lock_get_prdt_from_lock(
+/*====================*/
+	const lock_t*	lock);	/*!< in: the lock */
+
+/*********************************************************************//**
+Checks if a predicate lock request for a new lock has to wait for
+request lock2.
+@return true if new lock has to wait for lock2 to be removed */
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+	const trx_t*	trx,	/*!< in: trx of new lock */
+	ulint		type_mode,/*!< in: precise mode of the new lock
+				to set: LOCK_S or LOCK_X, possibly
+				ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+				LOCK_INSERT_INTENTION */
+	lock_prdt_t*	prdt,	/*!< in: lock predicate to check */
+	const lock_t*	lock2);	/*!< in: another record lock; NOTE that
+				it is assumed that this has a lock bit
+				set on the same record as in the new
+				lock we are setting */
+
+/**************************************************************//**
+Update predicate lock when page splits */
+void
+lock_prdt_update_split(
+/*===================*/
+	buf_block_t*	block,		/*!< in/out: page to be split */
+	buf_block_t*	new_block,	/*!< in/out: the new half page */
+	lock_prdt_t*	prdt,		/*!< in: MBR on the old page */
+	lock_prdt_t*	new_prdt,	/*!< in: MBR on the new page */
+	ulint		space,		/*!< in: space id */
+	ulint		page_no);	/*!< in: page number */
+
+/**************************************************************//**
+Adjust locks from an ancestor page of the R-tree on the appropriate level. */
+void
+lock_prdt_update_parent(
+/*====================*/
+	buf_block_t*	left_block,	/*!< in/out: page to be split */
+	buf_block_t*	right_block,	/*!< in/out: the new half page */
+	lock_prdt_t*	left_prdt,	/*!< in: MBR on the old page */
+	lock_prdt_t*	right_prdt,	/*!< in: MBR on the new page */
+	lock_prdt_t*	parent_prdt,	/*!< in: original parent MBR */
+	ulint		space,		/*!< in: space id */
+	ulint		page_no);	/*!< in: page number */
+
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a predicate record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+dberr_t
+lock_prdt_insert_check_and_lock(
+/*============================*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
+				set, does nothing */
+	const rec_t*	rec,	/*!< in: record after which to insert */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	dict_index_t*	index,	/*!< in: index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	lock_prdt_t*	prdt);	/*!< in: Minimum Bound Rectangle */
+
+/*********************************************************************//**
+Append a predicate to the lock */
+void
+lock_prdt_set_prdt(
+/*===============*/
+	lock_t*			lock,	/*!< in: lock */
+	const lock_prdt_t*	prdt);	/*!< in: Predicate */
+
+#if 0
+
+/*********************************************************************//**
+Checks if a predicate lock request for a new lock has to wait for
+request lock2.
+@return true if new lock has to wait for lock2 to be removed */
+UNIV_INLINE
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+	const trx_t*	trx,	/*!< in: trx of new lock */
+	ulint		type_mode,/*!< in: precise mode of the new lock
+				to set: LOCK_S or LOCK_X, possibly
+				ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+				LOCK_INSERT_INTENTION */
+	lock_prdt_t*	prdt,	/*!< in: lock predicate to check */
+	const lock_t*	lock2);	/*!< in: another record lock; NOTE that
+				it is assumed that this has a lock bit
+				set on the same record as in the new
+				lock we are setting */
+
+/*********************************************************************//**
+Get predicate lock's minimum bounding box
+@return the minimum bounding box*/
+UNIV_INLINE
+rtr_mbr_t*
+prdt_get_mbr_from_prdt(
+/*===================*/
+	const lock_prdt_t*	prdt);	/*!< in: the lock predicate */
+
+
+#endif
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+void
+lock_prdt_rec_move(
+/*===============*/
+	const buf_block_t*	receiver,	/*!< in: buffer block containing
+						the receiving record */
+	const buf_block_t*	donator);	/*!< in: buffer block containing
+						the donating record */
+
+/** Check whether there is an R-tree page lock on a buffer page.
+@param[in]	trx	transaction to test the lock for
+@param[in]	space	space id for the page
+@param[in]	page_no	page number
+@return true if there is none */
+bool
+lock_test_prdt_page_lock(
+/*=====================*/
+	const trx_t*	trx,
+	ulint		space,
+	ulint		page_no);
+
+/** Removes predicate lock objects set on an index page which is discarded.
+@param[in]	block		page to be discarded
+@param[in]	lock_hash	lock hash */
+void
+lock_prdt_page_free_from_discard(
+/*=============================*/
+	const buf_block_t*	block,
+	hash_table_t*		lock_hash);
+
+#endif
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index b60dd0d92c8..0b2ab1bfcfe 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -38,7 +38,10 @@ those functions in lock/ */
 #include "dict0types.h"
 #include "hash0hash.h"
 #include "trx0types.h"
-#include "ut0lst.h"
+
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
 
 /** A table lock */
 struct lock_table_t {
@@ -47,18 +50,67 @@ struct lock_table_t {
 	UT_LIST_NODE_T(lock_t)
 			locks;		/*!< list of locks on the same
 					table */
+	/** Print the table lock into the given output stream
+	@param[in,out]	out	the output stream
+	@return the given output stream. */
+	std::ostream& print(std::ostream& out) const;
 };
 
+/** Print the table lock into the given output stream
+@param[in,out]	out	the output stream
+@return the given output stream. */
+inline
+std::ostream& lock_table_t::print(std::ostream& out) const
+{
+	out << "[lock_table_t: name=" << table->name << "]";
+	return(out);
+}
+
+/** The global output operator is overloaded to conveniently
+print the lock_table_t object into the given output stream.
+@param[in,out]	out	the output stream
+@param[in]	lock	the table lock
+@return the given output stream */
+inline
+std::ostream&
+operator<<(std::ostream& out, const lock_table_t& lock)
+{
+	return(lock.print(out));
+}
+
 /** Record lock for a page */
 struct lock_rec_t {
-	ulint	space;			/*!< space id */
-	ulint	page_no;		/*!< page number */
-	ulint	n_bits;			/*!< number of bits in the lock
+	ib_uint32_t	space;		/*!< space id */
+	ib_uint32_t	page_no;	/*!< page number */
+	ib_uint32_t	n_bits;		/*!< number of bits in the lock
 					bitmap; NOTE: the lock bitmap is
 					placed immediately after the
 					lock struct */
+
+	/** Print the record lock into the given output stream
+	@param[in,out]	out	the output stream
+	@return the given output stream. */
+	std::ostream& print(std::ostream& out) const;
 };
 
+/** Print the record lock into the given output stream
+@param[in,out]	out	the output stream
+@return the given output stream. */
+inline
+std::ostream& lock_rec_t::print(std::ostream& out) const
+{
+	out << "[lock_rec_t: space=" << space << ", page_no=" << page_no
+		<< ", n_bits=" << n_bits << "]";
+	return(out);
+}
+
+inline
+std::ostream&
+operator<<(std::ostream& out, const lock_rec_t& lock)
+{
+	return(lock.print(out));
+}
+
 /** Lock struct; protected by lock_sys->mutex */
 struct lock_t {
 	trx_t*		trx;		/*!< transaction owning the
@@ -66,14 +118,13 @@ struct lock_t {
 	UT_LIST_NODE_T(lock_t)
 			trx_locks;	/*!< list of the locks of the
 					transaction */
-	ulint		type_mode;	/*!< lock type, mode, LOCK_GAP or
-					LOCK_REC_NOT_GAP,
-					LOCK_INSERT_INTENTION,
-					wait flag, ORed */
-	hash_node_t	hash;		/*!< hash chain node for a record
-					lock */
+
 	dict_index_t*	index;		/*!< index for a record lock */
 
+	lock_t*		hash;		/*!< hash chain node for a record
+					lock. The link node in a singly linked
+					list, used during hashing. */
+
 	/* Statistics for how long lock has been held and time
 	how long this lock had to be waited before it was granted */
 	time_t		requested_time; /*!< Lock request time */
@@ -83,11 +134,842 @@ struct lock_t {
 		lock_table_t	tab_lock;/*!< table lock */
 		lock_rec_t	rec_lock;/*!< record lock */
 	} un_member;			/*!< lock details */
+
+	ib_uint32_t	type_mode;	/*!< lock type, mode, LOCK_GAP or
+					LOCK_REC_NOT_GAP,
+					LOCK_INSERT_INTENTION,
+					wait flag, ORed */
+
+	/** Determine if the lock object is a record lock.
+	@return true if record lock, false otherwise. */
+	bool is_record_lock() const
+	{
+		return(type() == LOCK_REC);
+	}
+
+	bool is_waiting() const
+	{
+		return(type_mode & LOCK_WAIT);
+	}
+
+	bool is_gap() const
+	{
+		return(type_mode & LOCK_GAP);
+	}
+
+	bool is_record_not_gap() const
+	{
+		return(type_mode & LOCK_REC_NOT_GAP);
+	}
+
+	bool is_insert_intention() const
+	{
+		return(type_mode & LOCK_INSERT_INTENTION);
+	}
+
+	ulint type() const {
+		return(type_mode & LOCK_TYPE_MASK);
+	}
+
+	enum lock_mode mode() const
+	{
+		return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
+	}
+
+	/** Print the lock object into the given output stream.
+	@param[in,out]	out	the output stream
+	@return the given output stream. */
+	std::ostream& print(std::ostream& out) const;
+
+	/** Convert the member 'type_mode' into a human readable string.
+	@return human readable string */
+	std::string type_mode_string() const;
+
+	const char* type_string() const
+	{
+		switch (type_mode & LOCK_TYPE_MASK) {
+		case LOCK_REC:
+			return("LOCK_REC");
+		case LOCK_TABLE:
+			return("LOCK_TABLE");
+		default:
+			ut_error;
+		}
+	}
+};
+
+/** Convert the member 'type_mode' into a human readable string.
+@return human readable string */
+inline
+std::string
+lock_t::type_mode_string() const
+{
+	std::ostringstream sout;
+	sout << type_string();
+	sout << " | " << lock_mode_string(mode());
+
+	if (is_record_not_gap()) {
+		sout << " | LOCK_REC_NOT_GAP";
+	}
+
+	if (is_waiting()) {
+		sout << " | LOCK_WAIT";
+	}
+
+	if (is_gap()) {
+		sout << " | LOCK_GAP";
+	}
+
+	if (is_insert_intention()) {
+		sout << " | LOCK_INSERT_INTENTION";
+	}
+	return(sout.str());
+}
+
+inline
+std::ostream&
+lock_t::print(std::ostream& out) const
+{
+	out << "[lock_t: type_mode=" << type_mode << "("
+		<< type_mode_string() << ")";
+
+	if (is_record_lock()) {
+		out << un_member.rec_lock;
+	} else {
+		out << un_member.tab_lock;
+	}
+
+	out << "]";
+	return(out);
+}
+
+inline
+std::ostream&
+operator<<(std::ostream& out, const lock_t& lock)
+{
+	return(lock.print(out));
+}
+
+#ifdef UNIV_DEBUG
+extern ibool	lock_print_waits;
+#endif /* UNIV_DEBUG */
+
+/** Restricts the length of search we will do in the waits-for
+graph of transactions */
+static const ulint	LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK = 1000000;
+
+/** Restricts the search depth we will do in the waits-for graph of
+transactions */
+static const ulint	LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK = 200;
+
+/** When releasing transaction locks, this specifies how often we release
+the lock mutex for a moment to give also others access to it */
+static const ulint	LOCK_RELEASE_INTERVAL = 1000;
+
+/* Safety margin when creating a new record lock: this many extra records
+can be inserted to the page without need to create a lock with a bigger
+bitmap */
+
+static const ulint	LOCK_PAGE_BITMAP_MARGIN = 64;
+
+/* An explicit record lock affects both the record and the gap before it.
+An implicit x-lock does not affect the gap, it only locks the index
+record from read or update.
+
+If a transaction has modified or inserted an index record, then
+it owns an implicit x-lock on the record. On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition can be explained to the user so that there is
+nondeterminism in the access path when a query is answered: we may,
+or may not, access the clustered index record and thus may, or may not,
+bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
+It only locks the record it is placed on, not the gap before the record.
+This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
+level.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed to a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not, with the exception
+that a gap type request set by another transaction to wait for
+its turn to do an insert is ignored. On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit, or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it any time
+without need to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again, and retrieves the same result set, if it has not modified the
+result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or new inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
+
+/* If an index record should be changed or a new inserted, we must check
+the lock on the record or the next. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In the next key locking an x-lock set on a record also
+prevents inserts just before that record.
+	There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated but the lock prevents insert of a user record to the end of
+the page.
+	Next key locks will prevent the phantom problem where new rows
+could appear to SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+	What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because also the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock to the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should see also the new inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the new inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+	We could have the convention that granted explicit record locks,
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+	How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the inserted new record needs no record lock in
+lock table, the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary? A secondary index record
+is modified only by setting or resetting its deleted flag. A secondary index
+record contains fields to uniquely determine the corresponding clustered
+index record. A secondary index record is therefore only modified if we
+also modify the clustered index record, and the trx id checking is done
+on the clustered index record, before we come to modify the secondary index
+record. So, in the case of delete marking or unmarking a secondary index
+record, we do not have to care about trx ids, only the locks in the lock
+table must be checked. In the case of a select from a secondary index, the
+trx id is relevant, and in this case we may have to search the clustered
+index record.
+
+PROBLEM: How to update record locks when page is split or merged, or
+--------------------------------------------------------------------
+a record is deleted or updated?
+If the size of fields in a record changes, we perform the update by
+a delete followed by an insert. How can we retain the locks set or
+waiting on the record? Because a record lock is indexed in the bitmap
+by the heap number of the record, when we remove the record from the
+record list, it is possible still to keep the lock bits. If the page
+is reorganized, we could make a table of old and new heap numbers,
+and permute the bitmaps in the locks accordingly. We can add to the
+table a row telling where the updated record ended. If the update does
+not require a reorganization of the page, we can simply move the lock
+bits for the updated record to the position determined by its new heap
+number (we may have to allocate a new lock, if we run out of the bitmap
+in the old one).
+	A more complicated case is the one where the reinsertion of the
+updated record is done pessimistically, because the structure of the
+tree may change.
+
+PROBLEM: If a supremum record is removed in a page merge, or a record
+---------------------------------------------------------------------
+removed in a purge, what to do to the waiting lock requests? In a split to
+the right, we just move the lock requests to the new supremum. If a record
+is removed, we could move the waiting lock request to its inheritor, the
+next record in the index. But, the next record may already have lock
+requests on its own queue. A new deadlock check should be made then. Maybe
+it is easier just to release the waiting transactions. They can then enqueue
+new lock requests on appropriate records.
+
+PROBLEM: When a record is inserted, what locks should it inherit from the
+-------------------------------------------------------------------------
+upper neighbor? An insert of a new supremum record in a page split is
+always possible, but an insert of a new user record requires that the upper
+neighbor does not have any lock requests by other transactions, granted or
+waiting, in its lock queue. Solution: We can copy the locks as gap type
+locks, so that also the waiting locks are transformed to granted gap type
+locks on the inserted record. */
+
+/* LOCK COMPATIBILITY MATRIX
+ *    IS IX S  X  AI
+ * IS +	 +  +  -  +
+ * IX +	 +  -  -  +
+ * S  +	 -  +  -  -
+ * X  -	 -  -  -  -
+ * AI +	 +  -  -  -
+ *
+ * Note that for rows, InnoDB only acquires S or X locks.
+ * For tables, InnoDB normally acquires IS or IX locks.
+ * S or X table locks are only acquired for LOCK TABLES.
+ * Auto-increment (AI) locks are needed because of
+ * statement-level MySQL binlog.
+ * See also lock_mode_compatible().
+ */
+static const byte lock_compatibility_matrix[5][5] = {
+ /**         IS     IX       S     X       AI */
+ /* IS */ {  TRUE,  TRUE,  TRUE,  FALSE,  TRUE},
+ /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  TRUE},
+ /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
+ /* X  */ {  FALSE, FALSE, FALSE, FALSE,  FALSE},
+ /* AI */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE}
 };
 
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ *    IS IX S  X  AI
+ * IS +  -  -  -  -
+ * IX +  +  -  -  -
+ * S  +  -  +  -  -
+ * X  +  +  +  +  +
+ * AI -  -  -  -  +
+ * See lock_mode_stronger_or_eq().
+ */
+static const byte lock_strength_matrix[5][5] = {
+ /**         IS     IX       S     X       AI */
+ /* IS */ {  TRUE,  FALSE, FALSE,  FALSE, FALSE},
+ /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE},
+ /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
+ /* X  */ {  TRUE,  TRUE,  TRUE,  TRUE,   TRUE},
+ /* AI */ {  FALSE, FALSE, FALSE, FALSE,  TRUE}
+};
+
+/** Maximum depth of the DFS stack. */
+static const ulint MAX_STACK_SIZE = 4096;
+
+#define PRDT_HEAPNO	PAGE_HEAP_NO_INFIMUM
+/** Record locking request status */
+enum lock_rec_req_status {
+        /** Failed to acquire a lock */
+        LOCK_REC_FAIL,
+        /** Succeeded in acquiring a lock (implicit or already acquired) */
+        LOCK_REC_SUCCESS,
+        /** Explicitly created a new lock */
+        LOCK_REC_SUCCESS_CREATED
+};
+
+/**
+Record lock ID */
+struct RecID {
+
+	RecID(ulint space_id, ulint page_no, ulint heap_no)
+		:
+		m_space_id(static_cast<uint32_t>(space_id)),
+		m_page_no(static_cast<uint32_t>(page_no)),
+		m_heap_no(static_cast<uint32_t>(heap_no)),
+		m_fold(lock_rec_fold(m_space_id, m_page_no))
+	{
+		ut_ad(space_id < UINT32_MAX);
+		ut_ad(page_no < UINT32_MAX);
+		ut_ad(heap_no < UINT32_MAX);
+	}
+
+	RecID(const buf_block_t* block, ulint heap_no)
+		:
+		m_space_id(block->page.id.space()),
+		m_page_no(block->page.id.page_no()),
+		m_heap_no(static_cast<uint32_t>(heap_no)),
+		m_fold(lock_rec_fold(m_space_id, m_page_no))
+	{
+		ut_ad(heap_no < UINT32_MAX);
+	}
+
+	/**
+	@return the "folded" value of {space, page_no} */
+	ulint fold() const
+	{
+		return(m_fold);
+	}
+
+	/**
+	Tablespace ID */
+	uint32_t		m_space_id;
+
+	/**
+	Page number within the space ID */
+	uint32_t		m_page_no;
+
+	/**
+	Heap number within the page */
+	uint32_t		m_heap_no;
+
+	/**
+	Hashed key value */
+	ulint			m_fold;
+};
+
+/**
+Create record locks */
+class RecLock {
+public:
+
+	/**
+	@param[in,out] thr	Transaction query thread requesting the record
+				lock
+	@param[in] index	Index on which record lock requested
+	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
+	@param[in] mode		The lock mode */
+	RecLock(que_thr_t*	thr,
+		dict_index_t*	index,
+		const RecID&	rec_id,
+		ulint		mode)
+		:
+		m_thr(thr),
+		m_trx(thr_get_trx(thr)),
+		m_mode(mode),
+		m_index(index),
+		m_rec_id(rec_id)
+	{
+		ut_ad(is_predicate_lock(m_mode));
+
+		init(NULL);
+	}
+
+	/**
+	@param[in,out] thr	Transaction query thread requesting the record
+				lock
+	@param[in] index	Index on which record lock requested
+	@param[in] block	Buffer page containing record
+	@param[in] heap_no	Heap number within the block
+	@param[in] mode		The lock mode
+	@param[in] prdt		The predicate for the rtree lock */
+	RecLock(que_thr_t*	thr,
+		dict_index_t*	index,
+		const buf_block_t*
+				block,
+		ulint		heap_no,
+		ulint		mode,
+		lock_prdt_t*	prdt = NULL)
+		:
+		m_thr(thr),
+		m_trx(thr_get_trx(thr)),
+		m_mode(mode),
+		m_index(index),
+		m_rec_id(block, heap_no)
+	{
+		btr_assert_not_corrupted(block, index);
+
+		init(block->frame);
+	}
+
+	/**
+	@param[in] index	Index on which record lock requested
+	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
+	@param[in] mode		The lock mode */
+	RecLock(dict_index_t*	index,
+		const RecID&	rec_id,
+		ulint		mode)
+		:
+		m_thr(),
+		m_trx(),
+		m_mode(mode),
+		m_index(index),
+		m_rec_id(rec_id)
+	{
+		ut_ad(is_predicate_lock(m_mode));
+
+		init(NULL);
+	}
+
+	/**
+	@param[in] index	Index on which the record lock is requested
+	@param[in] block	Buffer page containing record
+	@param[in] heap_no	Heap number within the block
+	@param[in] mode		The lock mode */
+	RecLock(dict_index_t*	index,
+		const buf_block_t*
+				block,
+		ulint		heap_no,
+		ulint		mode)
+		:
+		m_thr(),
+		m_trx(),
+		m_mode(mode),
+		m_index(index),
+		m_rec_id(block, heap_no)
+	{
+		btr_assert_not_corrupted(block, index);
+
+		init(block->frame);
+	}
+
+	/**
+	Enqueue a lock wait for a transaction. If it is a high priority
+	transaction (cannot rollback) then jump ahead in the record lock wait
+	queue and if the transaction at the head of the queue is itself waiting
+	roll it back.
+	@param[in] wait_for		The lock that the joining
+					transaction is waiting for
+	@param[in] prdt			Predicate [optional]
+	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
+		there was a deadlock, but another transaction was chosen
+		as a victim, and we got the lock immediately: no need to
+		wait then */
+	dberr_t add_to_waitq(
+		const lock_t*	wait_for,
+		const lock_prdt_t*
+				prdt = NULL);
+
+	/**
+	Create a lock for a transaction and initialise it.
+	@param[in, out] trx		Transaction requesting the new lock
+	@param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
+	@param[in] add_to_hash		add the lock to hash table
+	@param[in] prdt			Predicate lock (optional)
+	@return new lock instance */
+	lock_t* create(
+		trx_t*		trx,
+		bool		owns_trx_mutex,
+		bool		add_to_hash,
+		const lock_prdt_t*
+				prdt = NULL);
+
+	lock_t* create(
+		lock_t* const	c_lock,
+		trx_t*		trx,
+		bool		owns_trx_mutex,
+		bool		add_to_hash,
+		const lock_prdt_t*
+				prdt = NULL);
+	/**
+	Check if the lock is on m_rec_id.
+	@param[in] lock			Lock to compare with
+	@return true if the record lock is on m_rec_id */
+	bool is_on_row(const lock_t* lock) const;
+
+	/**
+	Create the lock instance
+	@param[in, out] trx	The transaction requesting the lock
+	@param[in, out] index	Index on which record lock is required
+	@param[in] mode		The lock mode desired
+	@param[in] rec_id	The record id
+	@param[in] size		Size of the lock + bitmap requested
+	@return a record lock instance */
+	static lock_t* lock_alloc(
+		trx_t*		trx,
+		dict_index_t*	index,
+		ulint		mode,
+		const RecID&	rec_id,
+		ulint		size);
+
+private:
+	/**
+	@return the record lock size in bytes */
+	size_t lock_size() const
+	{
+		return(m_size);
+	}
+
+	/**
+	Do some checks and prepare for creating a new record lock */
+	void prepare() const;
+
+	/**
+	Collect the transactions that will need to be rolled back asynchronously
+	@param[in, out] trx	Transaction to be rolled back */
+	void mark_trx_for_rollback(trx_t* trx);
+
+	/**
+	Jump the queue for the record over all low priority transactions and
+	add the lock. If all current granted locks are compatible, grant the
+	lock. Otherwise, mark all granted transactions for asynchronous
+	rollback and add to hit list.
+	@param[in, out]	lock		Lock being requested
+	@param[in]	conflict_lock	First conflicting lock from the head
+	@return true if the lock is granted */
+	bool jump_queue(lock_t* lock, const lock_t* conflict_lock);
+
+	/** Find position in lock queue and add the high priority transaction
+	lock. Intention and GAP only locks can be granted even if there are
+	waiting locks in front of the queue. To add the High priority
+	transaction in a safe position we keep the following rule.
+
+	1. If the lock can be granted, add it before the first waiting lock
+	in the queue so that all currently waiting locks need to do conflict
+	check before getting granted.
+
+	2. If the lock has to wait, add it after the last granted lock or the
+	last waiting high priority transaction in the queue whichever is later.
+	This ensures that the transaction is granted only after doing conflict
+	check with all granted transactions.
+	@param[in]      lock            Lock being requested
+	@param[in]      conflict_lock   First conflicting lock from the head
+	@param[out]     high_priority   high priority transaction ahead in queue
+	@return true if the lock can be granted */
+	bool
+	lock_add_priority(
+		lock_t*		lock,
+		const lock_t*	conflict_lock,
+		bool*		high_priority);
+
+	/** Iterate over the granted locks and prepare the hit list for ASYNC Rollback.
+	If the transaction is waiting for some other lock then wake up with deadlock error.
+	Currently we don't mark following transactions for ASYNC Rollback.
+	1. Read only transactions
+	2. Background transactions
+	3. Other High priority transactions
+	@param[in]      lock            Lock being requested
+	@param[in]      conflict_lock   First conflicting lock from the head */
+	void make_trx_hit_list(lock_t* lock, const lock_t* conflict_lock);
+
+	/**
+	Setup the requesting transaction state for lock grant
+	@param[in,out] lock	Lock for which to change state */
+	void set_wait_state(lock_t* lock);
+
+	/**
+	Add the lock to the record lock hash and the transaction's lock list
+	@param[in,out] lock	Newly created record lock to add to the
+				rec hash and the transaction lock list
+	@param[in] add_to_hash	If the lock should be added to the hash table */
+	void lock_add(lock_t* lock, bool add_to_hash);
+
+	/**
+	Check and resolve any deadlocks
+	@param[in, out] lock		The lock being acquired
+	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
+		there was a deadlock, but another transaction was chosen
+		as a victim, and we got the lock immediately: no need to
+		wait then */
+	dberr_t deadlock_check(lock_t* lock);
+
+	/**
+	Check the outcome of the deadlock check
+	@param[in] victim_trx		Transaction selected for rollback
+	@param[in,out] lock		Lock being requested
+	@return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
+	dberr_t check_deadlock_result(const trx_t* victim_trx, lock_t* lock);
+
+	/**
+	Setup the context from the requirements */
+	void init(const page_t* page)
+	{
+		ut_ad(lock_mutex_own());
+		ut_ad(!srv_read_only_mode);
+		ut_ad(dict_index_is_clust(m_index)
+		      || !dict_index_is_online_ddl(m_index));
+		ut_ad(m_thr == NULL || m_trx == thr_get_trx(m_thr));
+
+		m_size = is_predicate_lock(m_mode)
+			  ? lock_size(m_mode) : lock_size(page);
+
+		/** If rec is the supremum record, then we reset the
+		gap and LOCK_REC_NOT_GAP bits, as all locks on the
+		supremum are automatically of the gap type */
+
+		if (m_rec_id.m_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+			ut_ad(!(m_mode & LOCK_REC_NOT_GAP));
+
+			m_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
+		}
+	}
+
+	/**
+	Calculate the record lock physical size required for a predicate lock.
+	@param[in] mode For predicate locks the lock mode
+	@return the size of the lock data structure required in bytes */
+	static size_t lock_size(ulint mode)
+	{
+		ut_ad(is_predicate_lock(mode));
+
+		/* The lock is always on PAGE_HEAP_NO_INFIMUM(0),
+		so we only need 1 bit (which is rounded up to 1
+		byte) for lock bit setting */
+
+		size_t	n_bytes;
+
+		if (mode & LOCK_PREDICATE) {
+			const ulint	align = UNIV_WORD_SIZE - 1;
+
+			/* We will attach the predicate structure
+			after lock. Make sure the memory is
+			aligned on 8 bytes, the mem_heap_alloc
+			will align it with MEM_SPACE_NEEDED
+			anyway. */
+
+			n_bytes = (1 + sizeof(lock_prdt_t) + align) & ~align;
+
+			/* This should hold now */
+
+			ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);
+
+		} else {
+			n_bytes = 1;
+		}
+
+		return(n_bytes);
+	}
+
+	/**
+	Calculate the record lock physical size required, non-predicate lock.
+	@param[in] page		For non-predicate locks the buffer page
+	@return the size of the lock data structure required in bytes */
+	static size_t lock_size(const page_t* page)
+	{
+		ulint	n_recs = page_dir_get_n_heap(page);
+
+		/* Make lock bitmap bigger by a safety margin */
+
+		return(1 + ((n_recs + LOCK_PAGE_BITMAP_MARGIN) / 8));
+	}
+
+	/**
+	@return true if the requested lock mode is for a predicate
+		or page lock */
+	static bool is_predicate_lock(ulint mode)
+	{
+		return(mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+	}
+
+private:
+	/** The query thread of the transaction */
+	que_thr_t*		m_thr;
+
+	/**
+	Transaction requesting the record lock */
+	trx_t*			m_trx;
+
+	/**
+	Lock mode requested */
+	ulint			m_mode;
+
+	/**
+	Size of the record lock in bytes */
+	size_t			m_size;
+
+	/**
+	Index on which the record lock is required */
+	dict_index_t*		m_index;
+
+	/**
+	The record lock tuple {space, page_no, heap_no} */
+	RecID			m_rec_id;
+};
+
+#ifdef UNIV_DEBUG
+/** The count of the types of locks. */
+static const ulint      lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
+#endif /* UNIV_DEBUG */
+
 /*********************************************************************//**
 Gets the type of a lock.
-@return	LOCK_TABLE or LOCK_REC */
+@return LOCK_TABLE or LOCK_REC */
 UNIV_INLINE
 ulint
 lock_get_type_low(
@@ -96,8 +978,7 @@ lock_get_type_low(
 
 /*********************************************************************//**
 Gets the previous record lock set on a record.
-@return	previous lock on the same record, NULL if none exists */
-UNIV_INTERN
+@return previous lock on the same record, NULL if none exists */
 const lock_t*
 lock_rec_get_prev(
 /*==============*/
@@ -107,7 +988,6 @@ lock_rec_get_prev(
 /*********************************************************************//**
 Cancels a waiting lock request and releases possible other transactions
 waiting behind it. */
-UNIV_INTERN
 void
 lock_cancel_waiting_and_release(
 /*============================*/
@@ -116,7 +996,7 @@ lock_cancel_waiting_and_release(
 /*********************************************************************//**
 Checks if some transaction has an implicit x-lock on a record in a clustered
 index.
-@return	transaction id of the transaction which has the x-lock, or 0 */
+@return transaction id of the transaction which has the x-lock, or 0 */
 UNIV_INLINE
 trx_id_t
 lock_clust_rec_some_has_impl(
@@ -124,7 +1004,171 @@ lock_clust_rec_some_has_impl(
 	const rec_t*		rec,	/*!< in: user record */
 	const dict_index_t*	index,	/*!< in: clustered index */
 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_on_page_const(
+/*============================*/
+	const lock_t*	lock);	/*!< in: a record lock */
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if not, or if i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+	const lock_t*	lock,	/*!< in: record lock */
+	ulint		i);	/*!< in: index of the bit */
+
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return number of bits */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+	const lock_t*	lock);	/*!< in: record lock */
+
+/**********************************************************************//**
+Sets the nth bit of a record lock to TRUE. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*=================*/
+	lock_t*	lock,	/*!< in: record lock */
+	ulint	i);	/*!< in: index of the bit */
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+	lock_t*		lock);		/*!< in: a record lock */
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by its
+file address.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+	hash_table_t*   lock_hash,	/*!< in: lock hash table */
+	ulint           space,		/*!< in: space */
+	ulint           page_no);	/*!< in: page number */
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by a
+pointer to it.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+	hash_table_t*		lock_hash,	/*!< in: lock hash table */
+	const buf_block_t*	block);		/*!< in: buffer block */
+
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+	ulint	heap_no,/*!< in: heap number of the record */
+	lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_const(
+/*====================*/
+	ulint		heap_no,/*!< in: heap number of the record */
+	const lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+	hash_table_t*		hash,	/*!< in: lock hash table to search */
+	const buf_block_t*	block,	/*!< in: block containing the record */
+	ulint			heap_no);/*!< in: heap number of the record */
+
+/*********************************************************************//**
+Gets the mode of a lock.
+@return mode */
+UNIV_INLINE
+enum lock_mode
+lock_get_mode(
+/*==========*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2);	/*!< in: lock mode */
+
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return nonzero if mode1 stronger or equal to mode2 */
+UNIV_INLINE
+ulint
+lock_mode_stronger_or_eq(
+/*=====================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2);	/*!< in: lock mode */
+
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return LOCK_WAIT if waiting, 0 if not */
+UNIV_INLINE
+ulint
+lock_get_wait(
+/*==========*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Looks for a suitable type record lock struct by the same trx on the same page.
+This can be used to save space when a new record lock should be set on a page:
+no new struct is needed, if a suitable old is found.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+	ulint		type_mode,	/*!< in: lock type_mode field */
+	ulint		heap_no,	/*!< in: heap number of the record */
+	lock_t*		lock,		/*!< in: lock_rec_get_first_on_page() */
+	const trx_t*	trx);		/*!< in: transaction */
+
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger. This
+function should only be called by the thread that owns the transaction.
+@return lock or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_has(
+/*===========*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_table_t*	table,	/*!< in: table */
+	enum lock_mode		mode);	/*!< in: lock mode */
 
 #ifndef UNIV_NONINL
 #include "lock0priv.ic"
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
index 6b70dc33d3c..f6e5f7acb8f 100644
--- a/storage/innobase/include/lock0priv.ic
+++ b/storage/innobase/include/lock0priv.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,7 @@ methods but they are used only in that file. */
 
 /*********************************************************************//**
 Gets the type of a lock.
-@return	LOCK_TABLE or LOCK_REC */
+@return LOCK_TABLE or LOCK_REC */
 UNIV_INLINE
 ulint
 lock_get_type_low(
@@ -49,7 +49,7 @@ lock_get_type_low(
 /*********************************************************************//**
 Checks if some transaction has an implicit x-lock on a record in a clustered
 index.
-@return	transaction id of the transaction which has the x-lock, or 0 */
+@return transaction id of the transaction which has the x-lock, or 0 */
 UNIV_INLINE
 trx_id_t
 lock_clust_rec_some_has_impl(
@@ -64,4 +64,362 @@ lock_clust_rec_some_has_impl(
 	return(row_get_rec_trx_id(rec, index, offsets));
 }
 
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return the n_bits field of the record lock */
+UNIV_INLINE
+ulint
+lock_rec_get_n_bits(
+/*================*/
+	const lock_t*	lock)	/*!< in: record lock */
+{
+	return(lock->un_member.rec_lock.n_bits);
+}
+
+/**********************************************************************//**
+Sets the nth bit of a record lock to TRUE and increments the n_rec_locks
+counter of the transaction that owns the lock. */
+UNIV_INLINE
+void
+lock_rec_set_nth_bit(
+/*=================*/
+	lock_t*	lock,	/*!< in: record lock */
+	ulint	i)	/*!< in: index of the bit */
+{
+	ut_ad(lock);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(i < lock->un_member.rec_lock.n_bits);
+
+	/* The bitmap bytes start right after the lock struct. */
+	byte*	bitmap = reinterpret_cast<byte*>(&lock[1]);
+
+	bitmap[i / 8] |= 1 << (i % 8);
+
+	/* The counter is incremented on every call, whether or not the bit
+	was already set. */
+	++lock->trx->lock.n_rec_locks;
+}
+
+/*********************************************************************//**
+Gets the first or next record lock on a page (non-const variant).
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+	lock_t*	lock)	/*!< in: a record lock */
+{
+	return(const_cast<lock_t*>(lock_rec_get_next_on_page_const(lock)));
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by its
+file address.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page_addr(
+/*============================*/
+	hash_table_t*	lock_hash,	/*!< in: lock hash table */
+	ulint		space,		/*!< in: space */
+	ulint		page_no)	/*!< in: page number */
+{
+	ut_ad(lock_mutex_own());
+
+	lock_t*	lock = static_cast<lock_t*>(
+		HASH_GET_FIRST(lock_hash, lock_rec_hash(space, page_no)));
+
+	/* Step along the chain of the cell until a lock whose (space,
+	page_no) equals the requested address is reached, or the chain
+	ends. */
+	while (lock != NULL
+	       && (lock->un_member.rec_lock.space != space
+		   || lock->un_member.rec_lock.page_no != page_no)) {
+
+		lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock));
+	}
+
+	return(lock);
+}
+
+/*********************************************************************//**
+Gets the first record lock on a page, where the page is identified by a
+pointer to it.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first_on_page(
+/*=======================*/
+	hash_table_t*		lock_hash,	/*!< in: lock hash table */
+	const buf_block_t*	block)		/*!< in: buffer block */
+{
+	ut_ad(lock_mutex_own());
+
+	const ulint	space_id = block->page.id.space();
+	const ulint	page_num = block->page.id.page_no();
+	const ulint	cell = buf_block_get_lock_hash_val(block);
+
+	lock_t*	lock = static_cast<lock_t*>(HASH_GET_FIRST(lock_hash, cell));
+
+	/* Step along the chain of the cell until a lock whose (space,
+	page_no) equals that of the block is reached, or the chain ends.
+	The "hash" given to HASH_GET_NEXT() is not a local variable. */
+	while (lock != NULL
+	       && (lock->un_member.rec_lock.space != space_id
+		   || lock->un_member.rec_lock.page_no != page_num)) {
+
+		lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock));
+	}
+
+	return(lock);
+}
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next(
+/*==============*/
+	ulint	heap_no,/*!< in: heap number of the record */
+	lock_t*	lock)	/*!< in: lock */
+{
+	ut_ad(lock_mutex_own());
+	for (;;) {
+		ut_ad(lock_get_type_low(lock) == LOCK_REC);
+		lock = lock_rec_get_next_on_page(lock);
+		if (lock == NULL || lock_rec_get_nth_bit(lock, heap_no)) {
+			return(lock);
+		}
+	}
+}
+
+/*********************************************************************//**
+Gets the next explicit lock request on a record (const variant).
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_const(
+/*====================*/
+	ulint		heap_no,/*!< in: heap number of the record */
+	const lock_t*	lock)	/*!< in: lock */
+{
+	return(lock_rec_get_next(heap_no, const_cast<lock_t*>(lock)));
+}
+
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_first(
+/*===============*/
+	hash_table_t*		hash,	/*!< in: lock hash table to search */
+	const buf_block_t*	block,	/*!< in: block containing the record */
+	ulint			heap_no)/*!< in: heap number of the record */
+{
+	ut_ad(lock_mutex_own());
+
+	lock_t*	lock = lock_rec_get_first_on_page(hash, block);
+
+	/* Skip the locks on the page whose bitmap lacks the heap_no bit. */
+	while (lock != NULL && !lock_rec_get_nth_bit(lock, heap_no)) {
+		lock = lock_rec_get_next_on_page(lock);
+	}
+
+	return(lock);
+}
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if not, or if i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+	const lock_t*	lock,	/*!< in: record lock */
+	ulint		i)	/*!< in: index of the bit */
+{
+	ut_ad(lock);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+	if (i >= lock->un_member.rec_lock.n_bits) {
+		/* Outside the bitmap: reported as "not set" rather than
+		read. */
+		return(FALSE);
+	}
+
+	/* The bitmap bytes start right after the lock struct. */
+	const byte*	bitmap = reinterpret_cast<const byte*>(&lock[1]);
+
+	return(1 & (bitmap[i / 8] >> (i % 8)));
+}
+
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+const lock_t*
+lock_rec_get_next_on_page_const(
+/*============================*/
+	const lock_t*	lock)	/*!< in: a record lock */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+	/* Remember the page identity before "lock" is advanced. */
+	const ulint	space_id = lock->un_member.rec_lock.space;
+	const ulint	page_num = lock->un_member.rec_lock.page_no;
+
+	/* Step along the hash chain until a lock on the same page is
+	reached, or the chain ends. */
+
+	do {
+		lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock));
+	} while (lock != NULL
+		 && (lock->un_member.rec_lock.space != space_id
+		     || lock->un_member.rec_lock.page_no != page_num));
+
+	return(lock);
+}
+
+/*********************************************************************//**
+Gets the mode of a lock.
+@return the LOCK_MODE_MASK bits of lock->type_mode, as a lock_mode */
+UNIV_INLINE
+enum lock_mode
+lock_get_mode(
+/*==========*/
+	const lock_t*	lock)   /*!< in: lock */
+{
+	ut_ad(lock);
+
+	return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK));
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return lock_compatibility_matrix[mode1][mode2]; nonzero if compatible */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
+{
+	ut_ad((ulint) mode1 < lock_types);
+	ut_ad((ulint) mode2 < lock_types);
+	/* Plain table lookup: the two modes index the matrix directly. */
+	return(lock_compatibility_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return lock_strength_matrix[mode1][mode2]; nonzero if stronger or equal */
+UNIV_INLINE
+ulint
+lock_mode_stronger_or_eq(
+/*=====================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
+{
+	ut_ad((ulint) mode1 < lock_types);
+	ut_ad((ulint) mode2 < lock_types);
+	/* NOTE(review): confirm lock_types also bounds lock_strength_matrix. */
+	return(lock_strength_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return type_mode & LOCK_WAIT, i.e. LOCK_WAIT if waiting, 0 if not */
+UNIV_INLINE
+ulint
+lock_get_wait(
+/*==========*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_ad(lock);
+
+	return(lock->type_mode & LOCK_WAIT);
+}
+
+/*********************************************************************//**
+Searches the record locks of a page, starting from the given lock, for a
+lock struct of the same trx and type_mode that can also hold heap_no. Such
+a struct can be reused instead of creating a new one, which saves space.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_rec_find_similar_on_page(
+/*==========================*/
+	ulint           type_mode,      /*!< in: lock type_mode field */
+	ulint           heap_no,        /*!< in: heap number of the record */
+	lock_t*         lock,           /*!< in: lock_rec_get_first_on_page() */
+	const trx_t*    trx)            /*!< in: transaction */
+{
+	ut_ad(lock_mutex_own());
+
+	while (lock != NULL) {
+		/* A lock qualifies when it belongs to trx, has exactly the
+		requested type_mode, and its bitmap has room for heap_no. */
+		const bool	suitable = lock->trx == trx
+			&& lock->type_mode == type_mode
+			&& lock_rec_get_n_bits(lock) > heap_no;
+		if (suitable) {
+			return(lock);
+		}
+		lock = lock_rec_get_next_on_page(lock);
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger. This
+function should only be called by the thread that owns the transaction.
+@return the first matching lock found in trx->lock.table_locks, or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_has(
+/*===========*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_table_t*	table,	/*!< in: table */
+	lock_mode		in_mode)/*!< in: lock mode */
+{
+	if (trx->lock.table_locks.empty()) {
+		return(NULL);
+	}
+
+	typedef lock_pool_t::const_reverse_iterator iterator;
+
+	iterator	end = trx->lock.table_locks.rend();
+
+	/* Look for a lock of equal or greater strength on the table. */
+	/* The pool is scanned from its last element towards its first. */
+	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
+
+		const lock_t*	lock = *it;
+		/* NULL entries in the pool are skipped. */
+		if (lock == NULL) {
+			continue;
+		}
+
+		lock_mode	mode = lock_get_mode(lock);
+
+		ut_ad(trx == lock->trx);
+		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+		ut_ad(lock->un_member.tab_lock.table != NULL);
+
+		if (table == lock->un_member.tab_lock.table
+		    && lock_mode_stronger_or_eq(mode, in_mode)) {
+			/* Debug assertion: the match must not be waiting. */
+			ut_ad(!lock_get_wait(lock));
+
+			return(lock);
+		}
+	}
+
+	return(NULL);
+}
+
 /* vim: set filetype=c: */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index cf32e72f864..d08eaabfb1e 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,12 +23,16 @@ The transaction lock system global types
 Created 5/7/1996 Heikki Tuuri
 *******************************************************/
 
+#include "ut0lst.h"
+
 #ifndef lock0types_h
 #define lock0types_h
 
 #define lock_t ib_lock_t
+
 struct lock_t;
 struct lock_sys_t;
+struct lock_table_t;
 
 /* Basic lock modes */
 enum lock_mode {
@@ -43,5 +47,32 @@ enum lock_mode {
 	LOCK_NONE_UNSET = 255
 };
 
+/** Convert the given enum value into string.
+@param[in]	mode	the lock mode
+@return the name of the enumerator as a string literal, e.g. "LOCK_IS" */
+inline
+const char* lock_mode_string(enum lock_mode mode)
+{
+	switch (mode) {
+	case LOCK_IS:
+		return("LOCK_IS");
+	case LOCK_IX:
+		return("LOCK_IX");
+	case LOCK_S:
+		return("LOCK_S");
+	case LOCK_X:
+		return("LOCK_X");
+	case LOCK_AUTO_INC:
+		return("LOCK_AUTO_INC");
+	case LOCK_NONE:
+		return("LOCK_NONE");
+	case LOCK_NONE_UNSET:
+		return("LOCK_NONE_UNSET");
+	default:
+		ut_error;	/* not one of the modes handled above */
+	}
+}
+
+typedef UT_LIST_BASE_NODE_T(lock_t) trx_lock_list_t;
 
-#endif
+#endif /* lock0types_h */
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 0015ea15c35..caa067cd4ba 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved.
 Copyright (c) 2009, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -34,49 +34,35 @@ Created 12/9/1995 Heikki Tuuri
 #define log0log_h
 
 #include "univ.i"
-#include "ut0byte.h"
-#include "ut0lst.h"
+#include "dyn0buf.h"
 #ifndef UNIV_HOTBACKUP
-#include "sync0sync.h"
 #include "sync0rw.h"
 #endif /* !UNIV_HOTBACKUP */
 #include "log0crypt.h"
-
-#define LSN_MAX			IB_UINT64_MAX
-
-#define LSN_PF			UINT64PF
+#include "log0types.h"
 
 /** Redo log buffer */
 struct log_t;
+
 /** Redo log group */
 struct log_group_t;
 
-#ifdef UNIV_DEBUG
-/** Flag: write to log file? */
-extern	ibool	log_do_write;
-/** Flag: enable debug output when writing to the log? */
-extern	ibool	log_debug_writes;
-#else /* UNIV_DEBUG */
-/** Write to log */
-# define log_do_write TRUE
-#endif /* UNIV_DEBUG */
+/** Magic value to use instead of log checksums when they are disabled */
+#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
 
-/** Wait modes for log_write_up_to @{ */
-#define LOG_NO_WAIT		91
-#define LOG_WAIT_ONE_GROUP	92
-#define	LOG_WAIT_ALL_GROUPS	93
-/* @} */
-/** Maximum number of log groups in log_group_t::checkpoint_buf */
-#define LOG_MAX_N_GROUPS	32
+typedef ulint (*log_checksum_func_t)(const byte* log_block);
+
+/** Pointer to the log checksum calculation function. Protected with
+log_sys->mutex. */
+extern log_checksum_func_t log_checksum_algorithm_ptr;
 
 /*******************************************************************//**
 Calculates where in log files we find a specified lsn.
-@return	log file number */
-UNIV_INTERN
+@return log file number */
 ulint
 log_calc_where_lsn_is(
 /*==================*/
-	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
+	int64_t*	log_file_offset,	/*!< out: offset in that file
 						(including the header) */
 	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
 						lsn */
@@ -84,26 +70,20 @@ log_calc_where_lsn_is(
 						determine */
 	ulint		n_log_files,		/*!< in: total number of log
 						files */
-	ib_int64_t	log_file_size);		/*!< in: log file size
+	int64_t		log_file_size);		/*!< in: log file size
 						(including the header) */
 #ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return	end lsn of the log record, zero if did not succeed */
+/** Append a string to the log.
+@param[in]	str		string
+@param[in]	len		string length
+@param[out]	start_lsn	start LSN of the log record
+@return end lsn of the log record, zero if did not succeed */
 UNIV_INLINE
 lsn_t
 log_reserve_and_write_fast(
-/*=======================*/
-	const void*	str,	/*!< in: string */
-	ulint		len,	/*!< in: string length */
-	lsn_t*		start_lsn);/*!< out: start lsn of the log record */
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void);
-/*=============*/
+	const void*	str,
+	ulint		len,
+	lsn_t*		start_lsn);
 /***********************************************************************//**
 Checks if there is need for a log buffer flush or a new checkpoint, and does
 this if yes. Any database operation should call this when it has modified
@@ -113,34 +93,45 @@ UNIV_INLINE
 void
 log_free_check(void);
 /*================*/
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release.
-@return	start lsn of the log record */
-UNIV_INTERN
+
+/** Extends the log buffer.
+@param[in]	len	requested minimum size in bytes */
+void
+log_buffer_extend(
+	ulint	len);
+
+/** Check the margin so that the transaction log written since the last
+checkpoint is not overwritten. If the log write is estimated to exceed the
+log_group_capacity, wait until the checkpoint has advanced far enough.
+@param[in]	len	length of the data to be written */
+
+void
+log_margin_checkpoint_age(
+	ulint	len);
+
+/** Open the log for log_write_low. The log must be closed with log_close.
+@param[in]	len	length of the data to be written
+@return start lsn of the log record */
 lsn_t
 log_reserve_and_open(
-/*=================*/
-	ulint	len);	/*!< in: length of data to be catenated */
+	ulint	len);
 /************************************************************//**
 Writes to the log the string given. It is assumed that the caller holds the
 log mutex. */
-UNIV_INTERN
 void
 log_write_low(
 /*==========*/
-	byte*	str,		/*!< in: string */
-	ulint	str_len);	/*!< in: string length */
+	const byte*	str,		/*!< in: string */
+	ulint		str_len);	/*!< in: string length */
 /************************************************************//**
 Closes the log.
-@return	lsn */
-UNIV_INTERN
+@return lsn */
 lsn_t
 log_close(void);
 /*===========*/
 /************************************************************//**
 Gets the current lsn.
-@return	current lsn */
+@return current lsn */
 UNIV_INLINE
 lsn_t
 log_get_lsn(void);
@@ -162,7 +153,7 @@ log_get_flush_lsn(void);
 /****************************************************************
 Gets the log group capacity. It is OK to read the value without
 holding log_sys->mutex because it is constant.
-@return	log group capacity */
+@return log group capacity */
 UNIV_INLINE
 lsn_t
 log_get_capacity(void);
@@ -170,37 +161,31 @@ log_get_capacity(void);
 /****************************************************************
 Get log_sys::max_modified_age_async. It is OK to read the value without
 holding log_sys::mutex because it is constant.
-@return	max_modified_age_async */
+@return max_modified_age_async */
 UNIV_INLINE
 lsn_t
 log_get_max_modified_age_async(void);
 /*================================*/
 /******************************************************//**
 Initializes the log. */
-UNIV_INTERN
 void
 log_init(void);
 /*==========*/
 /******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
-void
+Inits a log group to the log system.
+@return true if success, false if not */
+MY_ATTRIBUTE((warn_unused_result))
+bool
 log_group_init(
 /*===========*/
 	ulint	id,			/*!< in: group id */
 	ulint	n_files,		/*!< in: number of log files */
 	lsn_t	file_size,		/*!< in: log file size in bytes */
-	ulint	space_id,		/*!< in: space id of the file space
+	ulint	space_id);		/*!< in: space id of the file space
 					which contains the log files of this
 					group */
-	ulint	archive_space_id);	/*!< in: space id of the file space
-					which contains some archived log
-					files for this group; currently, only
-					for the first log group this is
-					used */
 /******************************************************//**
 Completes an i/o to a log file. */
-UNIV_INTERN
 void
 log_io_complete(
 /*============*/
@@ -210,156 +195,83 @@ This function is called, e.g., when a transaction wants to commit. It checks
 that the log has been written to the log file up to the last log entry written
 by the transaction. If there is a flush running, it waits and checks if the
 flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
 void
 log_write_up_to(
 /*============*/
 	lsn_t	lsn,	/*!< in: log sequence number up to which
 			the log should be written, LSN_MAX if not specified */
-	ulint	wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
-			or LOG_WAIT_ALL_GROUPS */
-	ibool	flush_to_disk);
-			/*!< in: TRUE if we want the written log
+	bool	flush_to_disk);
+			/*!< in: true if we want the written log
 			also to be flushed to disk */
-/****************************************************************//**
-Does a syncronous flush of the log buffer to disk. */
-UNIV_INTERN
+/** write to the log file up to the last log entry.
+@param[in]	sync	whether we want the written log
+also to be flushed to disk. */
 void
-log_buffer_flush_to_disk(void);
-/*==========================*/
+log_buffer_flush_to_disk(
+	bool sync = true);
 /****************************************************************//**
 This functions writes the log buffer to the log file and if 'flush'
 is set it forces a flush of the log file as well. This is meant to be
 called from background master thread only as it does not wait for
 the write (+ possible flush) to finish. */
-UNIV_INTERN
 void
 log_buffer_sync_in_background(
 /*==========================*/
-	ibool	flush);	/*<! in: flush the logs to disk */
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
+	bool	flush);	/*!< in: flush the logs to disk */
+/** Make a checkpoint. Note that this function does not flush dirty
 blocks from the buffer pool: it only checks what is lsn of the oldest
 modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return	TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
+log files. Use log_make_checkpoint_at() to flush also the pool.
+@param[in]	sync		whether to wait for the write to complete
+@param[in]	write_always	force a write even if no log
+has been generated since the latest checkpoint
+@return true if success, false if a checkpoint write was already running */
+bool
 log_checkpoint(
-/*===========*/
-	ibool	sync,		/*!< in: TRUE if synchronous operation is
-				desired */
-	ibool	write_always);	/*!< in: the function normally checks if the
-				the new checkpoint would have a greater
-				lsn than the previous one: if not, then no
-				physical write is done; by setting this
-				parameter TRUE, a physical write will always be
-				made to log files */
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. */
-UNIV_INTERN
+	bool	sync,
+	bool	write_always);
+
+/** Make a checkpoint at or after a specified LSN.
+@param[in]	lsn		the log sequence number, or LSN_MAX
+for the latest LSN
+@param[in]	write_always	force a write even if no log
+has been generated since the latest checkpoint */
 void
 log_make_checkpoint_at(
-/*===================*/
-	lsn_t	lsn,		/*!< in: make a checkpoint at this or a
-				later lsn, if LSN_MAX, makes
-				a checkpoint at the latest lsn */
-	ibool	write_always);	/*!< in: the function normally checks if
-				the new checkpoint would have a
-				greater lsn than the previous one: if
-				not, then no physical write is done;
-				by setting this parameter TRUE, a
-				physical write will always be made to
-				log files */
+	lsn_t			lsn,
+	bool			write_always);
+
 /****************************************************************//**
 Makes a checkpoint at the latest lsn and writes it to first page of each
 data file in the database, so that we know that the file spaces contain
 all modifications up to that lsn. This can only be called at database
 shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
 void
 logs_empty_and_mark_files_at_shutdown(void);
 /*=======================================*/
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-UNIV_INTERN
+/** Read a log group header page to log_sys->checkpoint_buf.
+@param[in]	group	log group
+@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
 void
-log_group_read_checkpoint_info(
-/*===========================*/
-	log_group_t*	group,	/*!< in: log group */
-	ulint		field);	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
-	const byte*	buf,	/*!< in: buffer containing checkpoint info */
-	ulint		n,	/*!< in: nth slot */
-	ulint*		file_no,/*!< out: archived file number */
-	ulint*		offset);/*!< out: archived file offset */
-/******************************************************//**
-Writes checkpoint info to groups. */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void);
-/*==================================*/
-/********************************************************************//**
-Starts an archiving operation.
-@return	TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
-	ibool	sync,	/*!< in: TRUE if synchronous operation is desired */
-	ulint*	n_bytes);/*!< out: archive log buffer size, 0 if nothing to
-			archive */
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from a number one higher, so that the archiving will
-not write again to the archived log files which exist when this function
-returns.
-@return	DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_stop(void);
-/*==================*/
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return	DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void);
-/*===================*/
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return	DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void);
-/*==========================*/
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return	DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void);
-/*========================*/
-/******************************************************//**
-Generates an archived log file name. */
-UNIV_INTERN
+log_group_header_read(
+	const log_group_t*	group,
+	ulint			header);
+/** Write checkpoint info to the log header and invoke log_mutex_exit().
+@param[in]	sync	whether to wait for the write to complete */
 void
-log_archived_file_name_gen(
-/*=======================*/
-	char*	buf,	/*!< in: buffer where to write */
-	ulint	id,	/*!< in: group id */
-	ulint	file_no);/*!< in: file number */
+log_write_checkpoint_info(
+	bool	sync);
+
+/** Set extra data to be written to the redo log during checkpoint.
+@param[in]	buf	data to be appended on checkpoint, or NULL
+@return pointer to previous data to be appended on checkpoint */
+mtr_buf_t*
+log_append_on_checkpoint(
+	mtr_buf_t*	buf);
 #else /* !UNIV_HOTBACKUP */
 /******************************************************//**
 Writes info to a buffer of a log group when log files are created in
 backup restoration. */
-UNIV_INTERN
 void
 log_reset_first_header_and_checkpoint(
 /*==================================*/
@@ -369,49 +281,27 @@ log_reset_first_header_and_checkpoint(
 				we pretend that there is a checkpoint at
 				start + LOG_BLOCK_HDR_SIZE */
 #endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
+/**
 Checks that there is enough free space in the log to start a new query step.
 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
 function may only be called if the calling thread owns no synchronization
 objects! */
-UNIV_INTERN
 void
 log_check_margins(void);
-/*===================*/
 #ifndef UNIV_HOTBACKUP
 /******************************************************//**
 Reads a specified log segment to a buffer. */
-UNIV_INTERN
 void
 log_group_read_log_seg(
 /*===================*/
-	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
 	byte*		buf,		/*!< in: buffer where to read */
 	log_group_t*	group,		/*!< in: log group */
 	lsn_t		start_lsn,	/*!< in: read area start */
 	lsn_t		end_lsn);	/*!< in: read area end */
-/******************************************************//**
-Writes a buffer to a log file group. */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
-	log_group_t*	group,		/*!< in: log group */
-	byte*		buf,		/*!< in: buffer */
-	ulint		len,		/*!< in: buffer len; must be divisible
-					by OS_FILE_LOG_BLOCK_SIZE */
-	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
-					be divisible by
-					OS_FILE_LOG_BLOCK_SIZE */
-	ulint		new_data_offset);/*!< in: start offset of new data in
-					buf: this parameter is used to decide
-					if we have to write a new log file
-					header */
 /********************************************************//**
 Sets the field values in group to correspond to a given lsn. For this function
 to work, the values must already be correctly initialized to correspond to
 some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
 void
 log_group_set_fields(
 /*=================*/
@@ -421,8 +311,7 @@ log_group_set_fields(
 /******************************************************//**
 Calculates the data capacity of a log group, when the log file headers are not
 included.
-@return	capacity in bytes */
-UNIV_INTERN
+@return capacity in bytes */
 lsn_t
 log_group_get_capacity(
 /*===================*/
@@ -430,7 +319,7 @@ log_group_get_capacity(
 #endif /* !UNIV_HOTBACKUP */
 /************************************************************//**
 Gets a log block flush bit.
-@return	TRUE if this block was the first to be written in a log flush */
+@return TRUE if this block was the first to be written in a log flush */
 UNIV_INLINE
 ibool
 log_block_get_flush_bit(
@@ -438,7 +327,7 @@ log_block_get_flush_bit(
 	const byte*	log_block);	/*!< in: log block */
 /************************************************************//**
 Gets a log block number stored in the header.
-@return	log block number stored in the block header */
+@return log block number stored in the block header */
 UNIV_INLINE
 ulint
 log_block_get_hdr_no(
@@ -446,7 +335,7 @@ log_block_get_hdr_no(
 	const byte*	log_block);	/*!< in: log block */
 /************************************************************//**
 Gets a log block data length.
-@return	log block data length measured as a byte offset from the block start */
+@return log block data length measured as a byte offset from the block start */
 UNIV_INLINE
 ulint
 log_block_get_data_len(
@@ -462,15 +351,31 @@ log_block_set_data_len(
 	ulint	len);		/*!< in: data length */
 /************************************************************//**
 Calculates the checksum for a log block.
-@return	checksum */
+@return checksum */
 UNIV_INLINE
 ulint
 log_block_calc_checksum(
 /*====================*/
 	const byte*	block);	/*!< in: log block */
+
+/** Calculates the checksum for a log block using the CRC32 algorithm.
+@param[in]	block	log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_crc32(
+	const byte*	block);
+
+/** Calculates the checksum for a log block using the "no-op" algorithm.
+@param[in]	block	the redo log block
+@return		the calculated checksum value */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_none(const byte*	block);
+
 /************************************************************//**
 Gets a log block checksum field value.
-@return	checksum */
+@return checksum */
 UNIV_INLINE
 ulint
 log_block_get_checksum(
@@ -503,7 +408,7 @@ log_block_set_first_rec_group(
 	ulint	offset);	/*!< in: offset, 0 if none */
 /************************************************************//**
 Gets a log block checkpoint number field (4 lowest bytes).
-@return	checkpoint no (4 lowest bytes) */
+@return checkpoint no (4 lowest bytes) */
 UNIV_INLINE
 ulint
 log_block_get_checkpoint_no(
@@ -517,6 +422,7 @@ log_block_init(
 /*===========*/
 	byte*	log_block,	/*!< in: pointer to the log buffer */
 	lsn_t	lsn);		/*!< in: lsn within the log block */
+#ifdef UNIV_HOTBACKUP
 /************************************************************//**
 Initializes a log block in the log buffer in the old, < 3.23.52 format, where
 there was no checksum yet. */
@@ -526,9 +432,10 @@ log_block_init_in_old_format(
 /*=========================*/
 	byte*	log_block,	/*!< in: pointer to the log buffer */
 	lsn_t	lsn);		/*!< in: lsn within the log block */
+#endif /* UNIV_HOTBACKUP */
 /************************************************************//**
 Converts a lsn to a log block number.
-@return	log block number, it is > 0 and <= 1G */
+@return log block number, it is > 0 and <= 1G */
 UNIV_INLINE
 ulint
 log_block_convert_lsn_to_no(
@@ -536,59 +443,52 @@ log_block_convert_lsn_to_no(
 	lsn_t	lsn);	/*!< in: lsn of a byte within the block */
 /******************************************************//**
 Prints info of the log. */
-UNIV_INTERN
 void
 log_print(
 /*======*/
 	FILE*	file);	/*!< in: file where to print */
 /******************************************************//**
 Peeks the current lsn.
-@return	TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
+@return TRUE if success, FALSE if could not get the log system mutex */
 ibool
 log_peek_lsn(
 /*=========*/
 	lsn_t*	lsn);	/*!< out: if returns TRUE, current lsn is here */
 /**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
 void
 log_refresh_stats(void);
 /*===================*/
 /********************************************************//**
 Closes all log groups. */
-UNIV_INTERN
 void
 log_group_close_all(void);
 /*=====================*/
 /********************************************************//**
 Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
 void
 log_shutdown(void);
 /*==============*/
 /********************************************************//**
 Free the log system data structures. */
-UNIV_INTERN
 void
 log_mem_free(void);
 /*==============*/
 
+/** Redo log system */
 extern log_t*	log_sys;
 
+/** Whether to generate and require checksums on the redo log pages */
+extern my_bool	innodb_log_checksums;
+
 /* Values used as flags */
 #define LOG_FLUSH	7652559
 #define LOG_CHECKPOINT	78656949
-#ifdef UNIV_LOG_ARCHIVE
-# define LOG_ARCHIVE	11122331
-#endif /* UNIV_LOG_ARCHIVE */
-#define LOG_RECOVER	98887331
 
 /* The counting of lsn's starts from this value: this must be non-zero */
 #define LOG_START_LSN		((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
 
 #define LOG_BUFFER_SIZE		(srv_log_buffer_size * UNIV_PAGE_SIZE)
-#define LOG_ARCHIVE_BUF_SIZE	(srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
 
 /* Offsets of a log block header */
 #define	LOG_BLOCK_HDR_NO	0	/* block number which must be > 0 and
@@ -629,55 +529,46 @@ extern log_t*	log_sys;
 					.._HDR_NO */
 #define	LOG_BLOCK_TRL_SIZE	4	/* trailer size in bytes */
 
-/* Offsets for a checkpoint field */
+/* Offsets inside the checkpoint pages (redo log format version 1) */
 #define LOG_CHECKPOINT_NO		0
 #define LOG_CHECKPOINT_LSN		8
-#define LOG_CHECKPOINT_OFFSET_LOW32	16
-#define LOG_CHECKPOINT_LOG_BUF_SIZE	20
-#define	LOG_CHECKPOINT_ARCHIVED_LSN	24
-#define	LOG_CHECKPOINT_GROUP_ARRAY	32
-
-/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
-
-#define LOG_CHECKPOINT_ARCHIVED_FILE_NO	0
-#define LOG_CHECKPOINT_ARCHIVED_OFFSET	4
-
-#define	LOG_CHECKPOINT_ARRAY_END	(LOG_CHECKPOINT_GROUP_ARRAY\
-							+ LOG_MAX_N_GROUPS * 8)
-#define LOG_CHECKPOINT_CHECKSUM_1	LOG_CHECKPOINT_ARRAY_END
-#define LOG_CHECKPOINT_CHECKSUM_2	(4 + LOG_CHECKPOINT_ARRAY_END)
-#if 0
-#define LOG_CHECKPOINT_FSP_FREE_LIMIT	(8 + LOG_CHECKPOINT_ARRAY_END)
-					/*!< Not used (0);
-					This used to contain the
-					current fsp free limit in
-					tablespace 0, in units of one
-					megabyte.
-
-					This information might have been used
-					since mysqlbackup version 0.35 but
-					before 1.41 to decide if unused ends of
-					non-auto-extending data files
-					in space 0 can be truncated.
-
-					This information was made obsolete
-					by mysqlbackup --compress. */
-#define LOG_CHECKPOINT_FSP_MAGIC_N	(12 + LOG_CHECKPOINT_ARRAY_END)
-					/*!< Not used (0);
-					This magic number tells if the
-					checkpoint contains the above field:
-					the field was added to
-					InnoDB-3.23.50 and
-					removed from MySQL 5.6 */
-#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL	1441231243
-					/*!< if LOG_CHECKPOINT_FSP_MAGIC_N
-					contains this value, then
-					LOG_CHECKPOINT_FSP_FREE_LIMIT
-					is valid */
-#endif
-#define LOG_CHECKPOINT_OFFSET_HIGH32	(16 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_OFFSET		16
+#define LOG_CHECKPOINT_LOG_BUF_SIZE	24
+
+/** Offsets of a log file header */
+/* @{ */
+/** Log file header format identifier (32-bit unsigned big-endian integer).
+This used to be called LOG_GROUP_ID and always written as 0,
+because InnoDB never supported more than one copy of the redo log. */
+#define LOG_HEADER_FORMAT	0
+/** 4 unused (zero-initialized) bytes. In format version 0, the
+LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
+which the LOG_FILE_START_LSN was renamed to. */
+#define LOG_HEADER_PAD1		4
+/** LSN of the start of data in this log file (with format version 1;
+in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
+#define LOG_HEADER_START_LSN	8
+/** A null-terminated string which will contain either the string 'ibbackup'
+and the creation time if the log file was created by mysqlbackup --restore,
+or the name and version of the server that created the redo log file. */
+#define LOG_HEADER_CREATOR	16
+/** End of the log file creator field. */
+#define LOG_HEADER_CREATOR_END	(LOG_HEADER_CREATOR + 32)
+/** Contents of the LOG_HEADER_CREATOR field */
+#define LOG_HEADER_CREATOR_CURRENT		\
+	"MariaDB "				\
+	IB_TO_STR(MYSQL_VERSION_MAJOR) "."	\
+	IB_TO_STR(MYSQL_VERSION_MINOR) "."	\
+	IB_TO_STR(MYSQL_VERSION_PATCH)
+
+/** The redo log format identifier corresponding to the current format version.
+Stored in LOG_HEADER_FORMAT. */
+#define LOG_HEADER_FORMAT_CURRENT	1
+
+// JAN: TODO: Should 32 here be LOG_HEADER_CREATOR_END ?
+// Problem: Log format 5.6 == 5.7 ?
+#define LOG_CHECKPOINT_ARRAY_END	(32 + 32 * 8)
 #define LOG_CRYPT_VER			(20 + LOG_CHECKPOINT_ARRAY_END)
-
 #define LOG_CRYPT_MAX_ENTRIES           (5)
 #define LOG_CRYPT_ENTRY_SIZE            (4 + 4 + 2 * MY_AES_BLOCK_SIZE)
 #define LOG_CRYPT_SIZE                  (1 + 1 +			\
@@ -686,35 +577,8 @@ extern log_t*	log_sys;
 
 #define LOG_CHECKPOINT_SIZE		(20 + LOG_CHECKPOINT_ARRAY_END + \
 					 LOG_CRYPT_SIZE)
+/* @} */
 
-/* Offsets of a log file header */
-#define LOG_GROUP_ID		0	/* log group number */
-#define LOG_FILE_START_LSN	4	/* lsn of the start of data in this
-					log file */
-#define LOG_FILE_NO		12	/* 4-byte archived log file number;
-					this field is only defined in an
-					archived log file */
-#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
-					/* a 32-byte field which contains
-					the string 'ibbackup' and the
-					creation time if the log file was
-					created by mysqlbackup --restore;
-					when mysqld is first time started
-					on the restored database, it can
-					print helpful info for the user */
-#define	LOG_FILE_ARCH_COMPLETED	OS_FILE_LOG_BLOCK_SIZE
-					/* this 4-byte field is TRUE when
-					the writing of an archived log file
-					has been completed; this field is
-					only defined in an archived log file */
-#define LOG_FILE_END_LSN	(OS_FILE_LOG_BLOCK_SIZE + 4)
-					/* lsn where the archived log file
-					at least extends: actually the
-					archived log file may extend to a
-					later lsn, as long as it is within the
-					same log block as this lsn; this field
-					is defined only when an archived log
-					file has been completely written */
 #define LOG_CHECKPOINT_1	OS_FILE_LOG_BLOCK_SIZE
 					/* first checkpoint field in the log
 					header; we write alternately to the
@@ -726,74 +590,72 @@ extern log_t*	log_sys;
 					header */
 #define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)
 
-#define LOG_GROUP_OK		301
-#define LOG_GROUP_CORRUPTED	302
+/** The state of a log group */
+enum log_group_state_t {
+	/** No corruption detected */
+	LOG_GROUP_OK,
+	/** Corrupted */
+	LOG_GROUP_CORRUPTED
+};
+
+typedef ib_mutex_t	LogSysMutex;
+typedef ib_mutex_t	FlushOrderMutex;
 
 /** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil. */
+group is implemented as a space in the sense of the module fil0fil.
+Currently, this is only protected by log_sys->mutex. However, in the case
+of log_write_up_to(), we will access some members only with the protection
+of log_sys->write_mutex, which should affect nothing for now. */
 struct log_group_t{
-	/* The following fields are protected by log_sys->mutex */
-	ulint		id;		/*!< log group id */
-	ulint		n_files;	/*!< number of files in the group */
-	lsn_t		file_size;	/*!< individual log file size in bytes,
-					including the log file header */
-	ulint		space_id;	/*!< file space which implements the log
-					group */
-	ulint		state;		/*!< LOG_GROUP_OK or
-					LOG_GROUP_CORRUPTED */
-	lsn_t		lsn;		/*!< lsn used to fix coordinates within
-					the log group */
-	lsn_t		lsn_offset;	/*!< the offset of the above lsn */
-	ulint		n_pending_writes;/*!< number of currently pending flush
-					writes for this log group */
-	byte**		file_header_bufs_ptr;/*!< unaligned buffers */
-	byte**		file_header_bufs;/*!< buffers for each file
-					header in the group */
-#ifdef UNIV_LOG_ARCHIVE
-	/*-----------------------------*/
-	byte**		archive_file_header_bufs_ptr;/*!< unaligned buffers */
-	byte**		archive_file_header_bufs;/*!< buffers for each file
-					header in the group */
-	ulint		archive_space_id;/*!< file space which
-					implements the log group
-					archive */
-	ulint		archived_file_no;/*!< file number corresponding to
-					log_sys->archived_lsn */
-	ulint		archived_offset;/*!< file offset corresponding to
-					log_sys->archived_lsn, 0 if we have
-					not yet written to the archive file
-					number archived_file_no */
-	ulint		next_archived_file_no;/*!< during an archive write,
-					until the write is completed, we
-					store the next value for
-					archived_file_no here: the write
-					completion function then sets the new
-					value to ..._file_no */
-	ulint		next_archived_offset; /*!< like the preceding field */
-#endif /* UNIV_LOG_ARCHIVE */
-	/*-----------------------------*/
-	lsn_t		scanned_lsn;	/*!< used only in recovery: recovery scan
-					succeeded up to this lsn in this log
-					group */
-	byte*		checkpoint_buf_ptr;/*!< unaligned checkpoint header */
-	byte*		checkpoint_buf;	/*!< checkpoint header is written from
-					this buffer to the group */
-	UT_LIST_NODE_T(log_group_t)
-			log_groups;	/*!< list of log groups */
+	/** log group identifier (always 0) */
+	ulint				id;
+	/** number of files in the group */
+	ulint				n_files;
+	/** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
+	ulint				format;
+	/** individual log file size in bytes, including the header */
+	lsn_t				file_size;
+	/** file space which implements the log group */
+	ulint				space_id;
+	/** corruption status */
+	log_group_state_t		state;
+	/** lsn used to fix coordinates within the log group */
+	lsn_t				lsn;
+	/** the byte offset of the above lsn */
+	lsn_t				lsn_offset;
+	/** unaligned buffers */
+	byte**				file_header_bufs_ptr;
+	/** buffers for each file header in the group */
+	byte**				file_header_bufs;
+
+	/** used only in recovery: recovery scan succeeded up to this
+	lsn in this log group */
+	lsn_t				scanned_lsn;
+	/** unaligned checkpoint header */
+	byte*				checkpoint_buf_ptr;
+	/** buffer for writing a checkpoint header */
+	byte*				checkpoint_buf;
+	/** list of log groups */
+	UT_LIST_NODE_T(log_group_t)	log_groups;
 };
 
 /** Redo log buffer */
 struct log_t{
-	byte		pad[64];	/*!< padding to prevent other memory
+	char		pad1[CACHE_LINE_SIZE];
+					/*!< Padding to prevent other memory
 					update hotspots from residing on the
 					same memory cache line */
 	lsn_t		lsn;		/*!< log sequence number */
 	ulint		buf_free;	/*!< first free offset within the log
-					buffer */
+					buffer in use */
 #ifndef UNIV_HOTBACKUP
-	ib_mutex_t		mutex;		/*!< mutex protecting the log */
-
-	ib_mutex_t		log_flush_order_mutex;/*!< mutex to serialize access to
+	char		pad2[CACHE_LINE_SIZE];/*!< Padding */
+	LogSysMutex	mutex;		/*!< mutex protecting the log */
+	char		pad3[CACHE_LINE_SIZE]; /*!< Padding */
+	LogSysMutex	write_mutex;	/*!< mutex protecting writes to the log
+					file and access to log_group_t */
+	char		pad4[CACHE_LINE_SIZE];/*!< Padding */
+	FlushOrderMutex	log_flush_order_mutex;/*!< mutex to serialize access to
 					the flush list when we are putting
 					dirty blocks in the list. The idea
 					behind this mutex is to be able
@@ -802,22 +664,24 @@ struct log_t{
 					insertions in the flush_list happen
 					in the LSN order. */
 #endif /* !UNIV_HOTBACKUP */
-	byte*		buf_ptr;	/* unaligned log buffer */
-	byte*		buf;		/*!< log buffer */
-	ulint		buf_size;	/*!< log buffer size in bytes */
+	byte*		buf_ptr;	/*!< unaligned log buffer, which should
+					be twice as large as buf_size */
+	byte*		buf;		/*!< log buffer currently in use;
+					this could point to either the first
+					half of the aligned(buf_ptr) or the
+					second half in turns, so that log
+					write/flush to disk don't block
+					concurrent mtrs which will write
+					log to this buffer */
+	bool		first_in_use;	/*!< true if buf points to the first
+					half of the aligned(buf_ptr), false
+					if the second half */
+	ulint		buf_size;	/*!< size of each buffer half (bytes) */
 	ulint		max_buf_free;	/*!< recommended maximum value of
-					buf_free, after which the buffer is
-					flushed */
- #ifdef UNIV_LOG_DEBUG
-	ulint		old_buf_free;	/*!< value of buf free when log was
-					last time opened; only in the debug
-					version */
-	ib_uint64_t	old_lsn;	/*!< value of lsn when log was
-					last time opened; only in the
-					debug version */
-#endif /* UNIV_LOG_DEBUG */
-	ibool		check_flush_or_checkpoint;
-					/*!< this is set to TRUE when there may
+					buf_free for the buffer in use, after
+					which the buffer is flushed */
+	bool		check_flush_or_checkpoint;
+					/*!< this is set when there may
 					be need to flush the log buffer, or
 					preflush buffer pool pages, or make
 					a checkpoint; this MUST be TRUE when
@@ -840,65 +704,23 @@ struct log_t{
 					groups */
 	volatile bool	is_extending;	/*!< this is set to true during extend
 					the log buffer size */
-	lsn_t		written_to_some_lsn;
-					/*!< first log sequence number not yet
-					written to any log group; for this to
-					be advanced, it is enough that the
-					write i/o has been completed for any
-					one log group */
-	lsn_t		written_to_all_lsn;
-					/*!< first log sequence number not yet
-					written to some log group; for this to
-					be advanced, it is enough that the
-					write i/o has been completed for all
-					log groups.
-					Note that since InnoDB currently
-					has only one log group therefore
-					this value is redundant. Also it
-					is possible that this value
-					falls behind the
-					flushed_to_disk_lsn transiently.
-					It is appropriate to use either
-					flushed_to_disk_lsn or
-					write_lsn which are always
-					up-to-date and accurate. */
-	lsn_t		write_lsn;	/*!< end lsn for the current running
-					write */
-	ulint		write_end_offset;/*!< the data in buffer has
-					been written up to this offset
-					when the current write ends:
-					this field will then be copied
-					to buf_next_to_write */
+	lsn_t		write_lsn;	/*!< last written lsn */
 	lsn_t		current_flush_lsn;/*!< end lsn for the current running
 					write + flush operation */
 	lsn_t		flushed_to_disk_lsn;
 					/*!< how far we have written the log
 					AND flushed to disk */
-	ulint		n_pending_writes;/*!< number of currently
-					pending flushes or writes */
-	/* NOTE on the 'flush' in names of the fields below: starting from
-	4.0.14, we separate the write of the log file and the actual fsync()
-	or other method to flush it to disk. The names below should really
-	be 'flush_or_write'! */
-	os_event_t	no_flush_event;	/*!< this event is in the reset state
-					when a flush or a write is running;
-					a thread should wait for this without
+	ulint		n_pending_flushes;/*!< number of currently
+					pending flushes; incrementing is
+					protected by the log mutex;
+					may be decremented between
+					resetting and setting flush_event */
+	os_event_t	flush_event;	/*!< this event is in the reset state
+					when a flush is running; a thread
+					should wait for this without
 					owning the log mutex, but NOTE that
-					to set or reset this event, the
+					to set this event, the
 					thread MUST own the log mutex! */
-	ibool		one_flushed;	/*!< during a flush, this is
-					first FALSE and becomes TRUE
-					when one log group has been
-					written or flushed */
-	os_event_t	one_flushed_event;/*!< this event is reset when the
-					flush or write has not yet completed
-					for any log group; e.g., this means
-					that a transaction has been committed
-					when this is set; a thread should wait
-					for this without owning the log mutex,
-					but NOTE that to set or reset this
-					event, the thread MUST own the log
-					mutex! */
 	ulint		n_log_ios;	/*!< number of log i/os initiated thus
 					far */
 	ulint		n_log_ios_old;	/*!< number of log i/o's at the
@@ -940,6 +762,13 @@ struct log_t{
 					/*!< latest checkpoint lsn */
 	lsn_t		next_checkpoint_lsn;
 					/*!< next checkpoint lsn */
+	mtr_buf_t*	append_on_checkpoint;
+					/*!< extra redo log records to write
+					during a checkpoint, or NULL if none.
+					The pointer is protected by
+					log_sys->mutex, and the data must
+					remain constant as long as this
+					pointer is not NULL. */
 	ulint		n_pending_checkpoint_writes;
 					/*!< number of currently pending
 					checkpoint writes */
@@ -952,46 +781,10 @@ struct log_t{
 	byte*		checkpoint_buf;	/*!< checkpoint header is read to this
 					buffer */
 	/* @} */
-#ifdef UNIV_LOG_ARCHIVE
-	/** Fields involved in archiving @{ */
-	ulint		archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
-					LOG_ARCH_STOPPED, LOG_ARCH_OFF */
-	lsn_t		archived_lsn;	/*!< archiving has advanced to this
-					lsn */
-	lsn_t		max_archived_lsn_age_async;
-					/*!< recommended maximum age of
-					archived_lsn, before we start
-					asynchronous copying to the archive */
-	lsn_t		max_archived_lsn_age;
-					/*!< maximum allowed age for
-					archived_lsn */
-	lsn_t		next_archived_lsn;/*!< during an archive write,
-					until the write is completed, we
-					store the next value for
-					archived_lsn here: the write
-					completion function then sets the new
-					value to archived_lsn */
-	ulint		archiving_phase;/*!< LOG_ARCHIVE_READ or
-					LOG_ARCHIVE_WRITE */
-	ulint		n_pending_archive_ios;
-					/*!< number of currently pending reads
-					or writes in archiving */
-	rw_lock_t	archive_lock;	/*!< this latch is x-locked when an
-					archive write is running; a thread
-					should wait for this without owning
-					the log mutex */
-	ulint		archive_buf_size;/*!< size of archive_buf */
-	byte*		archive_buf;	/*!< log segment is written to the
-					archive from this buffer */
-	os_event_t	archiving_on;	/*!< if archiving has been stopped,
-					a thread can wait for this event to
-					become signaled */
-	/* @} */
-#endif /* UNIV_LOG_ARCHIVE */
 };
 
 /** Test if flush order mutex is owned. */
-#define log_flush_order_mutex_own()	\
+#define log_flush_order_mutex_own()			\
 	mutex_own(&log_sys->log_flush_order_mutex)
 
 /** Acquire the flush order mutex. */
@@ -1003,15 +796,44 @@ struct log_t{
 	mutex_exit(&log_sys->log_flush_order_mutex);	\
 } while (0)
 
-#ifdef UNIV_LOG_ARCHIVE
-/** Archiving state @{ */
-#define LOG_ARCH_ON		71
-#define LOG_ARCH_STOPPING	72
-#define LOG_ARCH_STOPPING2	73
-#define LOG_ARCH_STOPPED	74
-#define LOG_ARCH_OFF		75
-/* @} */
-#endif /* UNIV_LOG_ARCHIVE */
+/** Test if log sys mutex is owned. */
+#define log_mutex_own() mutex_own(&log_sys->mutex)
+
+/** Test if log sys write mutex is owned. */
+#define log_write_mutex_own() mutex_own(&log_sys->write_mutex)
+
+/** Acquire the log sys mutex. */
+#define log_mutex_enter() mutex_enter(&log_sys->mutex)
+
+/** Acquire the log sys write mutex. */
+#define log_write_mutex_enter() mutex_enter(&log_sys->write_mutex)
+
+/** Acquire all the log sys mutexes. */
+#define log_mutex_enter_all() do {		\
+	mutex_enter(&log_sys->write_mutex);	\
+	mutex_enter(&log_sys->mutex);		\
+} while (0)
+
+/** Release the log sys mutex. */
+#define log_mutex_exit() mutex_exit(&log_sys->mutex)
+
+/** Release the log sys write mutex. */
+#define log_write_mutex_exit() mutex_exit(&log_sys->write_mutex)
+
+/** Release all the log sys mutexes. */
+#define log_mutex_exit_all() do {		\
+	mutex_exit(&log_sys->mutex);		\
+	mutex_exit(&log_sys->write_mutex);	\
+} while (0)
+
+/** Calculate the offset of an lsn within a log group.
+@param[in]	lsn	log sequence number
+@param[in]	group	log group
+@return offset within the log group */
+lsn_t
+log_group_calc_lsn_offset(
+	lsn_t			lsn,
+	const log_group_t*	group);
 
 extern os_event_t log_scrub_event;
 /* log scrubbing speed, in bytes/sec */
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 38ed2b51a4e..a53f8770cea 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,27 +25,17 @@ Created 12/9/1995 Heikki Tuuri
 
 #include "os0file.h"
 #include "mach0data.h"
-#include "mtr0mtr.h"
 #include "srv0mon.h"
+#include "srv0srv.h"
+#include "ut0crc32.h"
 
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
-	const byte*	buf,		/*!< in: pointer to the start of
-					the log segment in the
-					log_sys->buf log buffer */
-	ulint		len,		/*!< in: segment length in bytes */
-	ib_uint64_t	buf_start_lsn);	/*!< in: buffer start lsn */
-#endif /* UNIV_LOG_DEBUG */
+#ifdef UNIV_LOG_LSN_DEBUG
+#include "mtr0types.h"
+#endif /* UNIV_LOG_LSN_DEBUG */
 
 /************************************************************//**
 Gets a log block flush bit.
-@return	TRUE if this block was the first to be written in a log flush */
+@return TRUE if this block was the first to be written in a log flush */
 UNIV_INLINE
 ibool
 log_block_get_flush_bit(
@@ -85,7 +75,7 @@ log_block_set_flush_bit(
 
 /************************************************************//**
 Gets a log block number stored in the header.
-@return	log block number stored in the block header */
+@return log block number stored in the block header */
 UNIV_INLINE
 ulint
 log_block_get_hdr_no(
@@ -115,7 +105,7 @@ log_block_set_hdr_no(
 
 /************************************************************//**
 Gets a log block data length.
-@return	log block data length measured as a byte offset from the block start */
+@return log block data length measured as a byte offset from the block start */
 UNIV_INLINE
 ulint
 log_block_get_data_len(
@@ -164,7 +154,7 @@ log_block_set_first_rec_group(
 
 /************************************************************//**
 Gets a log block checkpoint number field (4 lowest bytes).
-@return	checkpoint no (4 lowest bytes) */
+@return checkpoint no (4 lowest bytes) */
 UNIV_INLINE
 ulint
 log_block_get_checkpoint_no(
@@ -188,7 +178,7 @@ log_block_set_checkpoint_no(
 
 /************************************************************//**
 Converts a lsn to a log block number.
-@return	log block number, it is > 0 and <= 1G */
+@return log block number, it is > 0 and <= 1G */
 UNIV_INLINE
 ulint
 log_block_convert_lsn_to_no(
@@ -200,13 +190,24 @@ log_block_convert_lsn_to_no(
 
 /************************************************************//**
 Calculates the checksum for a log block.
-@return	checksum */
+@return checksum */
 UNIV_INLINE
 ulint
 log_block_calc_checksum(
 /*====================*/
 	const byte*	block)	/*!< in: log block */
 {
+	return(log_checksum_algorithm_ptr(block));
+}
+
+/** Calculate the checksum for a log block using the pre-5.7.9 algorithm.
+@param[in]	block	log block
+@return		checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_format_0(
+	const byte*	block)
+{
 	ulint	sum;
 	ulint	sh;
 	ulint	i;
@@ -228,9 +229,31 @@ log_block_calc_checksum(
 	return(sum);
 }
 
+/** Calculate the checksum for a log block using the MySQL 5.7 algorithm.
+@param[in]	block	log block
+@return checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_crc32(
+	const byte*	block)
+{
+	return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
+}
+
+/** Calculates the checksum for a log block using the "no-op" algorithm.
+@param[in]     block   log block
+@return        checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum_none(
+	const byte*	block)
+{
+	return(LOG_NO_CHECKSUM_MAGIC);
+}
+
 /************************************************************//**
 Gets a log block checksum field value.
-@return	checksum */
+@return checksum */
 UNIV_INLINE
 ulint
 log_block_get_checksum(
@@ -266,8 +289,6 @@ log_block_init(
 {
 	ulint	no;
 
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
 	no = log_block_convert_lsn_to_no(lsn);
 
 	log_block_set_hdr_no(log_block, no);
@@ -276,6 +297,7 @@ log_block_init(
 	log_block_set_first_rec_group(log_block, 0);
 }
 
+#ifdef UNIV_HOTBACKUP
 /************************************************************//**
 Initializes a log block in the log buffer in the old format, where there
 was no checksum yet. */
@@ -288,7 +310,7 @@ log_block_init_in_old_format(
 {
 	ulint	no;
 
-	ut_ad(mutex_own(&(log_sys->mutex)));
+	ut_ad(log_mutex_own());
 
 	no = log_block_convert_lsn_to_no(lsn);
 
@@ -298,34 +320,47 @@ log_block_init_in_old_format(
 	log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
 	log_block_set_first_rec_group(log_block, 0);
 }
+#endif /* UNIV_HOTBACKUP */
 
 #ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return	end lsn of the log record, zero if did not succeed */
+/** Append a string to the log. The caller must hold the log mutex.
+@param[in]	str		string
+@param[in]	len		string length; must be nonzero
+@param[out]	start_lsn	start LSN of the log record (set on success)
+@return end LSN of the log record, or 0 if the string did not fit */
 UNIV_INLINE
 lsn_t
 log_reserve_and_write_fast(
-/*=======================*/
-	const void*	str,	/*!< in: string */
-	ulint		len,	/*!< in: string length */
-	lsn_t*		start_lsn)/*!< out: start lsn of the log record */
+	const void*	str,
+	ulint		len,
+	lsn_t*		start_lsn)
 {
-	ulint		data_len;
-#ifdef UNIV_LOG_LSN_DEBUG
-	/* length of the LSN pseudo-record */
-	ulint		lsn_len;
-#endif /* UNIV_LOG_LSN_DEBUG */
+	ut_ad(log_mutex_own());
+	ut_ad(len > 0);
 
-	mutex_enter(&log_sys->mutex);
 #ifdef UNIV_LOG_LSN_DEBUG
-	lsn_len = 1
+	/* Append a MLOG_LSN record after mtr_commit(), except when
+	the last bytes could be a MLOG_CHECKPOINT marker. We have special
+	handling when the log consists of only a single MLOG_CHECKPOINT
+	record since the latest checkpoint, and appending the
+	MLOG_LSN would ruin that.
+
+	Note that a longer redo log record could happen to end in what
+	looks like MLOG_CHECKPOINT, and we could be omitting MLOG_LSN
+	without reason. This is OK, because writing the MLOG_LSN is
+	just a 'best effort', aimed at finding log corruption due to
+	bugs in the redo log writing logic. */
+	const ulint	lsn_len
+		= len >= SIZE_OF_MLOG_CHECKPOINT
+		&& MLOG_CHECKPOINT == static_cast<const char*>(str)[
+			len - SIZE_OF_MLOG_CHECKPOINT]
+		? 0
+		: 1
 		+ mach_get_compressed_size(log_sys->lsn >> 32)
 		+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
 #endif /* UNIV_LOG_LSN_DEBUG */
 
-	data_len = len
+	const ulint	data_len = len
 #ifdef UNIV_LOG_LSN_DEBUG
 		+ lsn_len
 #endif /* UNIV_LOG_LSN_DEBUG */
@@ -336,39 +371,37 @@ log_reserve_and_write_fast(
 		/* The string does not fit within the current log block
 		or the log block would become full */
 
-		mutex_exit(&log_sys->mutex);
-
 		return(0);
 	}
 
 	*start_lsn = log_sys->lsn;
 
 #ifdef UNIV_LOG_LSN_DEBUG
-	{
+	if (lsn_len) {
 		/* Write the LSN pseudo-record. */
 		byte* b = &log_sys->buf[log_sys->buf_free];
+
 		*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
+
 		/* Write the LSN in two parts,
 		as a pseudo page number and space id. */
 		b += mach_write_compressed(b, log_sys->lsn >> 32);
 		b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
 		ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
 
-		memcpy(b, str, len);
+		::memcpy(b, str, len);
+
 		len += lsn_len;
-	}
-#else /* UNIV_LOG_LSN_DEBUG */
-	memcpy(log_sys->buf + log_sys->buf_free, str, len);
+	} else
 #endif /* UNIV_LOG_LSN_DEBUG */
+	memcpy(log_sys->buf + log_sys->buf_free, str, len);
+
+	log_block_set_data_len(
+                reinterpret_cast<byte*>(ut_align_down(
+                        log_sys->buf + log_sys->buf_free,
+                        OS_FILE_LOG_BLOCK_SIZE)),
+                data_len);
 
-	log_block_set_data_len((byte*) ut_align_down(log_sys->buf
-						     + log_sys->buf_free,
-						     OS_FILE_LOG_BLOCK_SIZE),
-			       data_len);
-#ifdef UNIV_LOG_DEBUG
-	log_sys->old_buf_free = log_sys->buf_free;
-	log_sys->old_lsn = log_sys->lsn;
-#endif
 	log_sys->buf_free += len;
 
 	ut_ad(log_sys->buf_free <= log_sys->buf_size);
@@ -378,27 +411,12 @@ log_reserve_and_write_fast(
 	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 
-#ifdef UNIV_LOG_DEBUG
-	log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
-			   log_sys->buf_free - log_sys->old_buf_free,
-			   log_sys->old_lsn);
-#endif
 	return(log_sys->lsn);
 }
 
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void)
-/*=============*/
-{
-	mutex_exit(&(log_sys->mutex));
-}
-
 /************************************************************//**
 Gets the current lsn.
-@return	current lsn */
+@return current lsn */
 UNIV_INLINE
 lsn_t
 log_get_lsn(void)
@@ -406,11 +424,11 @@ log_get_lsn(void)
 {
 	lsn_t	lsn;
 
-	mutex_enter(&(log_sys->mutex));
+	log_mutex_enter();
 
 	lsn = log_sys->lsn;
 
-	mutex_exit(&(log_sys->mutex));
+	log_mutex_exit();
 
 	return(lsn);
 }
@@ -421,15 +439,14 @@ Gets the last lsn that is fully flushed to disk.
 UNIV_INLINE
 ib_uint64_t
 log_get_flush_lsn(void)
-/*=============*/
 {
 	ib_uint64_t	lsn;
 
-	mutex_enter(&(log_sys->mutex));
+	log_mutex_enter();
 
 	lsn = log_sys->flushed_to_disk_lsn;
 
-	mutex_exit(&(log_sys->mutex));
+	log_mutex_exit();
 
 	return(lsn);
 }
@@ -440,7 +457,7 @@ Gets the current lsn with a trylock
 UNIV_INLINE
 lsn_t
 log_get_lsn_nowait(void)
-/*=============*/
+/*====================*/
 {
 	lsn_t	lsn=0;
 
@@ -457,7 +474,7 @@ log_get_lsn_nowait(void)
 /****************************************************************
 Gets the log group capacity. It is OK to read the value without
 holding log_sys->mutex because it is constant.
-@return	log group capacity */
+@return log group capacity */
 UNIV_INLINE
 lsn_t
 log_get_capacity(void)
@@ -469,7 +486,7 @@ log_get_capacity(void)
 /****************************************************************
 Get log_sys::max_modified_age_async. It is OK to read the value without
 holding log_sys::mutex because it is constant.
-@return	max_modified_age_async */
+@return max_modified_age_async */
 UNIV_INLINE
 lsn_t
 log_get_max_modified_age_async(void)
@@ -488,10 +505,24 @@ void
 log_free_check(void)
 /*================*/
 {
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	/* During row_log_table_apply(), this function will be called while we
+	are holding some latches. This is OK, as long as we are not holding
+	any latches on buffer blocks. */
+
+	static const latch_level_t latches[] = {
+		SYNC_DICT,		/* dict_sys->mutex during
+					commit_try_rebuild() */
+		SYNC_DICT_OPERATION,	/* dict_operation_lock X-latch during
+					commit_try_rebuild() */
+		SYNC_INDEX_TREE		/* index->lock */
+	};
+
+	sync_allowed_latches check(
+		latches, latches + sizeof(latches)/sizeof(*latches));
+
+        ut_ad(!sync_check_iterate(check));
+#endif /* UNIV_DEBUG */
 
 	if (log_sys->check_flush_or_checkpoint) {
 
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index b6c977bdc74..bd7118654f3 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -31,15 +31,18 @@ Created 9/20/1997 Heikki Tuuri
 #include "buf0types.h"
 #include "hash0hash.h"
 #include "log0log.h"
+#include "mtr0types.h"
+#include "ut0new.h"
+
 #include <list>
+#include <vector>
 
 #ifdef UNIV_HOTBACKUP
-extern ibool	recv_replay_file_ops;
+extern bool	recv_replay_file_ops;
 
 /*******************************************************************//**
 Reads the checkpoint info needed in hot backup.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 ibool
 recv_read_checkpoint_info_for_backup(
 /*=================================*/
@@ -55,7 +58,6 @@ recv_read_checkpoint_info_for_backup(
 /*******************************************************************//**
 Scans the log segment and n_bytes_scanned is set to the length of valid
 log scanned. */
-UNIV_INTERN
 void
 recv_scan_log_seg_for_backup(
 /*=========================*/
@@ -73,25 +75,15 @@ recv_scan_log_seg_for_backup(
 #endif /* UNIV_HOTBACKUP */
 /*******************************************************************//**
 Returns TRUE if recovery is currently running.
-@return	recv_recovery_on */
+@return recv_recovery_on */
 UNIV_INLINE
-ibool
+bool
 recv_recovery_is_on(void);
 /*=====================*/
-#ifdef UNIV_LOG_ARCHIVE
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return	recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void);
-/*=================================*/
-#endif /* UNIV_LOG_ARCHIVE */
 /************************************************************************//**
 Applies the hashed log records to the page, if the page lsn is less than the
 lsn of a log record. This can be called when a buffer page has just been
 read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
 void
 recv_recover_page_func(
 /*===================*/
@@ -106,9 +98,9 @@ recv_recover_page_func(
 Applies the hashed log records to the page, if the page lsn is less than the
 lsn of a log record. This can be called when a buffer page has just been
 read in, or also for a page already in the buffer pool.
-@param jri	in: TRUE if just read in (the i/o handler calls this for
+@param jri in: TRUE if just read in (the i/o handler calls this for
 a freshly read page)
-@param block	in/out: the buffer block
+@param block in/out: the buffer block
 */
 # define recv_recover_page(jri, block)	recv_recover_page_func(jri, block)
 #else /* !UNIV_HOTBACKUP */
@@ -116,110 +108,33 @@ a freshly read page)
 Applies the hashed log records to the page, if the page lsn is less than the
 lsn of a log record. This can be called when a buffer page has just been
 read in, or also for a page already in the buffer pool.
-@param jri	in: TRUE if just read in (the i/o handler calls this for
+@param jri in: TRUE if just read in (the i/o handler calls this for
 a freshly read page)
-@param block	in/out: the buffer block
+@param block in/out: the buffer block
 */
 # define recv_recover_page(jri, block)	recv_recover_page_func(block)
 #endif /* !UNIV_HOTBACKUP */
-/********************************************************//**
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+/** Start recovering from a redo log checkpoint.
+@see recv_recovery_from_checkpoint_finish
+@param[in]	flush_lsn	FIL_PAGE_FILE_FLUSH_LSN
+of first system tablespace page
+@return error code or DB_SUCCESS */
 dberr_t
-recv_recovery_from_checkpoint_start_func(
-/*=====================================*/
-#ifdef UNIV_LOG_ARCHIVE
-	ulint		type,		/*!< in: LOG_CHECKPOINT or
-					LOG_ARCHIVE */
-	lsn_t		limit_lsn,	/*!< in: recover up to this lsn
-					if possible */
-#endif /* UNIV_LOG_ARCHIVE */
-	lsn_t		min_flushed_lsn,/*!< in: min flushed lsn from
-					data files */
-	lsn_t		max_flushed_lsn);/*!< in: max flushed lsn from
-					 data files */
-#ifdef UNIV_LOG_ARCHIVE
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type	in: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim	in: recover up to this log sequence number if possible
-@param min	in: minimum flushed log sequence number from data files
-@param max	in: maximum flushed log sequence number from data files
-@return	error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
-	recv_recovery_from_checkpoint_start_func(type,lim,min,max)
-#else /* UNIV_LOG_ARCHIVE */
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type	ignored: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim	ignored: recover up to this log sequence number if possible
-@param min	in: minimum flushed log sequence number from data files
-@param max	in: maximum flushed log sequence number from data files
-@return	error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
-	recv_recovery_from_checkpoint_start_func(min,max)
-#endif /* UNIV_LOG_ARCHIVE */
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
+recv_recovery_from_checkpoint_start(
+	lsn_t	flush_lsn);
+/** Complete recovery from a checkpoint. */
 void
 recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
 /********************************************************//**
 Initiates the rollback of active transactions. */
-UNIV_INTERN
 void
 recv_recovery_rollback_active(void);
 /*===============================*/
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found.  Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
-	ulint		available_memory,/*!< in: we let the hash table of recs
-					to grow to this size, at the maximum */
-	ibool		store_to_hash,	/*!< in: TRUE if the records should be
-					stored to the hash table; this is set
-					to FALSE if just debug checking is
-					needed */
-	const byte*	buf,		/*!< in: buffer containing a log
-					segment or garbage */
-	ulint		len,		/*!< in: buffer length */
-	lsn_t		start_lsn,	/*!< in: buffer start lsn */
-	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
-					groups contain contiguous log data up
-					to this lsn */
-	lsn_t*		group_scanned_lsn);/*!< out: scanning succeeded up to
-					this lsn */
 /******************************************************//**
 Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
 void
 recv_reset_logs(
 /*============*/
-#ifdef UNIV_LOG_ARCHIVE
-	ulint		arch_log_no,	/*!< in: next archived log file number */
-	ibool		new_logs_created,/*!< in: TRUE if resetting logs
-					is done at the log creation;
-					FALSE if it is done after
-					archive recovery */
-#endif /* UNIV_LOG_ARCHIVE */
 	lsn_t		lsn);		/*!< in: reset to this lsn
 					rounded up to be divisible by
 					OS_FILE_LOG_BLOCK_SIZE, after
@@ -228,7 +143,6 @@ recv_reset_logs(
 #ifdef UNIV_HOTBACKUP
 /******************************************************//**
 Creates new log files after a backup has been restored. */
-UNIV_INTERN
 void
 recv_reset_log_files_for_backup(
 /*============================*/
@@ -240,33 +154,33 @@ recv_reset_log_files_for_backup(
 #endif /* UNIV_HOTBACKUP */
 /********************************************************//**
 Creates the recovery system. */
-UNIV_INTERN
 void
 recv_sys_create(void);
 /*=================*/
 /**********************************************************//**
 Release recovery system mutexes. */
-UNIV_INTERN
 void
 recv_sys_close(void);
 /*================*/
 /********************************************************//**
 Frees the recovery system memory. */
-UNIV_INTERN
 void
 recv_sys_mem_free(void);
 /*===================*/
 /********************************************************//**
 Inits the recovery system for a recovery operation. */
-UNIV_INTERN
 void
 recv_sys_init(
 /*==========*/
 	ulint	available_memory);	/*!< in: available memory in bytes */
 #ifndef UNIV_HOTBACKUP
 /********************************************************//**
+Frees the recovery system. */
+void
+recv_sys_debug_free(void);
+/*=====================*/
+/********************************************************//**
 Reset the state of the recovery system variables. */
-UNIV_INTERN
 void
 recv_sys_var_init(void);
 /*===================*/
@@ -274,49 +188,25 @@ recv_sys_var_init(void);
 /*******************************************************************//**
 Empties the hash table of stored log records, applying them to appropriate
 pages. */
-UNIV_INTERN
 dberr_t
 recv_apply_hashed_log_recs(
 /*=======================*/
-	ibool	allow_ibuf);	/*!< in: if TRUE, also ibuf operations are
+	ibool	allow_ibuf)	/*!< in: if TRUE, also ibuf operations are
 				allowed during the application; if FALSE,
 				no ibuf operations are allowed, and after
 				the application all file pages are flushed to
 				disk and invalidated in buffer pool: this
 				alternative means that no new log records
 				can be generated during the application */
+	__attribute__((warn_unused_result));
+
 #ifdef UNIV_HOTBACKUP
 /*******************************************************************//**
 Applies log records in the hash table to a backup. */
-UNIV_INTERN
 void
 recv_apply_log_recs_for_backup(void);
 /*================================*/
-#endif
-#ifdef UNIV_LOG_ARCHIVE
-/********************************************************//**
-Recovers from archived log files, and also from log files, if they exist.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
-	lsn_t		min_flushed_lsn,/*!< in: min flushed lsn field from the
-					data files */
-	lsn_t		limit_lsn,	/*!< in: recover up to this lsn if
-					possible */
-	ulint		first_log_no);	/*!< in: number of the first archived
-					log file to use in the recovery; the
-					file will be searched from
-					INNOBASE_LOG_ARCH_DIR specified in
-					server config file */
-/********************************************************//**
-Completes recovery from archive. */
-UNIV_INTERN
-void
-recv_recovery_from_archive_finish(void);
-/*===================================*/
-#endif /* UNIV_LOG_ARCHIVE */
+#endif /* UNIV_HOTBACKUP */
 
 /** Block of log record data */
 struct recv_data_t{
@@ -328,7 +218,7 @@ struct recv_data_t{
 
 /** Stored log record struct */
 struct recv_t{
-	byte		type;	/*!< log record type */
+	mlog_id_t	type;	/*!< log record type */
 	ulint		len;	/*!< log record body length in bytes */
 	recv_data_t*	data;	/*!< chain of blocks containing the log record
 				body */
@@ -352,9 +242,11 @@ enum recv_addr_state {
 	RECV_BEING_READ,
 	/** log records are being applied on the page */
 	RECV_BEING_PROCESSED,
-	/** log records have been applied on the page, or they have
-	been discarded because the tablespace does not exist */
-	RECV_PROCESSED
+	/** log records have been applied on the page */
+	RECV_PROCESSED,
+	/** log records have been discarded because the tablespace
+	does not exist */
+	RECV_DISCARDED
 };
 
 /** Hashed page file address struct */
@@ -369,17 +261,34 @@ struct recv_addr_t{
 };
 
 struct recv_dblwr_t {
-	void add(byte* page);
+	/** Add a page frame to the doublewrite recovery buffer. */
+	void add(const byte* page) {
+		pages.push_back(page);
+	}
 
-	byte* find_page(ulint space_id, ulint page_no);
+	/** Find a doublewrite copy of a page.
+	@param[in]	space_id	tablespace identifier
+	@param[in]	page_no		page number
+	@return	page frame
+	@retval NULL if no page was found */
+	const byte* find_page(ulint space_id, ulint page_no);
 
-	std::list<byte *> pages; /* Pages from double write buffer */
+	typedef std::list<const byte*, ut_allocator<const byte*> >	list;
 
-	void operator() () {
-		pages.clear();
-	}
+	/** Recovered doublewrite buffer page frames */
+	list	pages;
 };
 
+/** Recovery encryption information */
+typedef	struct recv_encryption {
+	ulint		space_id;	/*!< tablespace identifier */
+	byte*		key;		/*!< encryption key */
+	byte*		iv;		/*!< encryption iv */
+} recv_encryption_t;
+
+typedef std::vector<recv_encryption_t, ut_allocator<recv_encryption_t> >
+		encryption_list_t;
+
 /** Recovery system data structure */
 struct recv_sys_t{
 #ifndef UNIV_HOTBACKUP
@@ -389,6 +298,13 @@ struct recv_sys_t{
 	ib_mutex_t		writer_mutex;/*!< mutex coordinating
 				flushing between recv_writer_thread and
 				the recovery thread. */
+	os_event_t		flush_start;/*!< event to activate
+				page cleaner threads */
+	os_event_t		flush_end;/*!< event to signal that the page
+				cleaner has finished the request */
+	buf_flush_t		flush_type;/*!< type of the flush request.
+				BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
+				BUF_FLUSH_LIST: flush all of the blocks. */
 #endif /* !UNIV_HOTBACKUP */
 	ibool		apply_log_recs;
 				/*!< this is TRUE when log rec application to
@@ -398,10 +314,6 @@ struct recv_sys_t{
 	ibool		apply_batch_on;
 				/*!< this is TRUE when a log rec application
 				batch is running */
-	lsn_t		lsn;	/*!< log sequence number */
-	ulint		last_log_buf_size;
-				/*!< size of the log buffer when the database
-				last time wrote to the log */
 	byte*		last_block;
 				/*!< possible incomplete last recovered log
 				block */
@@ -427,18 +339,17 @@ struct recv_sys_t{
 	lsn_t		recovered_lsn;
 				/*!< the log records have been parsed up to
 				this lsn */
-	lsn_t		limit_lsn;/*!< recovery should be made at most
-				up to this lsn */
-	ibool		found_corrupt_log;
-				/*!< this is set to TRUE if we during log
-				scan find a corrupt log block, or a corrupt
-				log record, or there is a log parsing
-				buffer overflow */
-#ifdef UNIV_LOG_ARCHIVE
-	log_group_t*	archive_group;
-				/*!< in archive recovery: the log group whose
-				archive is read */
-#endif /* !UNIV_LOG_ARCHIVE */
+	bool		found_corrupt_log;
+				/*!< set when finding a corrupt log
+				block or record, or there is a log
+				parsing buffer overflow */
+	bool		found_corrupt_fs;
+				/*!< set when an inconsistency with
+				the file system contents is detected
+				during log scan or apply */
+	lsn_t		mlog_checkpoint_lsn;
+				/*!< the LSN of a MLOG_CHECKPOINT
+				record, or 0 if none was parsed */
 	mem_heap_t*	heap;	/*!< memory heap of log records and file
 				addresses*/
 	hash_table_t*	addr_hash;/*!< hash table of file addresses of pages */
@@ -446,6 +357,9 @@ struct recv_sys_t{
 				addresses in the hash table */
 
 	recv_dblwr_t	dblwr;
+
+	encryption_list_t*	/*!< Encryption information list */
+			encryption_list;
 };
 
 /** The recovery system */
@@ -454,7 +368,7 @@ extern recv_sys_t*	recv_sys;
 /** TRUE when applying redo log records during crash recovery; FALSE
 otherwise.  Note that this is FALSE while a background thread is
 rolling back incomplete transactions. */
-extern ibool		recv_recovery_on;
+extern volatile bool	recv_recovery_on;
 /** If the following is TRUE, the buffer pool file pages must be invalidated
 after recovery and no ibuf operations are allowed; this becomes TRUE if
 the log record hash table becomes too full, and log records must be merged
@@ -464,25 +378,28 @@ buffer pool before the pages have been recovered to the up-to-date state.
 
 TRUE means that recovery is running and no operations on the log files
 are allowed yet: the variable name is misleading. */
-extern ibool		recv_no_ibuf_operations;
+extern bool		recv_no_ibuf_operations;
 /** TRUE when recv_init_crash_recovery() has been called. */
-extern ibool		recv_needed_recovery;
+extern bool		recv_needed_recovery;
 #ifdef UNIV_DEBUG
 /** TRUE if writing to the redo log (mtr_commit) is forbidden.
 Protected by log_sys->mutex. */
-extern ibool		recv_no_log_write;
+extern bool		recv_no_log_write;
 #endif /* UNIV_DEBUG */
 
 /** TRUE if buf_page_is_corrupted() should check if the log sequence
 number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-extern ibool		recv_lsn_checks_on;
+recv_recovery_from_checkpoint_start(). */
+extern bool		recv_lsn_checks_on;
 #ifdef UNIV_HOTBACKUP
 /** TRUE when the redo log is being backed up */
-extern ibool		recv_is_making_a_backup;
+extern bool		recv_is_making_a_backup;
 #endif /* UNIV_HOTBACKUP */
-/** Maximum page number encountered in the redo log */
-extern ulint		recv_max_parsed_page_no;
+
+#ifndef UNIV_HOTBACKUP
+/** Flag indicating if recv_writer thread is active. */
+extern volatile bool	recv_writer_thread_active;
+#endif /* !UNIV_HOTBACKUP */
 
 /** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
 times! */
@@ -500,12 +417,9 @@ extern ulint	recv_n_pool_free_frames;
 
 /******************************************************//**
 Checks the 4-byte checksum to the trailer checksum field of a log
-block.  We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-ibool
-log_block_checksum_is_ok_or_old_format(
+block.  */
+bool
+log_block_checksum_is_ok(
 /*===================================*/
 	const byte*	block,	/*!< in: pointer to a log block */
 	bool            print_err); /*!< in print error ? */
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
index 32c28dd03e6..d197e5e3337 100644
--- a/storage/innobase/include/log0recv.ic
+++ b/storage/innobase/include/log0recv.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,27 +27,12 @@ Created 9/20/1997 Heikki Tuuri
 
 /*******************************************************************//**
 Returns TRUE if recovery is currently running.
-@return	recv_recovery_on */
+@return recv_recovery_on */
 UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
+bool
+recv_recovery_is_on()
+/*=================*/
 {
 	return(recv_recovery_on);
 }
 
-#ifdef UNIV_LOG_ARCHIVE
-/** TRUE when applying redo log records from an archived log file */
-extern ibool	recv_recovery_from_backup_on;
-
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return	recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void)
-/*=================================*/
-{
-	return(recv_recovery_from_backup_on);
-}
-#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innobase/include/log0types.h b/storage/innobase/include/log0types.h
new file mode 100644
index 00000000000..7674c758403
--- /dev/null
+++ b/storage/innobase/include/log0types.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+
+Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0types.h
+Log types
+
+Created 2013-03-15 Sunny Bains
+*******************************************************/
+
+#ifndef log0types_h
+#define log0types_h
+
+#include "univ.i"
+
+/* Type used for all log sequence number storage and arithmetic */
+typedef	ib_uint64_t		lsn_t;
+
+#define LSN_MAX			IB_UINT64_MAX
+
+#define LSN_PF			UINT64PF
+
+/** The redo log manager */
+struct RedoLog;
+
+/** The recovery implementation */
+struct redo_recover_t;
+
+#endif /* log0types_h */
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index 9859def0adc..4d32e2e7170 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -30,7 +30,7 @@ Created 11/28/1995 Heikki Tuuri
 #ifndef UNIV_INNOCHECKSUM
 
 #include "univ.i"
-#include "ut0byte.h"
+#include "mtr0types.h"
 
 /* The data and all fields are always stored in a database file
 in the same format: ascii, big-endian, ... .
@@ -47,13 +47,13 @@ mach_write_to_1(
 	ulint	n);	 /*!< in: ulint integer to be stored, >= 0, < 256 */
 /********************************************************//**
 The following function is used to fetch data from one byte.
-@return	ulint integer, >= 0, < 256 */
+@return ulint integer, >= 0, < 256 */
 UNIV_INLINE
 ulint
 mach_read_from_1(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to byte */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************//**
 The following function is used to store data in two consecutive
 bytes. We store the most significant byte to the lower address. */
@@ -66,19 +66,19 @@ mach_write_to_2(
 /********************************************************//**
 The following function is used to fetch data from two consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer, >= 0, < 64k */
+@return ulint integer, >= 0, < 64k */
 UNIV_INLINE
 ulint
 mach_read_from_2(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to two bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************//**
 The following function is used to convert a 16-bit data item
 to the canonical format, for fast bytewise equality test
 against memory.
-@return	16-bit integer in canonical format */
+@return 16-bit integer in canonical format */
 UNIV_INLINE
 uint16
 mach_encode_2(
@@ -89,7 +89,7 @@ mach_encode_2(
 The following function is used to convert a 16-bit data item
 from the canonical format, for fast bytewise equality test
 against memory.
-@return	integer in machine-dependent format */
+@return integer in machine-dependent format */
 UNIV_INLINE
 ulint
 mach_decode_2(
@@ -108,13 +108,13 @@ mach_write_to_3(
 /********************************************************//**
 The following function is used to fetch data from 3 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer */
+@return ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_3(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to 3 bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************//**
 The following function is used to store data in four consecutive
 bytes. We store the most significant byte to the lowest address. */
@@ -127,16 +127,16 @@ mach_write_to_4(
 /********************************************************//**
 The following function is used to fetch data from 4 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer */
+@return ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_4(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to four bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a ulint in a compressed form (1..5 bytes).
-@return	stored size in bytes */
+@return stored size in bytes */
 UNIV_INLINE
 ulint
 mach_write_compressed(
@@ -145,22 +145,21 @@ mach_write_compressed(
 	ulint	n);	/*!< in: ulint integer to be stored */
 /*********************************************************//**
 Returns the size of an ulint when written in the compressed form.
-@return	compressed size in bytes */
+@return compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_get_compressed_size(
 /*=====================*/
 	ulint	n)	/*!< in: ulint integer to be stored */
 	MY_ATTRIBUTE((const));
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return	read integer */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
-	const byte*	b)	/*!< in: pointer to memory from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
+/** Read a 32-bit integer in a compressed form.
+@param[in,out]	b	pointer to memory from where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
+UNIV_INLINE
+ib_uint32_t
+mach_read_next_compressed(
+	const byte**	b);
 /*******************************************************//**
 The following function is used to store data in 6 consecutive
 bytes. We store the most significant byte to the lowest address. */
@@ -173,13 +172,13 @@ mach_write_to_6(
 /********************************************************//**
 The following function is used to fetch data from 6 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	48-bit integer */
+@return 48-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_6(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to 6 bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************//**
 The following function is used to store data in 7 consecutive
 bytes. We store the most significant byte to the lowest address. */
@@ -192,13 +191,13 @@ mach_write_to_7(
 /********************************************************//**
 The following function is used to fetch data from 7 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	56-bit integer */
+@return 56-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_7(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to 7 bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************//**
 The following function is used to store data in 8 consecutive
 bytes. We store the most significant byte to the lowest address. */
@@ -211,97 +210,77 @@ mach_write_to_8(
 /********************************************************//**
 The following function is used to fetch data from 8 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	64-bit integer */
+@return 64-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_8(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to 8 bytes */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
-mach_ull_write_compressed(
+mach_u64_write_compressed(
 /*======================*/
 	byte*		b,	/*!< in: pointer to memory where to store */
 	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return	compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
-	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return	the value read */
+/** Read a 64-bit integer in a compressed form.
+@param[in,out]	b	pointer to memory from where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
 UNIV_INLINE
 ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
-	const byte*	b)	/*!< in: pointer to memory from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
+mach_u64_read_next_compressed(
+	const byte**	b);
 /*********************************************************//**
 Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
-mach_ull_write_much_compressed(
+mach_u64_write_much_compressed(
 /*===========================*/
 	byte*		b,	/*!< in: pointer to memory where to store */
 	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
 /*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return	compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
-	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
-	MY_ATTRIBUTE((const));
-/*********************************************************//**
 Reads a 64-bit integer in a compressed form.
-@return	the value read */
+@return the value read */
 UNIV_INLINE
 ib_uint64_t
-mach_ull_read_much_compressed(
+mach_u64_read_much_compressed(
 /*==========================*/
 	const byte*	b)	/*!< in: pointer to memory from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return	pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
+	MY_ATTRIBUTE((warn_unused_result));
+/** Read a 32-bit integer in a compressed form.
+@param[in,out]	ptr	pointer to memory from where to read;
+advanced past the bytes consumed, or set to NULL if the buffer is too short
+@param[in]	end_ptr	end of the buffer
+@return unsigned value */
+ib_uint32_t
 mach_parse_compressed(
-/*==================*/
-	byte*	ptr,	/*!< in: pointer to buffer from where to read */
-	byte*	end_ptr,/*!< in: pointer to end of the buffer */
-	ulint*	val);	/*!< out: read value */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
+	const byte**	ptr,
+	const byte*	end_ptr);
+/** Read a 64-bit integer in a compressed form.
+@param[in,out]	ptr	pointer to memory from where to read;
+advanced past the bytes consumed, or set to NULL if the buffer is too short
+@param[in]	end_ptr	end of the buffer
+@return unsigned value */
 UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
-	byte*		ptr,	/*!< in: pointer to buffer from where to read */
-	byte*		end_ptr,/*!< in: pointer to end of the buffer */
-	ib_uint64_t*	val);	/*!< out: read value */
+ib_uint64_t
+mach_u64_parse_compressed(
+	const byte**	ptr,
+	const byte*	end_ptr);
 #ifndef UNIV_HOTBACKUP
 /*********************************************************//**
 Reads a double. It is stored in a little-endian format.
-@return	double read */
+@return double read */
 UNIV_INLINE
 double
 mach_double_read(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to memory from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a double. It is stored in a little-endian format. */
 UNIV_INLINE
@@ -312,13 +291,13 @@ mach_double_write(
 	double	d);	/*!< in: double */
 /*********************************************************//**
 Reads a float. It is stored in a little-endian format.
-@return	float read */
+@return float read */
 UNIV_INLINE
 float
 mach_float_read(
 /*============*/
 	const byte*	b)	/*!< in: pointer to memory from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a float. It is stored in a little-endian format. */
 UNIV_INLINE
@@ -329,14 +308,14 @@ mach_float_write(
 	float	d);	/*!< in: float */
 /*********************************************************//**
 Reads a ulint stored in the little-endian format.
-@return	unsigned long int */
+@return unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_n_little_endian(
 /*===========================*/
 	const byte*	buf,		/*!< in: from where to read */
 	ulint		buf_size)	/*!< in: from how many bytes to read */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a ulint in the little-endian format. */
 UNIV_INLINE
@@ -348,13 +327,13 @@ mach_write_to_n_little_endian(
 	ulint	n);		/*!< in: unsigned long int to write */
 /*********************************************************//**
 Reads a ulint stored in the little-endian format.
-@return	unsigned long int */
+@return unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_2_little_endian(
 /*===========================*/
 	const byte*	buf)		/*!< in: from where to read */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************//**
 Writes a ulint in the little-endian format. */
 UNIV_INLINE
@@ -366,7 +345,7 @@ mach_write_to_2_little_endian(
 /*********************************************************//**
 Convert integral type from storage byte order (big endian) to
 host byte order.
-@return	integer value */
+@return integer value */
 UNIV_INLINE
 ib_uint64_t
 mach_read_int_type(
@@ -398,15 +377,16 @@ mach_write_ulonglong(
 	ulint		len,		/*!< in: length of dest */
 	bool		usign);		/*!< in: signed or unsigned flag */
 
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return	value read */
+/** Read 1 to 4 bytes from a file page buffered in the buffer pool.
+@param[in]	ptr	pointer from where to read
+@param[in]	type	MLOG_1BYTE, MLOG_2BYTES, or MLOG_4BYTES
+@return value read */
 UNIV_INLINE
 ulint
 mach_read_ulint(
-/*============*/
-	const byte*	ptr,	/*!< in: pointer from where to read */
-	ulint		type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	const byte*	ptr,
+	mlog_id_t	type)
+	MY_ATTRIBUTE((warn_unused_result));
 
 #endif /* !UNIV_HOTBACKUP */
 #endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index 881b2b6055f..20018844e4b 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,7 @@ Created 11/28/1995 Heikki Tuuri
 
 #ifndef UNIV_INNOCHECKSUM
 
-#include "ut0mem.h"
+#include "mtr0types.h"
 
 /*******************************************************//**
 The following function is used to store data in one byte. */
@@ -42,20 +42,8 @@ mach_write_to_1(
 
 	b[0] = (byte) n;
 }
-#endif /* !UNIV_INNOCHECKSUM */
 
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return	ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
-	const byte*	b)	/*!< in: pointer to byte */
-{
-	ut_ad(b);
-	return((ulint)(b[0]));
-}
+#endif /* !UNIV_INNOCHECKSUM */
 
 /*******************************************************//**
 The following function is used to store data in two consecutive
@@ -75,9 +63,22 @@ mach_write_to_2(
 }
 
 /********************************************************//**
+The following function is used to fetch data from one byte.
+@return ulint integer, >= 0, < 256 */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to byte */
+{
+	ut_ad(b);
+	return((ulint)(b[0]));
+}
+
+/********************************************************//**
 The following function is used to fetch data from 2 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer */
+@return ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_2(
@@ -93,7 +94,7 @@ mach_read_from_2(
 The following function is used to convert a 16-bit data item
 to the canonical format, for fast bytewise equality test
 against memory.
-@return	16-bit integer in canonical format */
+@return 16-bit integer in canonical format */
 UNIV_INLINE
 uint16
 mach_encode_2(
@@ -109,7 +110,7 @@ mach_encode_2(
 The following function is used to convert a 16-bit data item
 from the canonical format, for fast bytewise equality test
 against memory.
-@return	integer in machine-dependent format */
+@return integer in machine-dependent format */
 UNIV_INLINE
 ulint
 mach_decode_2(
@@ -141,7 +142,7 @@ mach_write_to_3(
 /********************************************************//**
 The following function is used to fetch data from 3 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer */
+@return ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_3(
@@ -155,6 +156,8 @@ mach_read_from_3(
 		);
 }
 
+#endif /* !UNIV_INNOCHECKSUM */
+
 /*******************************************************//**
 The following function is used to store data in four consecutive
 bytes. We store the most significant byte to the lowest address. */
@@ -173,12 +176,10 @@ mach_write_to_4(
 	b[3] = (byte) n;
 }
 
-#endif /* !UNIV_INNOCHECKSUM */
-
 /********************************************************//**
 The following function is used to fetch data from 4 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	ulint integer */
+@return ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_4(
@@ -202,7 +203,7 @@ the byte. If the most significant bit is zero, it means 1-byte storage,
 else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
 it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
 else the storage is 5-byte.
-@return	compressed size in bytes */
+@return compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_write_compressed(
@@ -212,20 +213,25 @@ mach_write_compressed(
 {
 	ut_ad(b);
 
-	if (n < 0x80UL) {
+	if (n < 0x80) {
+		/* 0nnnnnnn (7 bits) */
 		mach_write_to_1(b, n);
 		return(1);
-	} else if (n < 0x4000UL) {
-		mach_write_to_2(b, n | 0x8000UL);
+	} else if (n < 0x4000) {
+		/* 10nnnnnn nnnnnnnn (14 bits) */
+		mach_write_to_2(b, n | 0x8000);
 		return(2);
-	} else if (n < 0x200000UL) {
-		mach_write_to_3(b, n | 0xC00000UL);
+	} else if (n < 0x200000) {
+		/* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+		mach_write_to_3(b, n | 0xC00000);
 		return(3);
-	} else if (n < 0x10000000UL) {
-		mach_write_to_4(b, n | 0xE0000000UL);
+	} else if (n < 0x10000000) {
+		/* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+		mach_write_to_4(b, n | 0xE0000000);
 		return(4);
 	} else {
-		mach_write_to_1(b, 0xF0UL);
+		/* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+		mach_write_to_1(b, 0xF0);
 		mach_write_to_4(b + 1, n);
 		return(5);
 	}
@@ -233,53 +239,108 @@ mach_write_compressed(
 
 /*********************************************************//**
 Returns the size of a ulint when written in the compressed form.
-@return	compressed size in bytes */
+@return compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_get_compressed_size(
 /*=====================*/
 	ulint	n)	/*!< in: ulint integer (< 2^32) to be stored */
 {
-	if (n < 0x80UL) {
+	if (n < 0x80) {
+		/* 0nnnnnnn (7 bits) */
 		return(1);
-	} else if (n < 0x4000UL) {
+	} else if (n < 0x4000) {
+		/* 10nnnnnn nnnnnnnn (14 bits) */
 		return(2);
-	} else if (n < 0x200000UL) {
+	} else if (n < 0x200000) {
+		/* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
 		return(3);
-	} else if (n < 0x10000000UL) {
+	} else if (n < 0x10000000) {
+		/* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
 		return(4);
 	} else {
+		/* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
 		return(5);
 	}
 }
 
 /*********************************************************//**
 Reads a ulint in a compressed form.
-@return	read integer (< 2^32) */
+@return read integer (< 2^32) */
 UNIV_INLINE
 ulint
 mach_read_compressed(
 /*=================*/
 	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
-	ulint	flag;
+	ulint	val;
 
 	ut_ad(b);
 
-	flag = mach_read_from_1(b);
+	val = mach_read_from_1(b);
+
+	if (val < 0x80) {
+		/* 0nnnnnnn (7 bits) */
+	} else if (val < 0xC0) {
+		/* 10nnnnnn nnnnnnnn (14 bits) */
+		val = mach_read_from_2(b) & 0x3FFF;
+		ut_ad(val > 0x7F);
+	} else if (val < 0xE0) {
+		/* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+		val = mach_read_from_3(b) & 0x1FFFFF;
+		ut_ad(val > 0x3FFF);
+	} else if (val < 0xF0) {
+		/* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+		val = mach_read_from_4(b) & 0xFFFFFFF;
+		ut_ad(val > 0x1FFFFF);
+	} else {
+		/* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+		ut_ad(val == 0xF0);
+		val = mach_read_from_4(b + 1);
+		ut_ad(val > 0xFFFFFFF);
+	}
 
-	if (flag < 0x80UL) {
-		return(flag);
-	} else if (flag < 0xC0UL) {
-		return(mach_read_from_2(b) & 0x7FFFUL);
-	} else if (flag < 0xE0UL) {
-		return(mach_read_from_3(b) & 0x3FFFFFUL);
-	} else if (flag < 0xF0UL) {
-		return(mach_read_from_4(b) & 0x1FFFFFFFUL);
+	return(val);
+}
+
+/** Read a 32-bit integer in a compressed form.
+@param[in,out]	b	pointer to memory from where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
+UNIV_INLINE
+ib_uint32_t
+mach_read_next_compressed(
+	const byte**	b)
+{
+	ulint	val = mach_read_from_1(*b);
+
+	if (val < 0x80) {
+		/* 0nnnnnnn (7 bits) */
+		++*b;
+	} else if (val < 0xC0) {
+		/* 10nnnnnn nnnnnnnn (14 bits) */
+		val = mach_read_from_2(*b) & 0x3FFF;
+		ut_ad(val > 0x7F);
+		*b += 2;
+	} else if (val < 0xE0) {
+		/* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+		val = mach_read_from_3(*b) & 0x1FFFFF;
+		ut_ad(val > 0x3FFF);
+		*b += 3;
+	} else if (val < 0xF0) {
+		/* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+		val = mach_read_from_4(*b) & 0xFFFFFFF;
+		ut_ad(val > 0x1FFFFF);
+		*b += 4;
 	} else {
-		ut_ad(flag == 0xF0UL);
-		return(mach_read_from_4(b + 1));
+		/* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+		ut_ad(val == 0xF0);
+		val = mach_read_from_4(*b + 1);
+		ut_ad(val > 0xFFFFFFF);
+		*b += 5;
 	}
+
+	return(static_cast<ib_uint32_t>(val));
 }
 
 /*******************************************************//**
@@ -303,19 +364,20 @@ mach_write_to_8(
 /********************************************************//**
 The following function is used to fetch data from 8 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	64-bit integer */
+@return 64-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_8(
 /*=============*/
 	const byte*	b)	/*!< in: pointer to 8 bytes */
 {
-	ib_uint64_t	ull;
+	ib_uint64_t	u64;
 
-	ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
-	ull |= (ib_uint64_t) mach_read_from_4(b + 4);
+	u64 = mach_read_from_4(b);
+	u64 <<= 32;
+	u64 |= mach_read_from_4(b + 4);
 
-	return(ull);
+	return(u64);
 }
 
 #ifndef UNIV_INNOCHECKSUM
@@ -339,7 +401,7 @@ mach_write_to_7(
 /********************************************************//**
 The following function is used to fetch data from 7 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	56-bit integer */
+@return 56-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_7(
@@ -370,7 +432,7 @@ mach_write_to_6(
 /********************************************************//**
 The following function is used to fetch data from 6 consecutive
 bytes. The most significant byte is at the lowest address.
-@return	48-bit integer */
+@return 48-bit integer */
 UNIV_INLINE
 ib_uint64_t
 mach_read_from_6(
@@ -384,10 +446,10 @@ mach_read_from_6(
 
 /*********************************************************//**
 Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
-mach_ull_write_compressed(
+mach_u64_write_compressed(
 /*======================*/
 	byte*		b,	/*!< in: pointer to memory where to store */
 	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
@@ -402,48 +464,30 @@ mach_ull_write_compressed(
 	return(size + 4);
 }
 
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return	compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
-	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
-{
-	return(4 + mach_get_compressed_size((ulint) (n >> 32)));
-}
-
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return	the value read */
+/** Read a 64-bit integer in a compressed form.
+@param[in,out]	b	pointer to memory from where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
 UNIV_INLINE
 ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
-	const byte*	b)	/*!< in: pointer to memory from where to read */
+mach_u64_read_next_compressed(
+	const byte**	b)
 {
-	ib_uint64_t	n;
-	ulint		size;
-
-	ut_ad(b);
+	ib_uint64_t	val;
 
-	n = (ib_uint64_t) mach_read_compressed(b);
-
-	size = mach_get_compressed_size((ulint) n);
-
-	n <<= 32;
-	n |= (ib_uint64_t) mach_read_from_4(b + size);
-
-	return(n);
+	val = mach_read_next_compressed(b);
+	val <<= 32;
+	val |= mach_read_from_4(*b);
+	*b += 4;
+	return(val);
 }
 
 /*********************************************************//**
 Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
-mach_ull_write_much_compressed(
+mach_u64_write_much_compressed(
 /*===========================*/
 	byte*		b,	/*!< in: pointer to memory where to store */
 	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
@@ -465,94 +509,110 @@ mach_ull_write_much_compressed(
 }
 
 /*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return	compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
-	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
-{
-	if (!(n >> 32)) {
-		return(mach_get_compressed_size((ulint) n));
-	}
-
-	return(1 + mach_get_compressed_size((ulint) (n >> 32))
-	       + mach_get_compressed_size((ulint) n & ULINT32_MASK));
-}
-
-/*********************************************************//**
 Reads a 64-bit integer in a compressed form.
-@return	the value read */
+@return the value read */
 UNIV_INLINE
 ib_uint64_t
-mach_ull_read_much_compressed(
+mach_u64_read_much_compressed(
 /*==========================*/
 	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	ib_uint64_t	n;
-	ulint		size;
-
-	ut_ad(b);
-
-	if (*b != (byte)0xFF) {
-		n = 0;
-		size = 0;
-	} else {
-		n = (ib_uint64_t) mach_read_compressed(b + 1);
 
-		size = 1 + mach_get_compressed_size((ulint) n);
-		n <<= 32;
+	if (*b != 0xFF) {
+		return(mach_read_compressed(b));
 	}
 
-	n |= mach_read_compressed(b + size);
+	b++;
+	n = mach_read_next_compressed(&b);
+	n <<= 32;
+	n |= mach_read_compressed(b);
 
 	return(n);
 }
 
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
+/** Read a 64-bit integer in a much compressed form (1..11 bytes).
+@param[in,out]	b	pointer to memory from where to read;
+advanced by the number of bytes consumed
+@return unsigned value */
 UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
-	byte*		ptr,	/* in: pointer to buffer from where to read */
-	byte*		end_ptr,/* in: pointer to end of the buffer */
-	ib_uint64_t*	val)	/* out: read value */
-{
-	ulint		size;
-
-	ut_ad(ptr);
-	ut_ad(end_ptr);
-	ut_ad(val);
-
-	if (end_ptr < ptr + 5) {
-
-		return(NULL);
+ib_uint64_t
+mach_read_next_much_compressed(
+	const byte**	b)
+{
+	ib_uint64_t	val = mach_read_from_1(*b);
+
+	if (val < 0x80) {
+		/* 0nnnnnnn (7 bits) */
+		++*b;
+	} else if (val < 0xC0) {
+		/* 10nnnnnn nnnnnnnn (14 bits) */
+		val = mach_read_from_2(*b) & 0x3FFF;
+		ut_ad(val > 0x7F);
+		*b += 2;
+	} else if (val < 0xE0) {
+		/* 110nnnnn nnnnnnnn nnnnnnnn (21 bits) */
+		val = mach_read_from_3(*b) & 0x1FFFFF;
+		ut_ad(val > 0x3FFF);
+		*b += 3;
+	} else if (val < 0xF0) {
+		/* 1110nnnn nnnnnnnn nnnnnnnn nnnnnnnn (28 bits) */
+		val = mach_read_from_4(*b) & 0xFFFFFFF;
+		ut_ad(val > 0x1FFFFF);
+		*b += 4;
+	} else if (val == 0xF0) {
+		/* 11110000 nnnnnnnn nnnnnnnn nnnnnnnn nnnnnnnn (32 bits) */
+		val = mach_read_from_4(*b + 1);
+		ut_ad(val > 0xFFFFFFF);
+		*b += 5;
+	} else {
+		/* 11111111 followed by up to 64 bits */
+		ut_ad(val == 0xFF);
+		++*b;
+		val = mach_read_next_compressed(b);
+		ut_ad(val > 0);
+		val <<= 32;
+		val |= mach_read_next_compressed(b);
 	}
 
-	*val = mach_read_compressed(ptr);
+	return(val);
+}
 
-	size = mach_get_compressed_size((ulint) *val);
+/** Read a 64-bit integer in a compressed form.
+@param[in,out]	ptr	pointer to memory from where to read;
+advanced past the bytes consumed, or set to NULL if the buffer is too short
+@param[in]	end_ptr	end of the buffer
+@return unsigned value */
+UNIV_INLINE
+ib_uint64_t
+mach_u64_parse_compressed(
+	const byte**	ptr,
+	const byte*	end_ptr)
+{
+	ib_uint64_t	val = 0;
 
-	ptr += size;
+	if (end_ptr < *ptr + 5) {
+		*ptr = NULL;
+		return(val);
+	}
 
-	if (end_ptr < ptr + 4) {
+	val = mach_read_next_compressed(ptr);
 
-		return(NULL);
+	if (end_ptr < *ptr + 4) {
+		*ptr = NULL;
+		return(val);
 	}
 
-	*val <<= 32;
-	*val |= mach_read_from_4(ptr);
+	val <<= 32;
+	val |= mach_read_from_4(*ptr);
+	*ptr += 4;
 
-	return(ptr + 4);
+	return(val);
 }
 #ifndef UNIV_HOTBACKUP
 /*********************************************************//**
 Reads a double. It is stored in a little-endian format.
-@return	double read */
+@return double read */
 UNIV_INLINE
 double
 mach_double_read(
@@ -601,7 +661,7 @@ mach_double_write(
 
 /*********************************************************//**
 Reads a float. It is stored in a little-endian format.
-@return	float read */
+@return float read */
 UNIV_INLINE
 float
 mach_float_read(
@@ -650,7 +710,7 @@ mach_float_write(
 
 /*********************************************************//**
 Reads a ulint stored in the little-endian format.
-@return	unsigned long int */
+@return unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_n_little_endian(
@@ -714,7 +774,7 @@ mach_write_to_n_little_endian(
 
 /*********************************************************//**
 Reads a ulint stored in the little-endian format.
-@return	unsigned long int */
+@return unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_2_little_endian(
@@ -746,7 +806,7 @@ mach_write_to_2_little_endian(
 /*********************************************************//**
 Convert integral type from storage byte order (big endian) to
 host byte order.
-@return	integer value */
+@return integer value */
 UNIV_INLINE
 ib_uint64_t
 mach_read_int_type(
@@ -757,8 +817,8 @@ mach_read_int_type(
 {
 	/* XXX this can be optimized on big-endian machines */
 
-	ullint	ret;
-	uint	i;
+	uintmax_t	ret;
+	uint		i;
 
 	if (unsigned_type || (src[0] & 0x80)) {
 
@@ -822,6 +882,8 @@ mach_write_int_type(
 	ulint		len,		/*!< in: length of src */
 	bool		usign)		/*!< in: signed or unsigned flag */
 {
+	ut_ad(len >= 1 && len <= 8);
+
 #ifdef WORDS_BIGENDIAN
         memcpy(dest, src, len);
 #else
@@ -859,32 +921,31 @@ mach_write_ulonglong(
 		*dest ^=  0x80;
 	}
 }
-#endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
 
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return	value read */
+/** Read 1 to 4 bytes from a file page buffered in the buffer pool.
+@param[in]	ptr	pointer from where to read
+@param[in]	type	MLOG_1BYTE, MLOG_2BYTES, or MLOG_4BYTES
+@return value read */
 UNIV_INLINE
 ulint
 mach_read_ulint(
-/*============*/
-	const byte*	ptr,	/*!< in: pointer from where to read */
-	ulint		type)	/*!< in: 1,2 or 4 bytes */
+	const byte*	ptr,
+	mlog_id_t	type)
 {
 	switch (type) {
-	case 1:
+	case MLOG_1BYTE:
 		return(mach_read_from_1(ptr));
-	case 2:
+	case MLOG_2BYTES:
 		return(mach_read_from_2(ptr));
-	case 4:
+	case MLOG_4BYTES:
 		return(mach_read_from_4(ptr));
 	default:
-		ut_error;
+		break;
 	}
 
+	ut_error;
 	return(0);
 }
 
 #endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
deleted file mode 100644
index cc339b82910..00000000000
--- a/storage/innobase/include/mem0dbg.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0dbg.h
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-/* In the debug version each allocated field is surrounded with
-check fields whose sizes are given below */
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables in mem0dbg.cc. */
-extern ib_mutex_t	mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
-
-#define MEM_FIELD_HEADER_SIZE	ut_calc_align(2 * sizeof(ulint),\
-						UNIV_MEM_ALIGNMENT)
-#define MEM_FIELD_TRAILER_SIZE	sizeof(ulint)
-#else
-#define MEM_FIELD_HEADER_SIZE	0
-#endif
-
-
-/* Space needed when allocating for a user a field of
-length N. The space is allocated only in multiples of
-UNIV_MEM_ALIGNMENT. In the debug version there are also
-check fields at the both ends of the field. */
-#ifdef UNIV_MEM_DEBUG
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
-		 + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
-#else
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
-#endif
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	byte*		top,	/*!< in: calculate and validate only until
-				this top pointer in the heap is reached,
-				if this pointer is NULL, ignored */
-	ibool		 print,	 /*!< in: if TRUE, prints the contents
-				of the heap; works only in
-				the debug version */
-	ibool*		 error,	 /*!< out: TRUE if error */
-	ulint*		us_size,/*!< out: allocated memory
-				(for the user) in the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored; in the
-				non-debug version this is always -1 */
-	ulint*		ph_size,/*!< out: physical size of the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored */
-	ulint*		n_blocks); /*!< out: number of blocks in the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored */
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
-	mem_heap_t*   heap);	/*!< in: memory heap */
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it)
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
-	mem_heap_t*   heap);	/*!< in: memory heap */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return	TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void);
-/*===============*/
-/*****************************************************************//**
-Validates the dynamic memory
-@return	TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void);
-/*=========================*/
-/************************************************************//**
-Validates the dynamic memory
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void);
-/*===============*/
-#endif /* UNIV_MEM_DEBUG */
-/************************************************************//**
-Tries to find neigboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
-	void*	ptr);	/*!< in: pointer to place of possible corruption */
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void);
-/*================*/
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void);
-/*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic
deleted file mode 100644
index ec60ed35337..00000000000
--- a/storage/innobase/include/mem0dbg.ic
+++ /dev/null
@@ -1,109 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0dbg.ic
-The memory management: the debug code. This is not an independent
-compilation module but is included in mem0mem.*.
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-extern ulint	mem_current_allocated_memory;
-
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
-	byte*	buf,	/*!< in: memory field */
-	ulint	n);	/*!< in: how many bytes the user requested */
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
-	byte*	buf,	/*!< in: memory field */
-	ulint	n);	/*!< in: how many bytes the user requested */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
-	byte*	buf,	/*!< in: pointer to buffer */
-	ulint	 n);	 /*!< in: length of buffer */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
-	byte*	buf,	/*!< in: pointer to buffer */
-	ulint	n);	/*!< in: length of buffer */
-/***************************************************************//**
-Inserts a created memory heap to the hash table of
-current allocated memory heaps.
-Initializes the hash table when first called. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
-	mem_heap_t*	heap,	   /*!< in: the created heap */
-	const char*	file_name, /*!< in: file name of creation */
-	ulint		line);	   /*!< in: line where created */
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
-	mem_heap_t*	heap,	   /*!< in: the heap to be freed */
-	const char*	file_name, /*!< in: file name of freeing */
-	ulint		line);	   /*!< in: line where freed */
-
-
-void
-mem_field_header_set_len(byte* field, ulint len);
-
-ulint
-mem_field_header_get_len(byte* field);
-
-void
-mem_field_header_set_check(byte* field, ulint check);
-
-ulint
-mem_field_header_get_check(byte* field);
-
-void
-mem_field_trailer_set_check(byte* field, ulint check);
-
-ulint
-mem_field_trailer_get_check(byte* field);
-#endif /* UNIV_MEM_DEBUG */
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index de9b8b29fd9..f8fdb53e132 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -30,22 +30,20 @@ Created 6/9/1994 Heikki Tuuri
 #include "ut0mem.h"
 #include "ut0byte.h"
 #include "ut0rnd.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-#endif /* UNIV_HOTBACKUP */
-#include "ut0lst.h"
 #include "mach0data.h"
 
+#include <memory>
+
 /* -------------------- MEMORY HEAPS ----------------------------- */
 
-/* A block of a memory heap consists of the info structure
+/** A block of a memory heap consists of the info structure
 followed by an area of memory */
 typedef struct mem_block_info_t	mem_block_t;
 
-/* A memory heap is a nonempty linear list of memory blocks */
+/** A memory heap is a nonempty linear list of memory blocks */
 typedef mem_block_t		mem_heap_t;
 
-/* Types of allocation for memory heaps: DYNAMIC means allocation from the
+/** Types of allocation for memory heaps: DYNAMIC means allocation from the
 dynamic memory pool of the C compiler, BUFFER means allocation from the
 buffer pool; the latter method is used for very big heaps */
 
@@ -59,13 +57,13 @@ buffer pool; the latter method is used for very big heaps */
 					allocation functions can return
 					NULL. */
 
-/* Different type of heaps in terms of which datastructure is using them */
+/** Different types of heaps in terms of which data structure is using them */
 #define MEM_HEAP_FOR_BTR_SEARCH		(MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
 #define MEM_HEAP_FOR_PAGE_HASH		(MEM_HEAP_DYNAMIC)
 #define MEM_HEAP_FOR_RECV_SYS		(MEM_HEAP_BUFFER)
 #define MEM_HEAP_FOR_LOCK_HEAP		(MEM_HEAP_BUFFER)
 
-/* The following start size is used for the first block in the memory heap if
+/** The following start size is used for the first block in the memory heap if
 the size is not specified, i.e., 0 is given as the parameter in the call of
 create. The standard size is the maximum (payload) size of the blocks used for
 allocations of small buffers. */
@@ -74,147 +72,192 @@ allocations of small buffers. */
 #define MEM_BLOCK_STANDARD_SIZE		\
 	(UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
 
-/* If a memory heap is allowed to grow into the buffer pool, the following
+/** If a memory heap is allowed to grow into the buffer pool, the following
 is the maximum size for a single allocated buffer: */
 #define MEM_MAX_ALLOC_IN_BUF		(UNIV_PAGE_SIZE - 200)
 
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
-	ulint	size);	/*!< in: common pool size in bytes */
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void);
-/*===========*/
+/** Space needed when allocating for a user a field of length N.
+The space is allocated only in multiples of UNIV_MEM_ALIGNMENT.  */
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
 
 #ifdef UNIV_DEBUG
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create(N)	mem_heap_create_func(		\
-		(N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
+/** Macro for memory heap creation.
+@param[in]	size		Desired start block size. */
+# define mem_heap_create(size)					\
+	 mem_heap_create_func((size), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
 
-# define mem_heap_create_typed(N, T)	mem_heap_create_func(	\
-		(N), __FILE__, __LINE__, (T))
+/** Macro for memory heap creation.
+@param[in]	size		Desired start block size.
+@param[in]	type		Heap type */
+# define mem_heap_create_typed(size, type)			\
+	 mem_heap_create_func((size), __FILE__, __LINE__, (type))
 
 #else /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
+/** Macro for memory heap creation.
+@param[in]	size		Desired start block size. */
+# define mem_heap_create(size) mem_heap_create_func((size), MEM_HEAP_DYNAMIC)
 
-# define mem_heap_create(N)	mem_heap_create_func(		\
-		(N), MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create_typed(N, T)	mem_heap_create_func(	\
-		(N), (T))
+/** Macro for memory heap creation.
+@param[in]	size		Desired start block size.
+@param[in]	type		Heap type */
+# define mem_heap_create_typed(size, type)			\
+	 mem_heap_create_func((size), (type))
 
 #endif /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap freeing. */
 
-#define mem_heap_free(heap) mem_heap_free_func(\
-					  (heap), __FILE__, __LINE__)
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments.
+/** Creates a memory heap.
+NOTE: Use the corresponding macros instead of this function.
+A single user buffer of 'size' will fit in the block.
+0 creates a default size block.
+@param[in]	size		Desired start block size.
+@param[in]	file_name	File name where created
+@param[in]	line		Line where created
+@param[in]	type		Heap type
 @return own: memory heap, NULL if did not succeed (only possible for
 MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 mem_heap_t*
 mem_heap_create_func(
-/*=================*/
-	ulint		n,		/*!< in: desired start block size,
-					this means that a single user buffer
-					of size n will fit in the block,
-					0 creates a default size block */
+	ulint		size,
 #ifdef UNIV_DEBUG
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line,		/*!< in: line where created */
+	const char*	file_name,
+	ulint		line,
 #endif /* UNIV_DEBUG */
-	ulint		type);		/*!< in: heap type */
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
+	ulint		type);
+
+/** Frees the space occupied by a memory heap.
+NOTE: Use the corresponding macro instead of this function.
+@param[in]	heap	Heap to be freed */
 UNIV_INLINE
 void
-mem_heap_free_func(
-/*===============*/
-	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
-	const char*	file_name,	/*!< in: file name where freed */
-	ulint		line);		/*!< in: line where freed */
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return	allocated, zero-filled storage */
+mem_heap_free(
+	mem_heap_t*	heap);
+
+/** Allocates and zero-fills n bytes of memory from a memory heap.
+@param[in]	heap	memory heap
+@param[in]	n	number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
+@return allocated, zero-filled storage */
 UNIV_INLINE
 void*
 mem_heap_zalloc(
-/*============*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n);	/*!< in: number of bytes; if the heap is allowed
-				to grow into the buffer pool, this must be
-				<= MEM_MAX_ALLOC_IN_BUF */
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
+	mem_heap_t*	heap,
+	ulint		n);
+
+/** Allocates n bytes of memory from a memory heap.
+@param[in]	heap	memory heap
+@param[in]	n	number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
 @return allocated storage, NULL if did not succeed (only possible for
 MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 void*
 mem_heap_alloc(
-/*===========*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n);	/*!< in: number of bytes; if the heap is allowed
-				to grow into the buffer pool, this must be
-				<= MEM_MAX_ALLOC_IN_BUF */
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return	pointer to the heap top */
+	mem_heap_t*	heap,
+	ulint		n);
+
+/** Returns a pointer to the heap top.
+@param[in]	heap		memory heap
+@return pointer to the heap top */
 UNIV_INLINE
 byte*
 mem_heap_get_heap_top(
-/*==================*/
-	mem_heap_t*	heap);	/*!< in: memory heap */
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
+	mem_heap_t*	heap);
+
+/** Frees the space in a memory heap exceeding the pointer given.
+The pointer must have been acquired from mem_heap_get_heap_top.
+The first memory block of the heap is not freed.
+@param[in]	heap		heap from which to free
+@param[in]	old_top		pointer to old top of heap */
 UNIV_INLINE
 void
 mem_heap_free_heap_top(
-/*===================*/
-	mem_heap_t*	heap,	/*!< in: heap from which to free */
-	byte*		old_top);/*!< in: pointer to old top of heap */
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
+	mem_heap_t*	heap,
+	byte*		old_top);
+
+/** Empties a memory heap.
+The first memory block of the heap is not freed.
+@param[in]	heap		heap to empty */
 UNIV_INLINE
 void
 mem_heap_empty(
-/*===========*/
-	mem_heap_t*	heap);	/*!< in: heap to empty */
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap.
+	mem_heap_t*	heap);
+
+/** Returns a pointer to the topmost element in a memory heap.
 The size of the element must be given.
-@return	pointer to the topmost element */
+@param[in]	heap	memory heap
+@param[in]	n	size of the topmost element
+@return pointer to the topmost element */
 UNIV_INLINE
 void*
 mem_heap_get_top(
+	mem_heap_t*	heap,
+	ulint		n);
+
+/** Checks if a given chunk of memory is the topmost element stored in the
+heap. If this is the case, then calling mem_heap_free_top() would free
+that element from the heap.
+@param[in]	heap	memory heap
+@param[in]	buf	presumed topmost element
+@param[in]	buf_sz	size of buf in bytes
+@return true if topmost */
+UNIV_INLINE
+bool
+mem_heap_is_top(
+	mem_heap_t*	heap,
+	const void*	buf,
+	ulint		buf_sz)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/*****************************************************************//**
+Allocate a new chunk of memory from a memory heap, possibly discarding
+the topmost element. If the memory chunk specified with (top, top_sz)
+is the topmost element, then it will be discarded, otherwise it will
+be left untouched and this function will be equivalent to
+mem_heap_alloc().
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_replace(
 /*=============*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n);	/*!< in: size of the topmost element */
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	ulint		new_sz);/*!< in: desired size of the new chunk */
+/*****************************************************************//**
+Allocate a new chunk of memory from a memory heap, possibly discarding
+the topmost element and then copy the specified data to it. If the memory
+chunk specified with (top, top_sz) is the topmost element, then it will be
+discarded, otherwise it will be left untouched and this function will be
+equivalent to mem_heap_dup().
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_dup_replace(
+/*=================*/
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	const void*	data,	/*!< in: new data to duplicate */
+	ulint		data_sz);/*!< in: size of data in bytes */
+/*****************************************************************//**
+Allocate a new chunk of memory from a memory heap, possibly discarding
+the topmost element and then copy the specified string to it. If the memory
+chunk specified with (top, top_sz) is the topmost element, then it will be
+discarded, otherwise it will be left untouched and this function will be
+equivalent to mem_heap_strdup().
+@return allocated string, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+char*
+mem_heap_strdup_replace(
+/*====================*/
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	const char*	str);	/*!< in: new data to duplicate */
 /*****************************************************************//**
 Frees the topmost element in a memory heap.
 The size of the element must be given. */
@@ -231,58 +274,10 @@ ulint
 mem_heap_get_size(
 /*==============*/
 	mem_heap_t*	heap);		/*!< in: heap */
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
-
-#define mem_zalloc(N)	memset(mem_alloc(N), 0, (N))
-
-#ifdef UNIV_DEBUG
-#define mem_alloc(N)	mem_alloc_func((N), __FILE__, __LINE__, NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S))
-#else /* UNIV_DEBUG */
-#define mem_alloc(N)	mem_alloc_func((N), NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), (S))
-#endif /* UNIV_DEBUG */
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return	own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
-	ulint		n,		/*!< in: requested size in bytes */
-#ifdef UNIV_DEBUG
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line,		/*!< in: line where created */
-#endif /* UNIV_DEBUG */
-	ulint*		size);		/*!< out: allocated size in bytes,
-					or NULL */
-
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer freeing */
-
-#define mem_free(PTR)	mem_free_func((PTR), __FILE__, __LINE__)
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Frees a single buffer of storage from
-the dynamic memory of C compiler. Similar to free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
-	void*		ptr,		/*!< in, own: buffer to be freed */
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line);		/*!< in: line where created */
 
 /**********************************************************************//**
 Duplicates a NUL-terminated string.
-@return	own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
 UNIV_INLINE
 char*
 mem_strdup(
@@ -290,7 +285,7 @@ mem_strdup(
 	const char*	str);	/*!< in: string to be copied */
 /**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string.
-@return	own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
 UNIV_INLINE
 char*
 mem_strdupl(
@@ -298,19 +293,19 @@ mem_strdupl(
 	const char*	str,	/*!< in: string to be copied */
 	ulint		len);	/*!< in: length of str, in bytes */
 
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return	own: a copy of the string */
-UNIV_INTERN
+/** Duplicates a NUL-terminated string, allocated from a memory heap.
+@param[in]	heap	memory heap where string is allocated
+@param[in]	str	string to be copied
+@return own: a copy of the string */
 char*
 mem_heap_strdup(
-/*============*/
-	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
-	const char*	str);	/*!< in: string to be copied */
+	mem_heap_t*	heap,
+	const char*	str);
+
 /**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string,
 allocated from a memory heap.
-@return	own: a copy of the string */
+@return own: a copy of the string */
 UNIV_INLINE
 char*
 mem_heap_strdupl(
@@ -321,8 +316,7 @@ mem_heap_strdupl(
 
 /**********************************************************************//**
 Concatenate two strings and return the result, using a memory heap.
-@return	own: the result */
-UNIV_INTERN
+@return own: the result */
 char*
 mem_heap_strcat(
 /*============*/
@@ -332,8 +326,7 @@ mem_heap_strcat(
 
 /**********************************************************************//**
 Duplicate a block of data, allocated from a memory heap.
-@return	own: a copy of the data */
-UNIV_INTERN
+@return own: a copy of the data */
 void*
 mem_heap_dup(
 /*=========*/
@@ -346,8 +339,7 @@ A simple sprintf replacement that dynamically allocates the space for the
 formatted string from the given heap. This supports a very limited set of
 the printf syntax: types 's' and 'u' and length modifier 'l' (which is
 required for the 'u' type).
-@return	heap-allocated formatted string */
-UNIV_INTERN
+@return heap-allocated formatted string */
 char*
 mem_heap_printf(
 /*============*/
@@ -355,15 +347,22 @@ mem_heap_printf(
 	const char*	format,	/*!< in: format string */
 	...) MY_ATTRIBUTE ((format (printf, 2, 3)));
 
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
+/** Checks that an object is a memory heap (or a block of it)
+@param[in]	heap	Memory heap to check */
+UNIV_INLINE
 void
-mem_validate_all_blocks(void);
-/*=========================*/
-#endif
+mem_block_validate(
+	const mem_heap_t*	heap);
+
+#ifdef UNIV_DEBUG
+/** Validates the contents of a memory heap.
+Asserts that the memory heap is consistent
+@param[in]	heap	Memory heap to validate */
+void
+mem_heap_validate(
+	const mem_heap_t*	heap);
+
+#endif /* UNIV_DEBUG */
 
 /*#######################################################################*/
 
@@ -403,11 +402,6 @@ struct mem_block_info_t {
 			pool, this contains the buf_block_t handle;
 			otherwise, this is NULL */
 #endif /* !UNIV_HOTBACKUP */
-#ifdef MEM_PERIODIC_CHECK
-	UT_LIST_NODE_T(mem_block_t) mem_block_list;
-			/* List of all mem blocks allocated; protected
-			by the mem_comm_pool mutex */
-#endif
 };
 
 #define MEM_BLOCK_MAGIC_N	764741555
@@ -416,10 +410,107 @@ struct mem_block_info_t {
 /* Header size for a memory heap block */
 #define MEM_BLOCK_HEADER_SIZE	ut_calc_align(sizeof(mem_block_info_t),\
 							UNIV_MEM_ALIGNMENT)
-#include "mem0dbg.h"
 
 #ifndef UNIV_NONINL
 #include "mem0mem.ic"
 #endif
 
+/** A C++ wrapper class to the mem_heap_t routines, so that it can be used
+as an STL allocator */
+template<typename T>
+class mem_heap_allocator
+{
+public:
+	typedef		T		value_type;
+	typedef		size_t		size_type;
+	typedef		ptrdiff_t	difference_type;
+	typedef		T*		pointer;
+	typedef		const T*	const_pointer;
+	typedef		T&		reference;
+	typedef		const T&	const_reference;
+
+	mem_heap_allocator(mem_heap_t* heap) : m_heap(heap) { }
+
+	mem_heap_allocator(const mem_heap_allocator& other)
+		: m_heap(other.m_heap) { }
+
+	template <typename U>
+	mem_heap_allocator (const mem_heap_allocator<U>& other)
+		: m_heap(other.m_heap) { }
+
+	~mem_heap_allocator() { m_heap = 0; }
+
+	/** @return the heap backing this allocator */
+	const mem_heap_t* get_mem_heap() const { return(m_heap); }
+
+	/** @return the largest n for which n * sizeof(T) fits in size_type */
+	size_type max_size() const
+	{
+		return(~size_type(0) / sizeof(T));
+	}
+
+	/** This function returns a pointer to the first element of a newly
+	allocated array large enough to contain n objects of type T; only the
+	memory is allocated, and the objects are not constructed.
+	@param[in]	n	number of objects to make room for
+	@param[in]	hint	ignored; accepted only for STL compatibility
+	@return uninitialized storage for n objects of type T */
+	pointer	allocate(size_type n, const_pointer hint = 0)
+	{
+		ut_ad(n <= max_size());
+		return(static_cast<pointer>(
+			mem_heap_alloc(m_heap, n * sizeof(T))));
+	}
+
+	/** Does nothing: this allocator never releases individual chunks. */
+	void deallocate(pointer p, size_type n) { }
+
+	pointer address (reference r) const { return(&r); }
+
+	const_pointer address (const_reference r) const { return(&r); }
+
+	void construct(pointer p, const_reference t)
+	{
+		new (static_cast<void*>(p)) T(t);
+	}
+
+	void destroy(pointer p)
+	{
+		p->~T();
+	}
+
+	/** Allocators are required to supply the below template class member
+	which enables the possibility of obtaining a related allocator,
+	parametrized in terms of a different type. For example, given an
+	allocator type IntAllocator for objects of type int, a related
+	allocator type for objects of type long could be obtained using
+	IntAllocator::rebind<long>::other */
+	template <typename U>
+	struct rebind
+	{
+		typedef mem_heap_allocator<U> other;
+	};
+
+private:
+	/** Heap from which allocate() takes its memory; never freed here. */
+	mem_heap_t*	m_heap;
+	template <typename U> friend class mem_heap_allocator;
+};
+
+/** Allocators compare equal when they draw from the same heap. */
+template <class T>
+bool operator== (const mem_heap_allocator<T>& left,
+		 const mem_heap_allocator<T>& right)
+{
+	return(left.get_mem_heap() == right.get_mem_heap());
+}
+
+/** Allocators compare unequal when they draw from different heaps. */
+template <class T>
+bool operator!= (const mem_heap_allocator<T>& left,
+		 const mem_heap_allocator<T>& right)
+{
+	return(left.get_mem_heap() != right.get_mem_heap());
+}
+
 #endif
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 63e68150b61..3b4109ee52d 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,10 +23,7 @@ The memory management
 Created 6/8/1994 Heikki Tuuri
 *************************************************************************/
 
-#include "mem0dbg.ic"
-#ifndef UNIV_HOTBACKUP
-# include "mem0pool.h"
-#endif /* !UNIV_HOTBACKUP */
+#include "ut0new.h"
 
 #ifdef UNIV_DEBUG
 # define mem_heap_create_block(heap, n, type, file_name, line)		\
@@ -43,7 +40,6 @@ Created 6/8/1994 Heikki Tuuri
 Creates a memory heap block where data can be allocated.
 @return own: memory heap block, NULL if did not succeed (only possible
 for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
 mem_block_t*
 mem_heap_create_block_func(
 /*=======================*/
@@ -56,33 +52,34 @@ mem_heap_create_block_func(
 #endif /* UNIV_DEBUG */
 	ulint		type);	/*!< in: type of heap: MEM_HEAP_DYNAMIC or
 				MEM_HEAP_BUFFER */
+
 /******************************************************************//**
 Frees a block from a memory heap. */
-UNIV_INTERN
 void
 mem_heap_block_free(
 /*================*/
 	mem_heap_t*	heap,	/*!< in: heap */
 	mem_block_t*	block);	/*!< in: block to free */
+
 #ifndef UNIV_HOTBACKUP
 /******************************************************************//**
 Frees the free_block field from a memory heap. */
-UNIV_INTERN
 void
 mem_heap_free_block_free(
 /*=====================*/
 	mem_heap_t*	heap);	/*!< in: heap */
 #endif /* !UNIV_HOTBACKUP */
+
 /***************************************************************//**
 Adds a new block to a memory heap.
+@param[in]	heap	memory heap
+@param[in]	n	number of bytes needed
 @return created block, NULL if did not succeed (only possible for
 MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
 mem_block_t*
 mem_heap_add_block(
-/*===============*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n);	/*!< in: number of bytes user needs */
+	mem_heap_t*	heap,
+	ulint		n);
 
 UNIV_INLINE
 void
@@ -150,41 +147,49 @@ mem_block_get_start(mem_block_t* block)
 	return(block->start);
 }
 
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return	allocated, zero-filled storage */
+/** Asserts, via ut_a(), that block->magic_n equals MEM_BLOCK_MAGIC_N.
+@param[in]	block	object presumed to be a memory heap block */
+UNIV_INLINE
+void
+mem_block_validate(
+	const mem_block_t*	block)
+{
+	ut_a(block->magic_n == MEM_BLOCK_MAGIC_N);
+}
+
+/** Allocates and zero-fills n bytes of memory from a memory heap.
+@param[in]	heap	memory heap
+@param[in]	n	number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
+@return allocated, zero-filled storage */
 UNIV_INLINE
 void*
 mem_heap_zalloc(
-/*============*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n)	/*!< in: number of bytes; if the heap is allowed
-				to grow into the buffer pool, this must be
-				<= MEM_MAX_ALLOC_IN_BUF */
+	mem_heap_t*	heap,
+	ulint		n)
 {
 	ut_ad(heap);
 	ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
 	return(memset(mem_heap_alloc(heap, n), 0, n));
 }
 
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
+/** Allocates n bytes of memory from a memory heap.
+@param[in]	heap	memory heap
+@param[in]	n	number of bytes; if the heap is allowed to grow into
+the buffer pool, this must be <= MEM_MAX_ALLOC_IN_BUF
 @return allocated storage, NULL if did not succeed (only possible for
 MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 void*
 mem_heap_alloc(
-/*===========*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n)	/*!< in: number of bytes; if the heap is allowed
-				to grow into the buffer pool, this must be
-				<= MEM_MAX_ALLOC_IN_BUF */
+	mem_heap_t*	heap,
+	ulint		n)
 {
 	mem_block_t*	block;
 	void*		buf;
 	ulint		free;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
@@ -210,35 +215,22 @@ mem_heap_alloc(
 
 	mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
 
-#ifdef UNIV_MEM_DEBUG
-	UNIV_MEM_ALLOC(buf,
-		       n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
-
-	/* In the debug version write debugging info to the field */
-	mem_field_init((byte*) buf, n);
-
-	/* Advance buf to point at the storage which will be given to the
-	caller */
-	buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
-
-#endif
 	UNIV_MEM_ALLOC(buf, n);
 	return(buf);
 }
 
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return	pointer to the heap top */
+/** Returns a pointer to the heap top.
+@param[in]	heap	memory heap
+@return pointer to the heap top */
 UNIV_INLINE
 byte*
 mem_heap_get_heap_top(
-/*==================*/
-	mem_heap_t*	heap)	/*!< in: memory heap */
+	mem_heap_t*	heap)
 {
 	mem_block_t*	block;
 	byte*		buf;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
@@ -247,37 +239,21 @@ mem_heap_get_heap_top(
 	return(buf);
 }
 
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
+/** Frees the space in a memory heap exceeding the pointer given.
+The pointer must have been acquired from mem_heap_get_heap_top.
+The first memory block of the heap is not freed.
+@param[in]	heap		heap from which to free
+@param[in]	old_top		pointer to old top of heap */
 UNIV_INLINE
 void
 mem_heap_free_heap_top(
-/*===================*/
-	mem_heap_t*	heap,	/*!< in: heap from which to free */
-	byte*		old_top)/*!< in: pointer to old top of heap */
+	mem_heap_t*	heap,
+	byte*		old_top)
 {
 	mem_block_t*	block;
 	mem_block_t*	prev_block;
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-	ibool		error;
-	ulint		total_size;
-	ulint		size;
-
-	ut_ad(mem_heap_check(heap));
-
-	/* Validate the heap and get its total allocated size */
-	mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
-				   NULL, NULL);
-	ut_a(!error);
 
-	/* Get the size below top pointer */
-	mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
-				   NULL);
-	ut_a(!error);
-
-#endif
+	ut_d(mem_heap_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
@@ -306,15 +282,6 @@ mem_heap_free_heap_top(
 
 	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
 	UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
-#if defined UNIV_MEM_DEBUG
-	/* In the debug version erase block from top up */
-	mem_erase_buf(old_top, (byte*) block + block->len - old_top);
-
-	/* Update allocated memory count */
-	mutex_enter(&mem_hash_mutex);
-	mem_current_allocated_memory -= (total_size - size);
-	mutex_exit(&mem_hash_mutex);
-#endif /* UNIV_MEM_DEBUG */
 	UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
 
 	/* If free == start, we may free the block if it is not the first
@@ -326,13 +293,13 @@ mem_heap_free_heap_top(
 	}
 }
 
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
+/** Empties a memory heap.
+The first memory block of the heap is not freed.
+@param[in]	heap	heap to empty */
 UNIV_INLINE
 void
 mem_heap_empty(
-/*===========*/
-	mem_heap_t*	heap)	/*!< in: heap to empty */
+	mem_heap_t*	heap)
 {
 	mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap));
 #ifndef UNIV_HOTBACKUP
@@ -342,39 +309,123 @@ mem_heap_empty(
 #endif /* !UNIV_HOTBACKUP */
 }
 
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given.
-@return	pointer to the topmost element */
+/** Returns a pointer to the topmost element in a memory heap.
+The size of the element must be given.
+@param[in]	heap	memory heap
+@param[in]	n	size of the topmost element
+@return pointer to the topmost element */
 UNIV_INLINE
 void*
 mem_heap_get_top(
-/*=============*/
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	ulint		n)	/*!< in: size of the topmost element */
+	mem_heap_t*	heap,
+	ulint		n)
 {
 	mem_block_t*	block;
 	byte*		buf;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
 	buf = (byte*) block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
 
-#ifdef UNIV_MEM_DEBUG
-	ut_ad(mem_block_get_start(block) <= (ulint) (buf - (byte*) block));
+	return((void*) buf);
+}
 
-	/* In the debug version, advance buf to point at the storage which
-	was given to the caller in the allocation*/
+/** Checks if a given chunk of memory is the topmost element stored in the
+heap. If this is the case, then calling mem_heap_free_top() would free
+that element from the heap.
+@param[in]	heap	memory heap
+@param[in]	buf	presumed topmost element
+@param[in]	buf_sz	size of buf in bytes
+@return true if topmost */
+UNIV_INLINE
+bool
+mem_heap_is_top(
+	mem_heap_t*	heap,
+	const void*	buf,
+	ulint		buf_sz)
+{
+	const byte*	first_free_byte;
+	const byte*	presumed_start_of_buf;
 
-	buf += MEM_FIELD_HEADER_SIZE;
+	ut_d(mem_block_validate(heap));
 
-	/* Check that the field lengths agree */
-	ut_ad(n == mem_field_header_get_len(buf));
-#endif
+	first_free_byte = mem_heap_get_heap_top(heap);
 
-	return((void*) buf);
+	presumed_start_of_buf = first_free_byte - MEM_SPACE_NEEDED(buf_sz);
+
+	return(presumed_start_of_buf == buf);
+}
+
+/*****************************************************************//**
+Allocate a new chunk of memory from a memory heap, possibly discarding
+the topmost element. If mem_heap_is_top() reports that (top, top_sz)
+is the topmost element, it is first released with mem_heap_free_top();
+otherwise it is left untouched and this function is equivalent to
+mem_heap_alloc().
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_replace(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	ulint		new_sz)	/*!< in: desired size of the new chunk */
+{
+	if (mem_heap_is_top(heap, top, top_sz)) {
+		mem_heap_free_top(heap, top_sz);
+	}
+
+	return(mem_heap_alloc(heap, new_sz));
+}
+
+/*****************************************************************//**
+Allocate a new chunk from a memory heap, discarding (top, top_sz) first
+if it is the topmost element, and copy the specified data into the new
+chunk. If (top, top_sz) is not the topmost element, this is equivalent
+to mem_heap_dup(). Nothing is copied when the allocation fails.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_dup_replace(
+/*=================*/
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	const void*	data,	/*!< in: new data to duplicate */
+	ulint		data_sz)/*!< in: size of data in bytes */
+{
+	void*	p = mem_heap_replace(heap, top, top_sz, data_sz);
+
+	if (p != NULL) {
+		memcpy(p, data, data_sz);
+	}
+	return(p);
+}
+
+/*****************************************************************//**
+Allocate a new chunk of memory from a memory heap, possibly discarding
+the topmost element, and copy the specified string to it, including its
+terminating NUL (strlen(str) + 1 bytes). If (top, top_sz) is the topmost
+element it is discarded; otherwise it is left untouched and this
+function is equivalent to mem_heap_strdup().
+@return allocated string, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+char*
+mem_heap_strdup_replace(
+/*====================*/
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	const void*	top,	/*!< in: chunk to discard if possible */
+	ulint		top_sz,	/*!< in: size of top in bytes */
+	const char*	str)	/*!< in: new data to duplicate */
+{
+	return(reinterpret_cast<char*>(mem_heap_dup_replace(
+			heap, top, top_sz, str, strlen(str) + 1)));
 }
 
 /*****************************************************************//**
@@ -389,7 +440,7 @@ mem_heap_free_top(
 {
 	mem_block_t*	block;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
@@ -397,13 +448,6 @@ mem_heap_free_top(
 	mem_block_set_free(block, mem_block_get_free(block)
 			   - MEM_SPACE_NEEDED(n));
 	UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n);
-#ifdef UNIV_MEM_DEBUG
-
-	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
-	/* In the debug version check the consistency, and erase field */
-	mem_field_erase((byte*) block + mem_block_get_free(block), n);
-#endif
 
 	/* If free == start, we may free the block if it is not the first
 	one */
@@ -420,81 +464,66 @@ mem_heap_free_top(
 	}
 }
 
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-argument.
+/** Creates a memory heap.
+NOTE: Use the corresponding macros instead of this function.
+A single user buffer of 'size' will fit in the block.
+0 creates a default size block.
+@param[in]	size		Desired start block size.
+@param[in]	file_name	File name where created
+@param[in]	line		Line where created
+@param[in]	type		Heap type
 @return own: memory heap, NULL if did not succeed (only possible for
 MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 mem_heap_t*
 mem_heap_create_func(
-/*=================*/
-	ulint		n,		/*!< in: desired start block size,
-					this means that a single user buffer
-					of size n will fit in the block,
-					0 creates a default size block */
+	ulint		size,
 #ifdef UNIV_DEBUG
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line,		/*!< in: line where created */
+	const char*	file_name,
+	ulint		line,
 #endif /* UNIV_DEBUG */
-	ulint		type)		/*!< in: heap type */
+	ulint		type)
 {
 	mem_block_t*   block;
 
-	if (!n) {
-		n = MEM_BLOCK_START_SIZE;
+	if (!size) {
+		size = MEM_BLOCK_START_SIZE;
 	}
 
-	block = mem_heap_create_block(NULL, n, type, file_name, line);
+	block = mem_heap_create_block(NULL, size, type, file_name, line);
 
 	if (block == NULL) {
 
 		return(NULL);
 	}
 
-	UT_LIST_INIT(block->base);
-
-	/* Add the created block itself as the first block in the list */
-	UT_LIST_ADD_FIRST(list, block->base, block);
-
-#ifdef UNIV_MEM_DEBUG
+	/* The first block should not be in buffer pool,
+	because it might be relocated to resize buffer pool. */
+	ut_ad(block->buf_block == NULL);
 
-	mem_hash_insert(block, file_name, line);
+	UT_LIST_INIT(block->base, &mem_block_t::list);
 
-#endif
+	/* Add the created block itself as the first block in the list */
+	UT_LIST_ADD_FIRST(block->base, block);
 
 	return(block);
 }
 
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
+/** Frees the space occupied by a memory heap.
+NOTE: Use the corresponding macro instead of this function.
+@param[in]	heap	Heap to be freed */
 UNIV_INLINE
 void
-mem_heap_free_func(
-/*===============*/
-	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
-	const char*	file_name MY_ATTRIBUTE((unused)),
-					/*!< in: file name where freed */
-	ulint		line  MY_ATTRIBUTE((unused)))
+mem_heap_free(
+	mem_heap_t*	heap)
 {
 	mem_block_t*	block;
 	mem_block_t*	prev_block;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	block = UT_LIST_GET_LAST(heap->base);
 
-#ifdef UNIV_MEM_DEBUG
-
-	/* In the debug version remove the heap from the hash table of heaps
-	and check its consistency */
-
-	mem_hash_remove(heap, file_name, line);
-
-#endif
 #ifndef UNIV_HOTBACKUP
 	if (heap->free_block) {
 		mem_heap_free_block_free(heap);
@@ -513,73 +542,6 @@ mem_heap_free_func(
 	}
 }
 
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return	own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
-	ulint		n,		/*!< in: desired number of bytes */
-#ifdef UNIV_DEBUG
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line,		/*!< in: line where created */
-#endif /* UNIV_DEBUG */
-	ulint*		size)		/*!< out: allocated size in bytes,
-					or NULL */
-{
-	mem_heap_t*	heap;
-	void*		buf;
-
-	heap = mem_heap_create_at(n, file_name, line);
-
-	/* Note that as we created the first block in the heap big enough
-	for the buffer requested by the caller, the buffer will be in the
-	first block and thus we can calculate the pointer to the heap from
-	the pointer to the buffer when we free the memory buffer. */
-
-	if (size) {
-		/* Adjust the allocation to the actual size of the
-		memory block. */
-		ulint	m = mem_block_get_len(heap)
-			- mem_block_get_free(heap);
-#ifdef UNIV_MEM_DEBUG
-		m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
-#endif /* UNIV_MEM_DEBUG */
-		ut_ad(m >= n);
-		n = m;
-		*size = m;
-	}
-
-	buf = mem_heap_alloc(heap, n);
-
-	ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE
-	     - MEM_FIELD_HEADER_SIZE);
-	return(buf);
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees a single
-buffer of storage from the dynamic memory of the C compiler. Similar to the
-free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
-	void*		ptr,		/*!< in, own: buffer to be freed */
-	const char*	file_name,	/*!< in: file name where created */
-	ulint		line)		/*!< in: line where created */
-{
-	mem_heap_t*   heap;
-
-	heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE
-			     - MEM_FIELD_HEADER_SIZE);
-	mem_heap_free_func(heap, file_name, line);
-}
-
 /*****************************************************************//**
 Returns the space in bytes occupied by a memory heap. */
 UNIV_INLINE
@@ -590,7 +552,7 @@ mem_heap_get_size(
 {
 	ulint		size	= 0;
 
-	ut_ad(mem_heap_check(heap));
+	ut_d(mem_block_validate(heap));
 
 	size = heap->total_size;
 
@@ -605,7 +567,7 @@ mem_heap_get_size(
 
 /**********************************************************************//**
 Duplicates a NUL-terminated string.
-@return	own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
 UNIV_INLINE
 char*
 mem_strdup(
@@ -613,12 +575,12 @@ mem_strdup(
 	const char*	str)	/*!< in: string to be copied */
 {
 	ulint	len = strlen(str) + 1;
-	return((char*) memcpy(mem_alloc(len), str, len));
+	return(static_cast<char*>(memcpy(ut_malloc_nokey(len), str, len)));
 }
 
 /**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string.
-@return	own: a copy of the string, must be deallocated with mem_free */
+@return own: a copy of the string, must be deallocated with ut_free */
 UNIV_INLINE
 char*
 mem_strdupl(
@@ -626,15 +588,15 @@ mem_strdupl(
 	const char*	str,	/*!< in: string to be copied */
 	ulint		len)	/*!< in: length of str, in bytes */
 {
-	char*	s = (char*) mem_alloc(len + 1);
+	char*	s = static_cast<char*>(ut_malloc_nokey(len + 1));
 	s[len] = 0;
-	return((char*) memcpy(s, str, len));
+	return(static_cast<char*>(memcpy(s, str, len)));
 }
 
 /**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string,
 allocated from a memory heap.
-@return	own: a copy of the string */
+@return own: a copy of the string */
 UNIV_INLINE
 char*
 mem_heap_strdupl(
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
deleted file mode 100644
index a65ba50fdf9..00000000000
--- a/storage/innobase/include/mem0pool.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0pool.h
-The lowest-level memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-/** Memory pool */
-struct mem_pool_t;
-
-/** The common memory pool */
-extern mem_pool_t*	mem_comm_pool;
-
-/** Memory area header */
-struct mem_area_t{
-	ulint		size_and_free;	/*!< memory area size is obtained by
-					anding with ~MEM_AREA_FREE; area in
-					a free list if ANDing with
-					MEM_AREA_FREE results in nonzero */
-	UT_LIST_NODE_T(mem_area_t)
-			free_list;	/*!< free list node */
-};
-
-/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE	(ut_calc_align(sizeof(struct mem_area_t),\
-			UNIV_MEM_ALIGNMENT))
-
-/********************************************************************//**
-Creates a memory pool.
-@return	memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
-	ulint	size);	/*!< in: pool size in bytes */
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
-	mem_pool_t*	pool);	/*!< in, own: memory pool */
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return	own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
-	ulint*		psize,	/*!< in: requested size in bytes; for optimum
-				space usage, the size should be a power of 2
-				minus MEM_AREA_EXTRA_SIZE;
-				out: allocated size in bytes (greater than
-				or equal to the requested size) */
-	mem_pool_t*	pool);	/*!< in: memory pool */
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
-	void*		ptr,	/*!< in, own: pointer to allocated memory
-				buffer */
-	mem_pool_t*	pool);	/*!< in: memory pool */
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return	reserved mmeory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
-	mem_pool_t*	pool);	/*!< in: memory pool */
-/********************************************************************//**
-Validates a memory pool.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
-	mem_pool_t*	pool);	/*!< in: memory pool */
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
-	FILE*		outfile,/*!< in: output file to write to */
-	mem_pool_t*	pool);	/*!< in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 18a345d050f..3819d3f694a 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,24 +28,26 @@ Created 12/7/1995 Heikki Tuuri
 
 #include "univ.i"
 #include "mtr0mtr.h"
-#include "dict0types.h"
+#include "dyn0buf.h"
+
+// Forward declaration
+struct dict_index_t;
 
 #ifndef UNIV_HOTBACKUP
 /********************************************************//**
 Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
 record to the mini-transaction log if mtr is not NULL. */
-UNIV_INTERN
 void
 mlog_write_ulint(
 /*=============*/
-	byte*	ptr,	/*!< in: pointer where to write */
-	ulint	val,	/*!< in: value to write */
-	byte	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+	byte*		ptr,	/*!< in: pointer where to write */
+	ulint		val,	/*!< in: value to write */
+	mlog_id_t	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+
 /********************************************************//**
 Writes 8 bytes to a file page. Writes the corresponding log
 record to the mini-transaction log, only if mtr is not NULL */
-UNIV_INTERN
 void
 mlog_write_ull(
 /*===========*/
@@ -55,7 +57,6 @@ mlog_write_ull(
 /********************************************************//**
 Writes a string to a file page buffered in the buffer pool. Writes the
 corresponding log record to the mini-transaction log. */
-UNIV_INTERN
 void
 mlog_write_string(
 /*==============*/
@@ -66,7 +67,6 @@ mlog_write_string(
 /********************************************************//**
 Logs a write of a string to a file page buffered in the buffer pool.
 Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
 void
 mlog_log_string(
 /*============*/
@@ -76,40 +76,34 @@ mlog_log_string(
 /********************************************************//**
 Writes initial part of a log record consisting of one-byte item
 type and four-byte space and page numbers. */
-UNIV_INTERN
 void
 mlog_write_initial_log_record(
 /*==========================*/
 	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
 				frame holding the file page where
 				modification is made */
-	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mlog_id_t	type,	/*!< in: log item type: MLOG_1BYTE, ... */
 	mtr_t*		mtr);	/*!< in: mini-transaction handle */
 /********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return	new value of log_ptr */
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
 UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
-	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
-			MLOG_FILE_RENAME */
-	ulint	space_id,/*!< in: space id, if applicable */
-	ulint	page_no,/*!< in: page number (not relevant currently) */
-	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
-	mtr_t*	mtr);	/*!< in: mtr */
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_buf_t*	dyn_buf,	/*!< in/out: buffer to write */
+	ulint		val,		/*!< in: value to write */
+	mlog_id_t	type);		/*!< in: type of value to write */
 /********************************************************//**
 Catenates 1 - 4 bytes to the mtr log. */
 UNIV_INLINE
 void
 mlog_catenate_ulint(
 /*================*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	val,	/*!< in: value to write */
-	ulint	type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*		mtr,	/*!< in: mtr */
+	ulint		val,	/*!< in: value to write */
+	mlog_id_t	type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
 /********************************************************//**
 Catenates n bytes to the mtr log. */
-UNIV_INTERN
 void
 mlog_catenate_string(
 /*=================*/
@@ -122,8 +116,8 @@ UNIV_INLINE
 void
 mlog_catenate_ulint_compressed(
 /*===========================*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	val);	/*!< in: value to write */
+	mtr_t*		mtr,	/*!< in: mtr */
+	ulint		val);	/*!< in: value to write */
 /********************************************************//**
 Catenates a compressed 64-bit integer to mlog. */
 UNIV_INLINE
@@ -134,27 +128,45 @@ mlog_catenate_ull_compressed(
 	ib_uint64_t	val);	/*!< in: value to write */
 /********************************************************//**
 Opens a buffer to mlog. It must be closed with mlog_close.
-@return	buffer, NULL if log mode MTR_LOG_NONE */
+@return buffer, NULL if log mode MTR_LOG_NONE */
 UNIV_INLINE
 byte*
 mlog_open(
 /*======*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	size);	/*!< in: buffer size in bytes; MUST be
-			smaller than DYN_ARRAY_DATA_SIZE! */
+	mtr_t*		mtr,	/*!< in: mtr */
+	ulint		size);	/*!< in: buffer size in bytes; MUST be
+				smaller than DYN_ARRAY_DATA_SIZE! */
 /********************************************************//**
 Closes a buffer opened to mlog. */
 UNIV_INLINE
 void
 mlog_close(
 /*=======*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	byte*	ptr);	/*!< in: buffer space from ptr up was not used */
+	mtr_t*		mtr,	/*!< in: mtr */
+	byte*		ptr);	/*!< in: buffer space from ptr up was
+				not used */
+
+/** Writes a log record about an operation.
+@param[in]	type		redo log record type
+@param[in]	space_id	tablespace identifier
+@param[in]	page_no		page number
+@param[in,out]	log_ptr		current end of mini-transaction log
+@param[in,out]	mtr		mini-transaction
+@return	end of mini-transaction log */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_low(
+	mlog_id_t	type,
+	ulint		space_id,
+	ulint		page_no,
+	byte*		log_ptr,
+	mtr_t*		mtr);
+
 /********************************************************//**
 Writes the initial part of a log record (3..11 bytes).
 If the implementation of this function is changed, all
 size parameters to mlog_open() should be adjusted accordingly!
-@return	new value of log_ptr */
+@return new value of log_ptr */
 UNIV_INLINE
 byte*
 mlog_write_initial_log_record_fast(
@@ -162,7 +174,7 @@ mlog_write_initial_log_record_fast(
 	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
 				frame holding the file page where
 				modification is made */
-	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mlog_id_t	type,	/*!< in: log item type: MLOG_1BYTE, ... */
 	byte*		log_ptr,/*!< in: pointer to mtr log which has
 				been opened */
 	mtr_t*		mtr);	/*!< in: mtr */
@@ -172,32 +184,30 @@ mlog_write_initial_log_record_fast(
 #endif /* !UNIV_HOTBACKUP */
 /********************************************************//**
 Parses an initial log record written by mlog_write_initial_log_record.
-@return	parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
 byte*
 mlog_parse_initial_log_record(
 /*==========================*/
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	byte*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
-	ulint*	space,	/*!< out: space id */
-	ulint*	page_no);/*!< out: page number */
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	mlog_id_t*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
+	ulint*		space,	/*!< out: space id */
+	ulint*		page_no);/*!< out: page number */
 /********************************************************//**
 Parses a log record written by mlog_write_ulint or mlog_write_ull.
-@return	parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
 byte*
 mlog_parse_nbytes(
 /*==============*/
-	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	byte*	page,	/*!< in: page where to apply the log record, or NULL */
-	void*	page_zip);/*!< in/out: compressed page, or NULL */
+	mlog_id_t	type,	/*!< in: log record type: MLOG_1BYTE, ... */
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	byte*		page,	/*!< in: page where to apply the log record,
+				or NULL */
+	void*		page_zip);/*!< in/out: compressed page, or NULL */
 /********************************************************//**
 Parses a log record written by mlog_write_string.
-@return	parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
 byte*
 mlog_parse_string(
 /*==============*/
@@ -212,15 +222,14 @@ Opens a buffer for mlog, writes the initial log record and,
 if needed, the field lengths of an index.  Reserves space
 for further log entries.  The log entry must be closed with
 mtr_close().
-@return	buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
+@return buffer, NULL if log mode MTR_LOG_NONE */
 byte*
 mlog_open_and_write_index(
 /*======================*/
 	mtr_t*			mtr,	/*!< in: mtr */
 	const byte*		rec,	/*!< in: index record or page */
 	const dict_index_t*	index,	/*!< in: record descriptor */
-	byte			type,	/*!< in: log item type */
+	mlog_id_t		type,	/*!< in: log item type */
 	ulint			size);	/*!< in: requested buffer size in bytes
 					(if 0, calls mlog_close() and
 					returns NULL) */
@@ -228,8 +237,7 @@ mlog_open_and_write_index(
 
 /********************************************************//**
 Parses a log record written by mlog_open_and_write_index.
-@return	parsed record end, NULL if not a complete record */
-UNIV_INTERN
+@return parsed record end, NULL if not a complete record */
 byte*
 mlog_parse_index(
 /*=============*/
@@ -239,13 +247,13 @@ mlog_parse_index(
 	dict_index_t**	index);	/*!< out, own: dummy index */
 
 #ifndef UNIV_HOTBACKUP
-/* Insert, update, and maybe other functions may use this value to define an
+/** Insert, update, and maybe other functions may use this value to define an
 extra mlog buffer size for variable size data */
 #define MLOG_BUF_MARGIN	256
 #endif /* !UNIV_HOTBACKUP */
 
 #ifndef UNIV_NONINL
 #include "mtr0log.ic"
-#endif
+#endif /* !UNIV_NONINL */
 
-#endif
+#endif /* mtr0log_h */
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index 6457e02d455..4015fe36d19 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,36 +23,31 @@ Mini-transaction logging routines
 Created 12/7/1995 Heikki Tuuri
 *******************************************************/
 
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
 #include "buf0dblwr.h"
 #include "fsp0types.h"
-#include "trx0sys.h"
+#include "mach0data.h"
+#include "trx0types.h"
 
 /********************************************************//**
 Opens a buffer to mlog. It must be closed with mlog_close.
-@return	buffer, NULL if log mode MTR_LOG_NONE */
+@return buffer, NULL if log mode MTR_LOG_NONE or MTR_LOG_NO_REDO */
 UNIV_INLINE
 byte*
 mlog_open(
 /*======*/
 	mtr_t*	mtr,	/*!< in: mtr */
 	ulint	size)	/*!< in: buffer size in bytes; MUST be
-			smaller than DYN_ARRAY_DATA_SIZE! */
+			smaller than mtr_t::buf_t::MAX_DATA_SIZE! */
 {
-	dyn_array_t*	mlog;
-
-	mtr->modifications = TRUE;
+	mtr->set_modified();
 
-	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE
+	    || mtr_get_log_mode(mtr) == MTR_LOG_NO_REDO) {
 
 		return(NULL);
 	}
 
-	mlog = &(mtr->log);
-
-	return(dyn_array_open(mlog, size));
+	return(mtr->get_log()->open(size));
 }
 
 /********************************************************//**
@@ -64,13 +59,10 @@ mlog_close(
 	mtr_t*	mtr,	/*!< in: mtr */
 	byte*	ptr)	/*!< in: buffer space from ptr up was not used */
 {
-	dyn_array_t*	mlog;
-
 	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NO_REDO);
 
-	mlog = &(mtr->log);
-
-	dyn_array_close(mlog, ptr);
+	mtr->get_log()->close(ptr);
 }
 
 #ifndef UNIV_HOTBACKUP
@@ -80,45 +72,52 @@ UNIV_INLINE
 void
 mlog_catenate_ulint(
 /*================*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	val,	/*!< in: value to write */
-	ulint	type)	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_buf_t*	mtr_buf,	/*!< in/out: buffer to write */
+	ulint		val,		/*!< in: value to write */
+	mlog_id_t	type)		/*!< in: type of value to write */
 {
-	dyn_array_t*	mlog;
-	byte*		ptr;
+	ut_ad(MLOG_1BYTE == 1);
+	ut_ad(MLOG_2BYTES == 2);
+	ut_ad(MLOG_4BYTES == 4);
+	ut_ad(MLOG_8BYTES == 8);
 
-	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+	byte*	ptr = mtr_buf->push<byte*>(type);
 
-		return;
-	}
-
-	mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
-	ptr = (byte*) dyn_array_push(mlog, type);
-
-	if (type == MLOG_4BYTES) {
+	switch (type) {
+	case MLOG_4BYTES:
 		mach_write_to_4(ptr, val);
-	} else if (type == MLOG_2BYTES) {
+		break;
+	case MLOG_2BYTES:
 		mach_write_to_2(ptr, val);
-	} else {
-		ut_ad(type == MLOG_1BYTE);
+		break;
+	case MLOG_1BYTE:
 		mach_write_to_1(ptr, val);
+		break;
+	default:
+		ut_error;
 	}
 }
 
 /********************************************************//**
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*		mtr,	/*!< in/out: mtr */
+	ulint		val,	/*!< in: value to write */
+	mlog_id_t	type)	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE
+	    || mtr_get_log_mode(mtr) == MTR_LOG_NO_REDO) {
+
+		return;
+	}
+
+	mlog_catenate_ulint(mtr->get_log(), val, type);
+}
+
+/********************************************************//**
 Catenates a compressed ulint to mlog. */
 UNIV_INLINE
 void
@@ -161,16 +160,44 @@ mlog_catenate_ull_compressed(
 		return;
 	}
 
-	log_ptr += mach_ull_write_compressed(log_ptr, val);
+	log_ptr += mach_u64_write_compressed(log_ptr, val);
 
 	mlog_close(mtr, log_ptr);
 }
 
+/** Writes a log record about an operation.
+@param[in]	type		redo log record type
+@param[in]	space_id	tablespace identifier
+@param[in]	page_no		page number
+@param[in,out]	log_ptr		current end of mini-transaction log
+@param[in,out]	mtr		mini-transaction
+@return	end of mini-transaction log */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_low(
+	mlog_id_t	type,
+	ulint		space_id,
+	ulint		page_no,
+	byte*		log_ptr,
+	mtr_t*		mtr)
+{
+	ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
+
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+
+	log_ptr += mach_write_compressed(log_ptr, space_id);
+	log_ptr += mach_write_compressed(log_ptr, page_no);
+
+	mtr->added_rec();
+	return(log_ptr);
+}
+
 /********************************************************//**
 Writes the initial part of a log record (3..11 bytes).
 If the implementation of this function is changed, all
 size parameters to mlog_open() should be adjusted accordingly!
-@return	new value of log_ptr */
+@return new value of log_ptr */
 UNIV_INLINE
 byte*
 mlog_write_initial_log_record_fast(
@@ -178,21 +205,17 @@ mlog_write_initial_log_record_fast(
 	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
 				frame holding the file page where
 				modification is made */
-	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mlog_id_t	type,	/*!< in: log item type: MLOG_1BYTE, ... */
 	byte*		log_ptr,/*!< in: pointer to mtr log which has
 				been opened */
-	mtr_t*		mtr)	/*!< in: mtr */
+	mtr_t*		mtr)	/*!< in/out: mtr */
 {
-#ifdef UNIV_DEBUG
-	buf_block_t*	block;
-#endif
 	const byte*	page;
 	ulint		space;
 	ulint		offset;
 
-	ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
-	ut_ad(ptr && log_ptr);
+	ut_ad(log_ptr);
+	ut_d(mtr->memo_modify_page(ptr));
 
 	page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
 	space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
@@ -202,6 +225,7 @@ mlog_write_initial_log_record_fast(
 	the doublewrite buffer is located in pages
 	FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
 	system tablespace */
+
 	if (space == TRX_SYS_SPACE
 	    && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
 		if (buf_dblwr_being_created) {
@@ -210,67 +234,17 @@ mlog_write_initial_log_record_fast(
 			anything for the doublewrite buffer pages. */
 			return(log_ptr);
 		} else {
-			fprintf(stderr,
-				"Error: trying to redo log a record of type "
-				"%d on page %lu of space %lu in the "
-				"doublewrite buffer, continuing anyway.\n"
-				"Please post a bug report to "
-				"bugs.mysql.com.\n",
-				type, offset, space);
+			ib::error() << "Trying to redo log a record of type "
+				<< type << " on page "
+				<< page_id_t(space, offset) << " in the"
+				" doublewrite buffer, continuing anyway."
+				" Please post a bug report to"
+				" bugs.mysql.com.";
 			ut_ad(0);
 		}
 	}
 
-	mach_write_to_1(log_ptr, type);
-	log_ptr++;
-	log_ptr += mach_write_compressed(log_ptr, space);
-	log_ptr += mach_write_compressed(log_ptr, offset);
-
-	mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
-	fprintf(stderr,
-		"Adding to mtr log record type %lu space %lu page no %lu\n",
-		(ulong) type, space, offset);
-#endif
-
-#ifdef UNIV_DEBUG
-	/* We now assume that all x-latched pages have been modified! */
-	block = (buf_block_t*) buf_block_align(ptr);
-
-	if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
-		mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
-	}
-#endif
-	return(log_ptr);
-}
-
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return	new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
-	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
-			MLOG_FILE_RENAME */
-	ulint	space_id,/*!< in: space id, if applicable */
-	ulint	page_no,/*!< in: page number (not relevant currently) */
-	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ut_ad(log_ptr);
-
-	mach_write_to_1(log_ptr, type);
-	log_ptr++;
-
-	/* We write dummy space id and page number */
-	log_ptr += mach_write_compressed(log_ptr, space_id);
-	log_ptr += mach_write_compressed(log_ptr, page_no);
-
-	mtr->n_log_recs++;
-
-	return(log_ptr);
+	return(mlog_write_initial_log_record_low(type, space, offset,
+						 log_ptr, mtr));
 }
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index c3307985532..3526436f042 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -29,427 +29,588 @@ Created 11/26/1995 Heikki Tuuri
 #define mtr0mtr_h
 
 #include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
+#include "log0types.h"
 #include "mtr0types.h"
-#include "page0types.h"
+#include "buf0types.h"
 #include "trx0types.h"
+#include "dyn0buf.h"
 
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL		21	/* default mode: log all operations
-					modifying disk-based data */
-#define	MTR_LOG_NONE		22	/* log no operations */
-#define	MTR_LOG_NO_REDO		23	/* Don't generate REDO */
-/*#define	MTR_LOG_SPACE	23 */	/* log only operations modifying
-					file space page allocation data
-					(operations in fsp0fsp.* ) */
-#define	MTR_LOG_SHORT_INSERTS	24	/* inserts are logged in a shorter
-					form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define	MTR_MEMO_PAGE_S_FIX	RW_S_LATCH
-#define	MTR_MEMO_PAGE_X_FIX	RW_X_LATCH
-#define	MTR_MEMO_BUF_FIX	RW_NO_LATCH
-#ifdef UNIV_DEBUG
-# define MTR_MEMO_MODIFY	54
-#endif /* UNIV_DEBUG */
-#define	MTR_MEMO_S_LOCK		55
-#define	MTR_MEMO_X_LOCK		56
-
-/** @name Log item types
-The log items are declared 'byte' so that the compiler can warn if val
-and type parameters are switched in a call to mlog_write_ulint. NOTE!
-For 1 - 8 bytes, the flag value must give the length also! @{ */
-#define	MLOG_SINGLE_REC_FLAG	128		/*!< if the mtr contains only
-						one log record for one page,
-						i.e., write_initial_log_record
-						has been called only once,
-						this flag is ORed to the type
-						of that first log record */
-#define	MLOG_1BYTE		(1)		/*!< one byte is written */
-#define	MLOG_2BYTES		(2)		/*!< 2 bytes ... */
-#define	MLOG_4BYTES		(4)		/*!< 4 bytes ... */
-#define	MLOG_8BYTES		(8)		/*!< 8 bytes ... */
-#define	MLOG_REC_INSERT		((byte)9)	/*!< record insert */
-#define	MLOG_REC_CLUST_DELETE_MARK ((byte)10)	/*!< mark clustered index record
-						deleted */
-#define	MLOG_REC_SEC_DELETE_MARK ((byte)11)	/*!< mark secondary index record
-						deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13)	/*!< update of a record,
-						preserves record field sizes */
-#define MLOG_REC_DELETE		((byte)14)	/*!< delete a record from a
-						page */
-#define	MLOG_LIST_END_DELETE	((byte)15)	/*!< delete record list end on
-						index page */
-#define	MLOG_LIST_START_DELETE	((byte)16)	/*!< delete record list start on
-						index page */
-#define	MLOG_LIST_END_COPY_CREATED ((byte)17)	/*!< copy record list end to a
-						new created index page */
-#define	MLOG_PAGE_REORGANIZE	((byte)18)	/*!< reorganize an
-						index page in
-						ROW_FORMAT=REDUNDANT */
-#define MLOG_PAGE_CREATE	((byte)19)	/*!< create an index page */
-#define	MLOG_UNDO_INSERT	((byte)20)	/*!< insert entry in an undo
-						log */
-#define MLOG_UNDO_ERASE_END	((byte)21)	/*!< erase an undo log
-						page end */
-#define	MLOG_UNDO_INIT		((byte)22)	/*!< initialize a page in an
-						undo log */
-#define MLOG_UNDO_HDR_DISCARD	((byte)23)	/*!< discard an update undo log
-						header */
-#define	MLOG_UNDO_HDR_REUSE	((byte)24)	/*!< reuse an insert undo log
-						header */
-#define MLOG_UNDO_HDR_CREATE	((byte)25)	/*!< create an undo
-						log header */
-#define MLOG_REC_MIN_MARK	((byte)26)	/*!< mark an index
-						record as the
-						predefined minimum
-						record */
-#define MLOG_IBUF_BITMAP_INIT	((byte)27)	/*!< initialize an
-						ibuf bitmap page */
-/*#define	MLOG_FULL_PAGE	((byte)28)	full contents of a page */
-#ifdef UNIV_LOG_LSN_DEBUG
-# define MLOG_LSN		((byte)28)	/* current LSN */
-#endif
-#define MLOG_INIT_FILE_PAGE	((byte)29)	/*!< this means that a
-						file page is taken
-						into use and the prior
-						contents of the page
-						should be ignored: in
-						recovery we must not
-						trust the lsn values
-						stored to the file
-						page */
-#define MLOG_WRITE_STRING	((byte)30)	/*!< write a string to
-						a page */
-#define	MLOG_MULTI_REC_END	((byte)31)	/*!< if a single mtr writes
-						several log records,
-						this log record ends the
-						sequence of these records */
-#define MLOG_DUMMY_RECORD	((byte)32)	/*!< dummy log record used to
-						pad a log block full */
-#define MLOG_FILE_CREATE	((byte)33)	/*!< log record about an .ibd
-						file creation */
-#define MLOG_FILE_RENAME	((byte)34)	/*!< log record about an .ibd
-						file rename */
-#define MLOG_FILE_DELETE	((byte)35)	/*!< log record about an .ibd
-						file deletion */
-#define MLOG_COMP_REC_MIN_MARK	((byte)36)	/*!< mark a compact
-						index record as the
-						predefined minimum
-						record */
-#define MLOG_COMP_PAGE_CREATE	((byte)37)	/*!< create a compact
-						index page */
-#define MLOG_COMP_REC_INSERT	((byte)38)	/*!< compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
-						/*!< mark compact
-						clustered index record
-						deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
-						secondary index record
-						deleted; this log
-						record type is
-						redundant, as
-						MLOG_REC_SEC_DELETE_MARK
-						is independent of the
-						record format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
-						compact record,
-						preserves record field
-						sizes */
-#define MLOG_COMP_REC_DELETE	((byte)42)	/*!< delete a compact record
-						from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43)	/*!< delete compact record list
-						end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44)	/*!< delete compact record list
-						start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
-						/*!< copy compact
-						record list end to a
-						new created index
-						page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46)	/*!< reorganize an index page */
-#define MLOG_FILE_CREATE2	((byte)47)	/*!< log record about creating
-						an .ibd file, with format */
-#define MLOG_ZIP_WRITE_NODE_PTR	((byte)48)	/*!< write the node pointer of
-						a record on a compressed
-						non-leaf B-tree page */
-#define MLOG_ZIP_WRITE_BLOB_PTR	((byte)49)	/*!< write the BLOB pointer
-						of an externally stored column
-						on a compressed page */
-#define MLOG_ZIP_WRITE_HEADER	((byte)50)	/*!< write to compressed page
-						header */
-#define MLOG_ZIP_PAGE_COMPRESS	((byte)51)	/*!< compress an index page */
-#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA	((byte)52)/*!< compress an index page
-						without logging it's image */
-#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53)	/*!< reorganize a compressed
-						page */
-#define MLOG_BIGGEST_TYPE	((byte)53)	/*!< biggest value (used in
-						assertions) */
-
-#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100)	/*!< log record for
-						writing/updating crypt data of
-						a tablespace */
-
-#define EXTRA_CHECK_MLOG_NUMBER(x) \
-  ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
-
-/* @} */
-
-/** @name Flags for MLOG_FILE operations
-(stored in the page number parameter, called log_flags in the
-functions).  The page number parameter was originally written as 0. @{ */
-#define MLOG_FILE_FLAG_TEMP	1	/*!< identifies TEMPORARY TABLE in
-					MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
-/* @} */
-
-/* included here because it needs MLOG_LSN defined */
-#include "log0log.h"
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start_trx(
-/*======*/
-	mtr_t*	mtr,	/*!< out: mini-transaction */
-	trx_t*	trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull (1)));
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start(
-/*======*/
-	mtr_t*	mtr)	/*!< out: mini-transaction */
-{
-	mtr_start_trx(mtr, NULL);
-}
-	MY_ATTRIBUTE((nonnull))
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((nonnull));
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
+/** Start a mini-transaction. */
+#define mtr_start(m)		(m)->start()
+/** Start a mini-transaction. */
+#define mtr_start_trx(m, t)		(m)->start((t))
+
+/** Start a synchronous mini-transaction */
+#define mtr_start_sync(m)	(m)->start(true)
+
+/** Start a synchronous read-only mini-transaction */
+#define mtr_start_ro(m)	(m)->start(true, true)
+
+/** Commit a mini-transaction. */
+#define mtr_commit(m)		(m)->commit()
+
+/** Set and return a savepoint in mtr.
 @return	savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
-	mtr_t*	mtr);	/*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
-	mtr_t*		mtr,		/*!< in: mtr */
-	ulint		savepoint,	/*!< in: savepoint */
-	rw_lock_t*	lock);		/*!< in: latch to release */
-#else /* !UNIV_HOTBACKUP */
-# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Releases a buf_page stored in an mtr memo after a
+#define mtr_set_savepoint(m)	(m)->get_savepoint()
+
+/** Release the (index tree) s-latch stored in an mtr memo after a
 savepoint. */
-UNIV_INTERN
-void
-mtr_release_buf_page_at_savepoint(
-/*=============================*/
-	mtr_t*		mtr,		/*!< in: mtr */
-	ulint		savepoint,	/*!< in: savepoint */
-	buf_block_t*	block);		/*!< in: block to release */
-
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
+#define mtr_release_s_latch_at_savepoint(m, s, l)			\
+				(m)->release_s_latch_at_savepoint((s), (l))
+
+/** Get the logging mode of a mini-transaction.
 @return	logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
-	mtr_t*	mtr);	/*!< in: mtr */
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
+#define mtr_get_log_mode(m)	(m)->get_log_mode()
+
+/** Change the logging mode of a mini-transaction.
 @return	old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	mode);	/*!< in: logging mode: MTR_LOG_NONE, ... */
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+#define mtr_set_log_mode(m, d)	(m)->set_log_mode((d))
+
+/** Get the flush observer of a mini-transaction.
+@return flush observer object */
+#define mtr_get_flush_observer(m)	(m)->get_flush_observer()
+
+/** Set the flush observer of a mini-transaction. */
+#define mtr_set_flush_observer(m, d)	(m)->set_flush_observer((d))
+
+/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
 @return	value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
-	const byte*	ptr,	/*!< in: pointer from where to read */
-	ulint		type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-	mtr_t*		mtr);	/*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR)	mtr_s_lock_func((B), __FILE__, __LINE__,\
-						(MTR))
-/*********************************************************************//**
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR)	mtr_x_lock_func((B), __FILE__, __LINE__,\
-						(MTR))
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/*!< in: rw-lock */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line number */
-	mtr_t*		mtr);	/*!< in: mtr */
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/*!< in: rw-lock */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line number */
-	mtr_t*		mtr);	/*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************//**
-Releases an object in the memo stack.
+#define mtr_read_ulint(p, t, m)	(m)->read_ulint((p), (t))
+
+/** Release an object in the memo stack.
 @return true if released */
-UNIV_INTERN
-bool
-mtr_memo_release(
-/*=============*/
-	mtr_t*	mtr,	/*!< in/out: mini-transaction */
-	void*	object,	/*!< in: object */
-	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
-	MY_ATTRIBUTE((nonnull));
+#define mtr_memo_release(m, o, t)					\
+				(m)->memo_release((o), (t))
+
 #ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given item.
+
+/** Check if memo contains the given item. */
+#define mtr_is_block_fix(m, o, t, table) mtr_memo_contains(m, o, t)
+
+/** Check if memo contains the given page. */
+#define mtr_is_page_fix(m, p, t, table) mtr_memo_contains_page(m, p, t)
+
+/** Check if memo contains the given item.
 @return	TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
-	mtr_t*		mtr,	/*!< in: mtr */
-	const void*	object,	/*!< in: object to search */
-	ulint		type)	/*!< in: type of object */
-	MY_ATTRIBUTE((warn_unused_result, nonnull));
-
-/**********************************************************//**
-Checks if memo contains the given page.
+#define mtr_memo_contains(m, o, t)					\
+				(m)->memo_contains((m)->get_memo(), (o), (t))
+
+/** Check if memo contains the given page.
 @return	TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
-	mtr_t*		mtr,	/*!< in: mtr */
-	const byte*	ptr,	/*!< in: pointer to buffer frame */
-	ulint		type);	/*!< in: type of object */
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
-	mtr_t*	mtr);	/*!< in: mtr */
-# else /* !UNIV_HOTBACKUP */
-#  define mtr_memo_contains(mtr, object, type)		TRUE
-#  define mtr_memo_contains_page(mtr, ptr, type)	TRUE
-# endif /* !UNIV_HOTBACKUP */
+#define mtr_memo_contains_page(m, p, t)					\
+	(m)->memo_contains_page_flagged((p), (t))
 #endif /* UNIV_DEBUG */
-/*######################################################################*/
 
-#define	MTR_BUF_MEMO_SIZE	200	/* number of slots in memo */
+/** Print info of an mtr handle. */
+#define mtr_print(m)		(m)->print()
 
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
+/** Return the log object of a mini-transaction buffer.
 @return	log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
-	mtr_t*	mtr);	/*!< in: mini-transaction */
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
+#define mtr_get_log(m)		(m)->get_log()
+
+/** Push an object to an mtr memo stack. */
+#define mtr_memo_push(m, o, t)	(m)->memo_push(o, t)
+
+/** Lock an rw-lock in s-mode. */
+#define mtr_s_lock(l, m)	(m)->s_lock((l), __FILE__, __LINE__)
+
+/** Lock an rw-lock in x-mode. */
+#define mtr_x_lock(l, m)	(m)->x_lock((l), __FILE__, __LINE__)
+
+/** Lock a tablespace in x-mode. */
+#define mtr_x_lock_space(s, m)	(m)->x_lock_space((s), __FILE__, __LINE__)
+
+/** Lock an rw-lock in sx-mode. */
+#define mtr_sx_lock(l, m)	(m)->sx_lock((l), __FILE__, __LINE__)
+
+#define mtr_memo_contains_flagged(m, p, l)				\
+				(m)->memo_contains_flagged((p), (l))
+
+#define mtr_memo_contains_page_flagged(m, p, l)				\
+				(m)->memo_contains_page_flagged((p), (l))
+
+#define mtr_release_block_at_savepoint(m, s, b)				\
+				(m)->release_block_at_savepoint((s), (b))
+
+#define mtr_block_sx_latch_at_savepoint(m, s, b)			\
+				(m)->sx_latch_at_savepoint((s), (b))
+
+#define mtr_block_x_latch_at_savepoint(m, s, b)				\
+				(m)->x_latch_at_savepoint((s), (b))
+
+/** Check if a mini-transaction is dirtying a clean page.
+@param b	block being x-fixed
+@return true if the mtr is dirtying a clean page. */
+#define mtr_block_dirtied(b)	mtr_t::is_block_dirtied((b))
+
+/** Forward declaration of a tablespace object */
+struct fil_space_t;
+
+/** Append records to the system-wide redo log buffer.
+@param[in]	log	redo log records */
 void
-mtr_memo_push(
-/*==========*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	void*	object,	/*!< in: object */
-	ulint	type);	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+mtr_write_log(
+	const mtr_buf_t*	log);
 
 /** Mini-transaction memo stack slot. */
-struct mtr_memo_slot_t{
-	ulint	type;	/*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
-	void*	object;	/*!< pointer to the object */
+struct mtr_memo_slot_t {
+	/** pointer to the object */
+	void*		object;
+
+	/** type of the stored object (MTR_MEMO_S_LOCK, ...) */
+	ulint		type;
 };
 
-/* Mini-transaction handle and buffer */
-struct mtr_t{
+/** Mini-transaction handle and buffer */
+struct mtr_t {
+
+	/** State variables of the mtr */
+	struct Impl {
+
+		/** memo stack for locks etc. */
+		mtr_buf_t	m_memo;
+
+		/** mini-transaction log */
+		mtr_buf_t	m_log;
+
+		/** true if mtr has made at least one buffer pool page dirty */
+		bool		m_made_dirty;
+
+		/** true if inside ibuf changes */
+		bool		m_inside_ibuf;
+
+		/** true if the mini-transaction modified buffer pool pages */
+		bool		m_modifications;
+
+		/** Count of how many page initial log records have been
+		written to the mtr log */
+		ib_uint32_t	m_n_log_recs;
+
+		/** specifies which operations should be logged; default
+		value MTR_LOG_ALL */
+		mtr_log_t	m_log_mode;
 #ifdef UNIV_DEBUG
-	ulint		state;	/*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
-#endif
-	dyn_array_t	memo;	/*!< memo stack for locks etc. */
-	dyn_array_t	log;	/*!< mini-transaction log */
-	unsigned	inside_ibuf:1;
-				/*!< TRUE if inside ibuf changes */
-	unsigned	modifications:1;
-				/*!< TRUE if the mini-transaction
-				modified buffer pool pages */
-	unsigned	made_dirty:1;
-				/*!< TRUE if mtr has made at least
-				one buffer pool page dirty */
-	ulint		n_log_recs;
-				/* count of how many page initial log records
-				have been written to the mtr log */
-	ulint		n_freed_pages;
-				/* number of pages that have been freed in
-				this mini-transaction */
-	ulint		log_mode; /* specifies which operations should be
-				logged; default value MTR_LOG_ALL */
-	lsn_t		start_lsn;/* start lsn of the possible log entry for
-				this mtr */
-	lsn_t		end_lsn;/* end lsn of the possible log entry for
-				this mtr */
+		/** Persistent user tablespace associated with the
+		mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */
+		ulint		m_user_space_id;
+#endif /* UNIV_DEBUG */
+		/** User tablespace that is being modified by the
+		mini-transaction */
+		fil_space_t*	m_user_space;
+		/** Undo tablespace that is being modified by the
+		mini-transaction */
+		fil_space_t*	m_undo_space;
+		/** System tablespace if it is being modified by the
+		mini-transaction */
+		fil_space_t*	m_sys_space;
+
+		/** State of the transaction */
+		mtr_state_t	m_state;
+
+		/** Flush Observer */
+		FlushObserver*	m_flush_observer;
+
 #ifdef UNIV_DEBUG
-	ulint		magic_n;
+		/** For checking corruption. */
+		ulint		m_magic_n;
 #endif /* UNIV_DEBUG */
-	trx_t*		trx;	/*!< transaction */
-};
+
+		/** Owning mini-transaction */
+		mtr_t*		m_mtr;
+
+		/** Transaction handle */
+		trx_t*			m_trx;
+	};
+
+	mtr_t()
+	{
+		m_impl.m_state = MTR_STATE_INIT;
+	}
+
+	~mtr_t() { }
+
+	/** Release the free extents that were reserved using
+	fsp_reserve_free_extents().  This is equivalent to calling
+	fil_space_release_free_extents().  This is intended for use
+	with index pages.
+	@param[in]	n_reserved	number of reserved extents */
+	void release_free_extents(ulint n_reserved);
+
+	/** Start a mini-transaction.
+	@param sync		true if it is a synchronous mini-transaction
+	@param read_only	true if read only mini-transaction */
+	void start(bool sync = true, bool read_only = false);
+
+	/** Start a mini-transaction with the given transaction handle trx.
+	@param sync		true if it is a synchronous mini-transaction
+	@param read_only	true if read only mini-transaction */
+	void start(trx_t* trx, bool sync = true, bool read_only = false);
+
+	/** @return whether this is an asynchronous mini-transaction. */
+	bool is_async() const
+	{
+		return(!m_sync);
+	}
+
+	/** Request a future commit to be synchronous. */
+	void set_sync()
+	{
+		m_sync = true;
+	}
+
+	/** Commit the mini-transaction. */
+	void commit();
+
+	/** Commit a mini-transaction that did not modify any pages,
+	but generated some redo log on a higher level, such as
+	MLOG_FILE_NAME records and a MLOG_CHECKPOINT marker.
+	The caller must invoke log_mutex_enter() and log_mutex_exit().
+	This is to be used at log_checkpoint().
+	@param[in]	checkpoint_lsn	the LSN of the log checkpoint  */
+	void commit_checkpoint(lsn_t checkpoint_lsn);
+
+	/** Return current size of the buffer.
+	@return	savepoint */
+	ulint get_savepoint() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		ut_ad(is_active());
+		ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+
+		return(m_impl.m_memo.size());
+	}
+
+	/** Release the (index tree) s-latch stored in an mtr memo after a
+	savepoint.
+	@param savepoint	value returned by get_savepoint().
+	@param lock		latch to release */
+	inline void release_s_latch_at_savepoint(
+		ulint		savepoint,
+		rw_lock_t*	lock);
+
+	/** Release the block in an mtr memo after a savepoint. */
+	inline void release_block_at_savepoint(
+		ulint		savepoint,
+		buf_block_t*	block);
+
+	/** SX-latch a not yet latched block after a savepoint. */
+	inline void sx_latch_at_savepoint(ulint savepoint, buf_block_t* block);
+
+	/** X-latch a not yet latched block after a savepoint. */
+	inline void x_latch_at_savepoint(ulint savepoint, buf_block_t*	block);
+
+	/** Get the logging mode.
+	@return	logging mode */
+	inline mtr_log_t get_log_mode() const
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Change the logging mode.
+	@param mode	 logging mode
+	@return	old mode */
+	inline mtr_log_t set_log_mode(mtr_log_t mode);
+
+	/** Note that the mini-transaction is modifying the system tablespace
+	(for example, for the change buffer or for undo logs)
+	@return the system tablespace */
+	fil_space_t* set_sys_modified()
+	{
+		if (!m_impl.m_sys_space) {
+			lookup_sys_space();
+		}
+		return(m_impl.m_sys_space);
+	}
+
+	/** Copy the tablespaces associated with the mini-transaction
+	(needed for generating MLOG_FILE_NAME records)
+	@param[in]	mtr	mini-transaction that may modify
+	the same set of tablespaces as this one */
+	void set_spaces(const mtr_t& mtr)
+	{
+		ut_ad(m_impl.m_user_space_id == TRX_SYS_SPACE);
+		ut_ad(!m_impl.m_user_space);
+		ut_ad(!m_impl.m_undo_space);
+		ut_ad(!m_impl.m_sys_space);
+
+		ut_d(m_impl.m_user_space_id = mtr.m_impl.m_user_space_id);
+		m_impl.m_user_space = mtr.m_impl.m_user_space;
+		m_impl.m_undo_space = mtr.m_impl.m_undo_space;
+		m_impl.m_sys_space = mtr.m_impl.m_sys_space;
+	}
+
+	/** Set the tablespace associated with the mini-transaction
+	(needed for generating a MLOG_FILE_NAME record)
+	@param[in]	space_id	user or system tablespace ID
+	@return	the tablespace */
+	fil_space_t* set_named_space(ulint space_id)
+	{
+		ut_ad(m_impl.m_user_space_id == TRX_SYS_SPACE);
+		ut_d(m_impl.m_user_space_id = space_id);
+		if (space_id == TRX_SYS_SPACE) {
+			return(set_sys_modified());
+		} else {
+			lookup_user_space(space_id);
+			return(m_impl.m_user_space);
+		}
+	}
+
+	/** Set the tablespace associated with the mini-transaction
+	(needed for generating a MLOG_FILE_NAME record)
+	@param[in]	space	user or system tablespace */
+	void set_named_space(fil_space_t* space);
 
 #ifdef UNIV_DEBUG
-# define MTR_MAGIC_N		54551
+	/** Check the tablespace associated with the mini-transaction
+	(needed for generating a MLOG_FILE_NAME record)
+	@param[in]	space	tablespace
+	@return whether the mini-transaction is associated with the space */
+	bool is_named_space(ulint space) const;
 #endif /* UNIV_DEBUG */
 
-#define MTR_ACTIVE		12231
-#define MTR_COMMITTING		56456
-#define MTR_COMMITTED		34676
+	/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
+	@param ptr	pointer from where to read
+	@param type	MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES
+	@return	value read */
+	inline ulint read_ulint(const byte* ptr, mlog_id_t type) const
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Locks a rw-latch in S mode.
+	NOTE: use mtr_s_lock().
+	@param lock	rw-lock
+	@param file	file name from where called
+	@param line	line number in file */
+	inline void s_lock(rw_lock_t* lock, const char* file, ulint line);
+
+	/** Locks a rw-latch in X mode.
+	NOTE: use mtr_x_lock().
+	@param lock	rw-lock
+	@param file	file name from where called
+	@param line	line number in file */
+	inline void x_lock(rw_lock_t* lock, const char*	file, ulint line);
+
+	/** Locks a rw-latch in SX mode.
+	NOTE: use mtr_sx_lock().
+	@param lock	rw-lock
+	@param file	file name from where called
+	@param line	line number in file */
+	inline void sx_lock(rw_lock_t* lock, const char* file, ulint line);
+
+	/** Acquire a tablespace X-latch.
+	NOTE: use mtr_x_lock_space().
+	@param[in]	space_id	tablespace ID
+	@param[in]	file		file name from where called
+	@param[in]	line		line number in file
+	@return the tablespace object (never NULL) */
+	fil_space_t* x_lock_space(
+		ulint		space_id,
+		const char*	file,
+		ulint		line);
+
+	/** Release an object in the memo stack.
+	@param object	object
+	@param type	object type: MTR_MEMO_S_LOCK, ...
+	@return true if the lock was released */
+	bool memo_release(const void* object, ulint type);
+	/** Release a page latch.
+	@param[in]	ptr	pointer to within a page frame
+	@param[in]	type	object type: MTR_MEMO_PAGE_X_FIX, ... */
+	void release_page(const void* ptr, mtr_memo_type_t type);
+
+	/** Note that the mini-transaction has modified data. */
+	void set_modified()
+	{
+		m_impl.m_modifications = true;
+	}
+
+	/** Set the state to not-modified. This will not log the
+	changes.  This is only used during redo log apply, to avoid
+	logging the changes. */
+	void discard_modifications()
+	{
+		m_impl.m_modifications = false;
+	}
+
+	/** Get the LSN of commit().
+	@return the commit LSN
+	@retval 0 if the transaction only modified temporary tablespaces */
+	lsn_t commit_lsn() const
+	{
+		ut_ad(has_committed());
+		return(m_commit_lsn);
+	}
+
+	/** Note that we are inside the change buffer code. */
+	void enter_ibuf()
+	{
+		m_impl.m_inside_ibuf = true;
+	}
+
+	/** Note that we have exited from the change buffer code. */
+	void exit_ibuf()
+	{
+		m_impl.m_inside_ibuf = false;
+	}
+
+	/** @return true if we are inside the change buffer code */
+	bool is_inside_ibuf() const
+	{
+		return(m_impl.m_inside_ibuf);
+	}
+
+	/**
+	@return true if the mini-transaction is active */
+	bool is_active() const
+	{
+		return(m_impl.m_state == MTR_STATE_ACTIVE);
+	}
+
+	/** Get flush observer
+	@return flush observer */
+	FlushObserver* get_flush_observer() const
+	{
+		return(m_impl.m_flush_observer);
+	}
+
+	/** Set flush observer
+	@param[in]	observer	flush observer */
+	void set_flush_observer(FlushObserver*	observer)
+	{
+		ut_ad(observer == NULL
+		      || m_impl.m_log_mode == MTR_LOG_NO_REDO);
+
+		m_impl.m_flush_observer = observer;
+	}
+
+#ifdef UNIV_DEBUG
+	/** Check if memo contains the given item.
+	@param memo	memo stack
+	@param object	object to search
+	@param type	type of object
+	@return	true if contains */
+	static bool memo_contains(
+		mtr_buf_t*	memo,
+		const void*	object,
+		ulint		type)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Check if memo contains the given item.
+	@param ptr		object to search
+	@param flags		specify types of object (can be ORred) of
+				MTR_MEMO_PAGE_S_FIX ... values
+	@return true if contains */
+	bool memo_contains_flagged(const void* ptr, ulint flags) const;
+
+	/** Check if memo contains the given page.
+	@param[in]	ptr	pointer to within buffer frame
+	@param[in]	flags	specify types of object with OR of
+				MTR_MEMO_PAGE_S_FIX... values
+	@return	the block
+	@retval	NULL	if not found */
+	buf_block_t* memo_contains_page_flagged(
+		const byte*	ptr,
+		ulint		flags) const;
+
+	/** Mark the given latched page as modified.
+	@param[in]	ptr	pointer to within buffer frame */
+	void memo_modify_page(const byte* ptr);
+
+	/** Print info of an mtr handle. */
+	void print() const;
+
+	/** @return true if the mini-transaction has committed */
+	bool has_committed() const
+	{
+		return(m_impl.m_state == MTR_STATE_COMMITTED);
+	}
+
+	/** @return true if the mini-transaction is committing */
+	bool is_committing() const
+	{
+		return(m_impl.m_state == MTR_STATE_COMMITTING);
+	}
+
+	/** @return true if mini-transaction contains modifications. */
+	bool has_modifications() const
+	{
+		return(m_impl.m_modifications);
+	}
+
+	/** @return the memo stack */
+	const mtr_buf_t* get_memo() const
+	{
+		return(&m_impl.m_memo);
+	}
+
+	/** @return the memo stack */
+	mtr_buf_t* get_memo()
+	{
+		return(&m_impl.m_memo);
+	}
+#endif /* UNIV_DEBUG */
+
+	/** @return true if the mtr has made at least one buffer pool page dirty */
+	bool is_dirty() const
+	{
+		return(m_impl.m_made_dirty);
+	}
+
+	/** Note that a record has been added to the log */
+	void added_rec()
+	{
+		++m_impl.m_n_log_recs;
+	}
+
+	/** Get the buffered redo log of this mini-transaction.
+	@return	redo log */
+	const mtr_buf_t* get_log() const
+	{
+		ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+
+		return(&m_impl.m_log);
+	}
+
+	/** Get the buffered redo log of this mini-transaction.
+	@return	redo log */
+	mtr_buf_t* get_log()
+	{
+		ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+
+		return(&m_impl.m_log);
+	}
+
+	/** Push an object to an mtr memo stack.
+	@param object	object
+	@param type	object type: MTR_MEMO_S_LOCK, ... */
+	inline void memo_push(void* object, mtr_memo_type_t type);
+
+	/** Check if this mini-transaction is dirtying a clean page.
+	@param block	block being x-fixed
+	@return true if the mtr is dirtying a clean page. */
+	static bool is_block_dirtied(const buf_block_t* block)
+		MY_ATTRIBUTE((warn_unused_result));
+
+private:
+	/** Look up the system tablespace. */
+	void lookup_sys_space();
+	/** Look up the user tablespace.
+	@param[in]	space_id	tablespace ID  */
+	void lookup_user_space(ulint space_id);
+
+	class Command;
+
+	friend class Command;
+
+private:
+	Impl			m_impl;
+
+	/** LSN at commit time */
+	volatile lsn_t		m_commit_lsn;
+
+	/** true if it is synchronous mini-transaction */
+	bool			m_sync;
+};
 
 #ifndef UNIV_NONINL
 #include "mtr0mtr.ic"
-#endif
+#endif /* !UNIV_NONINL */
 
-#endif
+#endif /* mtr0mtr_h */
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 37cea34d4eb..f0354756b23 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,276 +23,258 @@ Mini-transaction buffer
 Created 11/26/1995 Heikki Tuuri
 *******************************************************/
 
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mach0data.h"
-
-/***************************************************//**
-Checks if a mini-transaction is dirtying a clean page.
-@return TRUE if the mtr is dirtying a clean page. */
-UNIV_INTERN
-ibool
-mtr_block_dirtied(
-/*==============*/
-	const buf_block_t*	block)	/*!< in: block being x-fixed */
-	MY_ATTRIBUTE((nonnull,warn_unused_result));
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start_trx(
-/*======*/
-	mtr_t*	mtr,	/*!< out: mini-transaction */
-	trx_t*	trx)	/*!< in: transaction */
-{
-	UNIV_MEM_INVALID(mtr, sizeof *mtr);
-
-	dyn_array_create(&(mtr->memo));
-	dyn_array_create(&(mtr->log));
-
-	mtr->log_mode = MTR_LOG_ALL;
-	mtr->inside_ibuf = FALSE;
-	mtr->modifications = FALSE;
-	mtr->made_dirty = FALSE;
-	mtr->n_log_recs = 0;
-	mtr->n_freed_pages = 0;
-	mtr->trx = trx;
-
-	ut_d(mtr->state = MTR_ACTIVE);
-	ut_d(mtr->magic_n = MTR_MAGIC_N);
-}
+#include "buf0buf.h"
 
-/***************************************************//**
+/**
 Pushes an object to an mtr memo stack. */
-UNIV_INLINE
 void
-mtr_memo_push(
-/*==========*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	void*	object,	/*!< in: object */
-	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+mtr_t::memo_push(void* object, mtr_memo_type_t type)
 {
-	dyn_array_t*		memo;
-	mtr_memo_slot_t*	slot;
-
-	ut_ad(object);
+	ut_ad(is_active());
+	ut_ad(object != NULL);
 	ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
-	ut_ad(type <= MTR_MEMO_X_LOCK);
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(type <= MTR_MEMO_SX_LOCK);
+	ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+	ut_ad(ut_is_2pow(type));
 
 	/* If this mtr has x-fixed a clean page then we set
 	the made_dirty flag. This tells us if we need to
 	grab log_flush_order_mutex at mtr_commit so that we
 	can insert the dirtied page to the flush list. */
-	if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) {
-		mtr->made_dirty =
-			mtr_block_dirtied((const buf_block_t*) object);
+
+	if ((type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX)
+	    && !m_impl.m_made_dirty) {
+
+		m_impl.m_made_dirty = is_block_dirtied(
+			reinterpret_cast<const buf_block_t*>(object));
 	}
 
-	memo = &(mtr->memo);
+	mtr_memo_slot_t*	slot;
 
-	slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
+	slot = m_impl.m_memo.push<mtr_memo_slot_t*>(sizeof(*slot));
 
-	slot->object = object;
 	slot->type = type;
+	slot->object = object;
 }
 
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return	savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
-	mtr_t*	mtr)	/*!< in: mtr */
+/**
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+void
+mtr_t::release_s_latch_at_savepoint(
+	ulint		savepoint,
+	rw_lock_t*	lock)
 {
-	dyn_array_t*	memo;
+	ut_ad(is_active());
+	ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+
+	ut_ad(m_impl.m_memo.size() > savepoint);
 
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	mtr_memo_slot_t* slot = m_impl.m_memo.at<mtr_memo_slot_t*>(savepoint);
+
+	ut_ad(slot->object == lock);
+	ut_ad(slot->type == MTR_MEMO_S_LOCK);
 
-	memo = &(mtr->memo);
+	rw_lock_s_unlock(lock);
 
-	return(dyn_array_get_data_size(memo));
+	slot->object = NULL;
 }
 
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
+/**
+SX-latches the not yet latched block after a savepoint. */
+
 void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
-	mtr_t*		mtr,		/*!< in: mtr */
-	ulint		savepoint,	/*!< in: savepoint */
-	rw_lock_t*	lock)		/*!< in: latch to release */
+mtr_t::sx_latch_at_savepoint(
+	ulint		savepoint,
+	buf_block_t*	block)
 {
-	mtr_memo_slot_t* slot;
-	dyn_array_t*	memo;
+	ut_ad(is_active());
+	ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+	ut_ad(m_impl.m_memo.size() > savepoint);
 
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(!memo_contains_flagged(
+			block,
+			MTR_MEMO_PAGE_S_FIX
+			| MTR_MEMO_PAGE_X_FIX
+			| MTR_MEMO_PAGE_SX_FIX));
 
-	memo = &(mtr->memo);
+	mtr_memo_slot_t*	slot;
 
-	ut_ad(dyn_array_get_data_size(memo) > savepoint);
+	slot = m_impl.m_memo.at<mtr_memo_slot_t*>(savepoint);
 
-	slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
+	ut_ad(slot->object == block);
 
-	ut_ad(slot->object == lock);
-	ut_ad(slot->type == MTR_MEMO_S_LOCK);
+	/* == RW_NO_LATCH */
+	ut_a(slot->type == MTR_MEMO_BUF_FIX);
 
-	rw_lock_s_unlock(lock);
+	rw_lock_sx_lock(&block->lock);
 
-	slot->object = NULL;
+	if (!m_impl.m_made_dirty) {
+		m_impl.m_made_dirty = is_block_dirtied(block);
+	}
+
+	slot->type = MTR_MEMO_PAGE_SX_FIX;
 }
 
-# ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks if memo contains the given item.
-@return	TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
-	mtr_t*		mtr,	/*!< in: mtr */
-	const void*	object,	/*!< in: object to search */
-	ulint		type)	/*!< in: type of object */
+/**
+X-latches the not yet latched block after a savepoint. */
+
+void
+mtr_t::x_latch_at_savepoint(
+	ulint		savepoint,
+	buf_block_t*	block)
 {
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
-	ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
-
-	for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
-	     block;
-	     block = dyn_array_get_prev_block(&mtr->memo, block)) {
-		const mtr_memo_slot_t*	start
-			= reinterpret_cast<mtr_memo_slot_t*>(
-				dyn_block_get_data(block));
-		mtr_memo_slot_t*	slot
-			= reinterpret_cast<mtr_memo_slot_t*>(
-				dyn_block_get_data(block)
-				+ dyn_block_get_used(block));
-
-		ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
-		while (slot-- != start) {
-			if (object == slot->object && type == slot->type) {
-				return(true);
-			}
-		}
+	ut_ad(is_active());
+	ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+	ut_ad(m_impl.m_memo.size() > savepoint);
+
+	ut_ad(!memo_contains_flagged(
+			block,
+			MTR_MEMO_PAGE_S_FIX
+			| MTR_MEMO_PAGE_X_FIX
+			| MTR_MEMO_PAGE_SX_FIX));
+
+	mtr_memo_slot_t*	slot;
+
+	slot = m_impl.m_memo.at<mtr_memo_slot_t*>(savepoint);
+
+	ut_ad(slot->object == block);
+
+	/* == RW_NO_LATCH */
+	ut_a(slot->type == MTR_MEMO_BUF_FIX);
+
+	rw_lock_x_lock(&block->lock);
+
+	if (!m_impl.m_made_dirty) {
+		m_impl.m_made_dirty = is_block_dirtied(block);
 	}
 
-	return(false);
+	slot->type = MTR_MEMO_PAGE_X_FIX;
 }
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return	log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
-	mtr_t*	mtr)	/*!< in: mini-transaction */
+
+/**
+Releases the block in an mtr memo after a savepoint. */
+
+void
+mtr_t::release_block_at_savepoint(
+	ulint		savepoint,
+	buf_block_t*	block)
 {
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(is_active());
+	ut_ad(m_impl.m_magic_n == MTR_MAGIC_N);
+
+	mtr_memo_slot_t*	slot;
+
+	slot = m_impl.m_memo.at<mtr_memo_slot_t*>(savepoint);
 
-	return(&(mtr->log));
+	ut_a(slot->object == block);
+
+	buf_block_unfix(reinterpret_cast<buf_block_t*>(block));
+
+	buf_page_release_latch(block, slot->type);
+
+	slot->object = NULL;
 }
 
-/***************************************************************//**
+/**
 Gets the logging mode of a mini-transaction.
 @return	logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
-	mtr_t*	mtr)	/*!< in: mtr */
+
+mtr_log_t
+mtr_t::get_log_mode() const
 {
-	ut_ad(mtr);
-	ut_ad(mtr->log_mode >= MTR_LOG_ALL);
-	ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
+	ut_ad(m_impl.m_log_mode >= MTR_LOG_ALL);
+	ut_ad(m_impl.m_log_mode <= MTR_LOG_SHORT_INSERTS);
 
-	return(mtr->log_mode);
+	return(m_impl.m_log_mode);
 }
 
-/***************************************************************//**
+/**
 Changes the logging mode of a mini-transaction.
 @return	old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
-	mtr_t*	mtr,	/*!< in: mtr */
-	ulint	mode)	/*!< in: logging mode: MTR_LOG_NONE, ... */
-{
-	ulint	old_mode;
 
-	ut_ad(mtr);
+mtr_log_t
+mtr_t::set_log_mode(mtr_log_t mode)
+{
 	ut_ad(mode >= MTR_LOG_ALL);
 	ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
 
-	old_mode = mtr->log_mode;
-
-	if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
-		/* Do nothing */
-	} else {
-		mtr->log_mode = mode;
+	const mtr_log_t	old_mode = m_impl.m_log_mode;
+
+	switch (old_mode) {
+	case MTR_LOG_NO_REDO:
+		/* Once this mode is set, it must not be changed. */
+		ut_ad(mode == MTR_LOG_NO_REDO || mode == MTR_LOG_NONE);
+		return(old_mode);
+	case MTR_LOG_NONE:
+		if (mode == old_mode || mode == MTR_LOG_SHORT_INSERTS) {
+			/* Keep MTR_LOG_NONE. */
+			return(old_mode);
+		}
+		/* fall through */
+	case MTR_LOG_SHORT_INSERTS:
+		ut_ad(mode == MTR_LOG_ALL);
+		/* fall through */
+	case MTR_LOG_ALL:
+		/* MTR_LOG_NO_REDO can only be set before generating
+		any redo log records. */
+		ut_ad(mode != MTR_LOG_NO_REDO
+		      || m_impl.m_n_log_recs == 0);
+		m_impl.m_log_mode = mode;
+		return(old_mode);
 	}
 
-	ut_ad(old_mode >= MTR_LOG_ALL);
-	ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
-
+	ut_ad(0);
 	return(old_mode);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
+/**
 Locks a lock in s-mode. */
-UNIV_INLINE
+
 void
-mtr_s_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/*!< in: rw-lock */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line number */
-	mtr_t*		mtr)	/*!< in: mtr */
+mtr_t::s_lock(rw_lock_t* lock, const char* file, ulint line)
 {
-	ut_ad(mtr);
-	ut_ad(lock);
-
 	rw_lock_s_lock_inline(lock, 0, file, line);
 
-	mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
+	memo_push(lock, MTR_MEMO_S_LOCK);
 }
 
-/*********************************************************************//**
+/**
 Locks a lock in x-mode. */
-UNIV_INLINE
+
 void
-mtr_x_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/*!< in: rw-lock */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line number */
-	mtr_t*		mtr)	/*!< in: mtr */
+mtr_t::x_lock(rw_lock_t* lock, const char* file, ulint line)
 {
-	ut_ad(mtr);
-	ut_ad(lock);
-
 	rw_lock_x_lock_inline(lock, 0, file, line);
 
-	mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
+	memo_push(lock, MTR_MEMO_X_LOCK);
+}
+
+/**
+Locks a lock in sx-mode. */
+
+void
+mtr_t::sx_lock(rw_lock_t* lock, const char* file, ulint line)
+{
+	rw_lock_sx_lock_inline(lock, 0, file, line);
+
+	memo_push(lock, MTR_MEMO_SX_LOCK);
+}
+
+/**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return	value read */
+
+ulint
+mtr_t::read_ulint(const byte* ptr, mlog_id_t type) const
+{
+	ut_ad(is_active());
+
+	ut_ad(memo_contains_page_flagged(
+			ptr,
+			MTR_MEMO_PAGE_S_FIX
+			| MTR_MEMO_PAGE_X_FIX
+			| MTR_MEMO_PAGE_SX_FIX));
+
+	return(mach_read_ulint(ptr, type));
 }
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 43368c0b726..69a68830af1 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,270 @@ Created 11/26/1995 Heikki Tuuri
 #ifndef mtr0types_h
 #define mtr0types_h
 
+#ifndef UNIV_INNOCHECKSUM
+#include "sync0rw.h"
+#endif /* !UNIV_INNOCHECKSUM */
+
 struct mtr_t;
 
-#endif
+/** Logging modes for a mini-transaction */
+enum mtr_log_t {
+	/** Default mode: log all operations modifying disk-based data */
+	MTR_LOG_ALL = 21,
+
+	/** Log no operations and dirty pages are not added to the flush list */
+	MTR_LOG_NONE = 22,
+
+	/** Don't generate REDO log but add dirty pages to flush list */
+	MTR_LOG_NO_REDO = 23,
+
+	/** Inserts are logged in a shorter form */
+	MTR_LOG_SHORT_INSERTS = 24
+};
+
+/** @name Log item types
+The log items are declared 'byte' so that the compiler can warn if val
+and type parameters are switched in a call to mlog_write_ulint. NOTE!
+For 1 - 8 bytes, the flag value must give the length also! @{ */
+enum mlog_id_t {
+	/** if the mtr contains only one log record for one page,
+	i.e., write_initial_log_record has been called only once,
+	this flag is ORed to the type of that first log record */
+	MLOG_SINGLE_REC_FLAG = 128,
+
+	/** one byte is written */
+	MLOG_1BYTE = 1,
+
+	/** 2 bytes ... */
+	MLOG_2BYTES = 2,
+
+	/** 4 bytes ... */
+	MLOG_4BYTES = 4,
+
+	/** 8 bytes ... */
+	MLOG_8BYTES = 8,
+
+	/** Record insert */
+	MLOG_REC_INSERT = 9,
+
+	/** Mark clustered index record deleted */
+	MLOG_REC_CLUST_DELETE_MARK = 10,
+
+	/** Mark secondary index record deleted */
+	MLOG_REC_SEC_DELETE_MARK = 11,
+
+	/** update of a record, preserves record field sizes */
+	MLOG_REC_UPDATE_IN_PLACE = 13,
+
+	/** Delete a record from a page */
+	MLOG_REC_DELETE = 14,
+
+	/** Delete record list end on index page */
+	MLOG_LIST_END_DELETE = 15,
+
+	/** Delete record list start on index page */
+	MLOG_LIST_START_DELETE = 16,
+
+	/** Copy record list end to a new created index page */
+	MLOG_LIST_END_COPY_CREATED = 17,
+
+	/** Reorganize an index page in ROW_FORMAT=REDUNDANT */
+	MLOG_PAGE_REORGANIZE = 18,
+
+	/** Create an index page */
+	MLOG_PAGE_CREATE = 19,
+
+	/** Insert entry in an undo log */
+	MLOG_UNDO_INSERT = 20,
+
+	/** erase an undo log page end */
+	MLOG_UNDO_ERASE_END = 21,
+
+	/** initialize a page in an undo log */
+	MLOG_UNDO_INIT = 22,
+
+	/** discard an update undo log header */
+	MLOG_UNDO_HDR_DISCARD = 23,
+
+	/** reuse an insert undo log header */
+	MLOG_UNDO_HDR_REUSE = 24,
+
+	/** create an undo log header */
+	MLOG_UNDO_HDR_CREATE = 25,
+
+	/** mark an index record as the predefined minimum record */
+	MLOG_REC_MIN_MARK = 26,
+
+	/** initialize an ibuf bitmap page */
+	MLOG_IBUF_BITMAP_INIT = 27,
+
+#ifdef UNIV_LOG_LSN_DEBUG
+	/** Current LSN */
+	MLOG_LSN = 28,
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+	/** this means that a file page is taken into use and the prior
+	contents of the page should be ignored: in recovery we must not
+	trust the lsn values stored to the file page.
+	Note: it's deprecated because it causes crash recovery problem
+	in bulk create index, and actually we don't need to reset page
+	lsn in recv_recover_page_func() now. */
+	MLOG_INIT_FILE_PAGE = 29,
+
+	/** write a string to a page */
+	MLOG_WRITE_STRING = 30,
+
+	/** If a single mtr writes several log records, this log
+	record ends the sequence of these records */
+	MLOG_MULTI_REC_END = 31,
+
+	/** dummy log record used to pad a log block full */
+	MLOG_DUMMY_RECORD = 32,
+
+	/** log record about an .ibd file creation */
+	//MLOG_FILE_CREATE = 33,
+
+	/** rename databasename/tablename (no .ibd file name suffix) */
+	//MLOG_FILE_RENAME = 34,
+
+	/** delete a tablespace file that starts with (space_id,page_no) */
+	MLOG_FILE_DELETE = 35,
+
+	/** mark a compact index record as the predefined minimum record */
+	MLOG_COMP_REC_MIN_MARK = 36,
+
+	/** create a compact index page */
+	MLOG_COMP_PAGE_CREATE = 37,
+
+	/** compact record insert */
+	MLOG_COMP_REC_INSERT = 38,
+
+	/** mark compact clustered index record deleted */
+	MLOG_COMP_REC_CLUST_DELETE_MARK = 39,
+
+	/** mark compact secondary index record deleted; this log
+	record type is redundant, as MLOG_REC_SEC_DELETE_MARK is
+	independent of the record format. */
+	MLOG_COMP_REC_SEC_DELETE_MARK = 40,
+
+	/** update of a compact record, preserves record field sizes */
+	MLOG_COMP_REC_UPDATE_IN_PLACE = 41,
+
+	/** delete a compact record from a page */
+	MLOG_COMP_REC_DELETE = 42,
+
+	/** delete compact record list end on index page */
+	MLOG_COMP_LIST_END_DELETE = 43,
+
+	/** delete compact record list start on index page */
+	MLOG_COMP_LIST_START_DELETE = 44,
+
+	/** copy compact record list end to a new created index page */
+	MLOG_COMP_LIST_END_COPY_CREATED = 45,
+
+	/** reorganize an index page */
+	MLOG_COMP_PAGE_REORGANIZE = 46,
+
+	/** log record about creating an .ibd file, with format */
+	MLOG_FILE_CREATE2 = 47,
+
+	/** write the node pointer of a record on a compressed
+	non-leaf B-tree page */
+	MLOG_ZIP_WRITE_NODE_PTR = 48,
+
+	/** write the BLOB pointer of an externally stored column
+	on a compressed page */
+	MLOG_ZIP_WRITE_BLOB_PTR = 49,
+
+	/** write to compressed page header */
+	MLOG_ZIP_WRITE_HEADER = 50,
+
+	/** compress an index page */
+	MLOG_ZIP_PAGE_COMPRESS = 51,
+
+	/** compress an index page without logging its image */
+	MLOG_ZIP_PAGE_COMPRESS_NO_DATA = 52,
+
+	/** reorganize a compressed page */
+	MLOG_ZIP_PAGE_REORGANIZE = 53,
+
+	/** rename a tablespace file that starts with (space_id,page_no) */
+	MLOG_FILE_RENAME2 = 54,
+
+	/** note the first use of a tablespace file since checkpoint */
+	MLOG_FILE_NAME = 55,
+
+	/** note that all buffered log was written since a checkpoint */
+	MLOG_CHECKPOINT = 56,
+
+	/** Create a R-Tree index page */
+	MLOG_PAGE_CREATE_RTREE = 57,
+
+	/** create a R-tree compact page */
+	MLOG_COMP_PAGE_CREATE_RTREE = 58,
+
+	/** this means that a file page is taken into use.
+	We use it to replace MLOG_INIT_FILE_PAGE. */
+	MLOG_INIT_FILE_PAGE2 = 59,
+
+	/** Table is being truncated. (Marked only for file-per-table) */
+	MLOG_TRUNCATE = 60,
+
+	/** notify that an index tree is being loaded without writing
+	redo log about individual pages */
+	MLOG_INDEX_LOAD = 61,
+
+	/** biggest value (used in assertions) */
+	MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD,
+
+	/** log record for writing/updating crypt data of
+	a tablespace */
+	MLOG_FILE_WRITE_CRYPT_DATA = 100,
+};
+
+/* @} */
+
+#define EXTRA_CHECK_MLOG_NUMBER(x) \
+  ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
+
+/** Size of a MLOG_CHECKPOINT record in bytes.
+The record consists of a MLOG_CHECKPOINT byte followed by
+mach_write_to_8(checkpoint_lsn). */
+#define SIZE_OF_MLOG_CHECKPOINT	9
+
+/** Types for the mlock objects to store in the mtr memo; NOTE that the
+first 4 values must be RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH, RW_NO_LATCH */
+enum mtr_memo_type_t {
+#ifndef UNIV_INNOCHECKSUM
+	MTR_MEMO_PAGE_S_FIX = RW_S_LATCH,
+
+	MTR_MEMO_PAGE_X_FIX = RW_X_LATCH,
+
+	MTR_MEMO_PAGE_SX_FIX = RW_SX_LATCH,
+
+	MTR_MEMO_BUF_FIX = RW_NO_LATCH,
+#endif /* !UNIV_INNOCHECKSUM */
+
+#ifdef UNIV_DEBUG
+	MTR_MEMO_MODIFY = 32,
+#endif /* UNIV_DEBUG */
+
+	MTR_MEMO_S_LOCK = 64,
+
+	MTR_MEMO_X_LOCK = 128,
+
+	MTR_MEMO_SX_LOCK = 256
+};
+
+#ifdef UNIV_DEBUG
+# define MTR_MAGIC_N		54551
+#endif /* UNIV_DEBUG */
+
+enum mtr_state_t {
+	MTR_STATE_INIT = 0,
+	MTR_STATE_ACTIVE = 12231,
+	MTR_STATE_COMMITTING = 56456,
+	MTR_STATE_COMMITTED = 34676
+};
+
+#endif /* mtr0types_h */
diff --git a/storage/innobase/include/os0event.h b/storage/innobase/include/os0event.h
new file mode 100644
index 00000000000..d5fdc6ba080
--- /dev/null
+++ b/storage/innobase/include/os0event.h
@@ -0,0 +1,135 @@
+/*****************************************************************************
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0event.h
+The interface to the operating system condition variables
+
+Created 2012-09-23 Sunny Bains (split from os0sync.h)
+*******************************************************/
+
+#ifndef os0event_h
+#define os0event_h
+
+#include "univ.i"
+
+// Forward declaration.
+struct os_event;
+typedef struct os_event* os_event_t;
+
+/** Denotes an infinite delay for os_event_wait_time() */
+#define OS_SYNC_INFINITE_TIME   ULINT_UNDEFINED
+
+/** Return value of os_event_wait_time() when the time is exceeded */
+#define OS_SYNC_TIME_EXCEEDED   1
+
+/**
+Creates an event semaphore, i.e., a semaphore which may just have two states:
+signaled and nonsignaled. The created event is manual reset: it must be reset
+explicitly by calling os_event_reset().
+@return	the event handle */
+os_event_t
+os_event_create(
+/*============*/
+	const char*	name);	/*!< in: the name of the event, if NULL
+				the event is created without a name */
+
+/**
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+void
+os_event_set(
+/*=========*/
+	os_event_t	event);	/*!< in/out: event to set */
+
+/**
+Check if the event is set.
+@return true if set */
+bool
+os_event_is_set(
+/*============*/
+	const os_event_t	event);	/*!< in: event to check */
+
+/**
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop to wait for the event.
+The return value should be passed to os_event_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
+int64_t
+os_event_reset(
+/*===========*/
+	os_event_t	event);	/*!< in/out: event to reset */
+
+/**
+Frees an event object. */
+void
+os_event_destroy(
+/*=============*/
+	os_event_t&	event);	/*!< in/own: event to free */
+
+/**
+Waits for an event object until it is in the signaled state.
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set()   [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait()  [infinite wait!]
+thread C calls os_event_wait()  [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
+void
+os_event_wait_low(
+/*==============*/
+	os_event_t	event,		/*!< in/out: event to wait */
+	int64_t		reset_sig_count);/*!< in: zero or the value
+					returned by previous call of
+					os_event_reset(). */
+
+/** Blocking infinite wait on an event, until signalled.
+@param e - event to wait on. */
+#define os_event_wait(e) os_event_wait_low((e), 0)
+
+/**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+ulint
+os_event_wait_time_low(
+/*===================*/
+	os_event_t	event,			/*!< in/out: event to wait */
+	ulint		time_in_usec,		/*!< in: timeout in
+						microseconds, or
+						OS_SYNC_INFINITE_TIME */
+	int64_t		reset_sig_count);	/*!< in: zero or the value
+						returned by previous call of
+						os_event_reset(). */
+
+/** Blocking timed wait on an event.
+@param e - event to wait on.
+@param t - timeout in microseconds */
+#define os_event_wait_time(e, t) os_event_wait_time_low((e), (t), 0)
+
+#endif /* !os0event_h */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 2425a682e22..5e36cfc2ac0 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -2,7 +2,7 @@
 
 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2015, MariaDB Corporation.
+Copyright (c) 2013, 2016, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted
 by Percona Inc.. Those modifications are
@@ -38,60 +38,67 @@ Created 10/21/1995 Heikki Tuuri
 
 #include "univ.i"
 
-#ifndef __WIN__
+#ifndef _WIN32
 #include <dirent.h>
 #include <sys/stat.h>
 #include <time.h>
-#endif
+#endif /* !_WIN32 */
 
 /** File node of a tablespace or the log data space */
 struct fil_node_t;
 
-extern ibool	os_has_said_disk_full;
-/** Flag: enable debug printout for asynchronous i/o */
-extern ibool	os_aio_print_debug;
-
-/** Number of pending os_file_pread() operations */
-extern ulint	os_file_n_pending_preads;
-/** Number of pending os_file_pwrite() operations */
-extern ulint	os_file_n_pending_pwrites;
+extern bool	os_has_said_disk_full;
 
 /** Number of pending read operations */
 extern ulint	os_n_pending_reads;
 /** Number of pending write operations */
 extern ulint	os_n_pending_writes;
 
-#ifdef __WIN__
+/** File offset in bytes */
+typedef ib_uint64_t os_offset_t;
+
+#ifdef _WIN32
+
+/**
+Gets the operating system version. Currently works only on Windows.
+@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
+OS_WIN7. */
+
+ulint
+os_get_os_version();
+
+typedef HANDLE	os_file_dir_t;	/*!< directory stream */
 
 /** We define always WIN_ASYNC_IO, and check at run-time whether
-   the OS actually supports it: Win 95 does not, NT does. */
-#define WIN_ASYNC_IO
+the OS actually supports it: Win 95 does not, NT does. */
+# define WIN_ASYNC_IO
 
 /** Use unbuffered I/O */
-#define UNIV_NON_BUFFERED_IO
-
-#endif
+# define UNIV_NON_BUFFERED_IO
 
-/** File offset in bytes */
-typedef ib_uint64_t os_offset_t;
-#ifdef __WIN__
 /** File handle */
 # define os_file_t	HANDLE
+
 /** Convert a C file descriptor to a native file handle
-@param fd	file descriptor
-@return		native file handle */
+@param fd file descriptor
+@return native file handle */
 # define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
-#else
+
+#else /* _WIN32 */
+
+typedef DIR*	os_file_dir_t;	/*!< directory stream */
+
 /** File handle */
 typedef int	os_file_t;
+
 /** Convert a C file descriptor to a native file handle
-@param fd	file descriptor
-@return		native file handle */
+@param fd file descriptor
+@return native file handle */
 # define OS_FILE_FROM_FD(fd) fd
-#endif
 
-/** Umask for creating files */
-extern ulint	os_innodb_umask;
+#endif /* _WIN32 */
+
+static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
 
 /** The next value should be smaller or equal to the smallest sector size used
 on any disk. A log block is required to be a portion of disk which is written
@@ -125,285 +132,784 @@ enum os_file_create_t {
 					ON_ERROR_NO_EXIT is set */
 };
 
-#define OS_FILE_READ_ONLY		333
-#define	OS_FILE_READ_WRITE		444
-#define	OS_FILE_READ_ALLOW_DELETE	555	/* for mysqlbackup */
+static const ulint OS_FILE_READ_ONLY = 333;
+static const ulint OS_FILE_READ_WRITE = 444;
+
+/** Used by MySQLBackup */
+static const ulint OS_FILE_READ_ALLOW_DELETE = 555;
 
 /* Options for file_create */
-#define	OS_FILE_AIO			61
-#define	OS_FILE_NORMAL			62
+static const ulint OS_FILE_AIO = 61;
+static const ulint OS_FILE_NORMAL = 62;
 /* @} */
 
 /** Types for file create @{ */
-#define	OS_DATA_FILE			100
-#define OS_LOG_FILE			101
+static const ulint OS_DATA_FILE = 100;
+static const ulint OS_LOG_FILE = 101;
+static const ulint OS_DATA_TEMP_FILE = 102;
 /* @} */
 
 /** Error codes from os_file_get_last_error @{ */
-#define	OS_FILE_NAME_TOO_LONG		36
-#define	OS_FILE_NOT_FOUND		71
-#define	OS_FILE_DISK_FULL		72
-#define	OS_FILE_ALREADY_EXISTS		73
-#define	OS_FILE_PATH_ERROR		74
-#define	OS_FILE_AIO_RESOURCES_RESERVED	75	/* wait for OS aio resources
-						to become available again */
-#define	OS_FILE_SHARING_VIOLATION	76
-#define	OS_FILE_ERROR_NOT_SPECIFIED	77
-#define	OS_FILE_INSUFFICIENT_RESOURCE	78
-#define	OS_FILE_AIO_INTERRUPTED		79
-#define	OS_FILE_OPERATION_ABORTED	80
-#define	OS_FILE_ACCESS_VIOLATION	81
-#define	OS_FILE_OPERATION_NOT_SUPPORTED	125
-#define	OS_FILE_ERROR_MAX		200
+static const ulint OS_FILE_NAME_TOO_LONG = 36;
+static const ulint OS_FILE_NOT_FOUND = 71;
+static const ulint OS_FILE_DISK_FULL = 72;
+static const ulint OS_FILE_ALREADY_EXISTS = 73;
+static const ulint OS_FILE_PATH_ERROR = 74;
+
+/** wait for OS aio resources to become available again */
+static const ulint OS_FILE_AIO_RESOURCES_RESERVED = 75;
+
+static const ulint OS_FILE_SHARING_VIOLATION = 76;
+static const ulint OS_FILE_ERROR_NOT_SPECIFIED = 77;
+static const ulint OS_FILE_INSUFFICIENT_RESOURCE = 78;
+static const ulint OS_FILE_AIO_INTERRUPTED = 79;
+static const ulint OS_FILE_OPERATION_ABORTED = 80;
+static const ulint OS_FILE_ACCESS_VIOLATION = 81;
+static const ulint OS_FILE_OPERATION_NOT_SUPPORTED = 125;
+static const ulint OS_FILE_ERROR_MAX = 200;
 /* @} */
 
-/** Types for aio operations @{ */
-#define OS_FILE_READ	10
-#define OS_FILE_WRITE	11
+/** Compression algorithm. */
+struct Compression {
 
-#define OS_FILE_LOG	256	/* This can be ORed to type */
-/* @} */
+	/** Algorithm types supported */
+	enum Type {
+		/* Note: During recovery we don't have the compression type
+		because the .frm file has not been read yet. Therefore
+		we write the recovered pages out without compression. */
 
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32	/*!< Win NT does not allow more
-						than 64 */
+		/** No compression */
+		NONE = 0,
 
-/** Modes for aio operations @{ */
-#define OS_AIO_NORMAL	21	/*!< Normal asynchronous i/o not for ibuf
-				pages or ibuf bitmap pages */
-#define OS_AIO_IBUF	22	/*!< Asynchronous i/o for ibuf pages or ibuf
-				bitmap pages */
-#define OS_AIO_LOG	23	/*!< Asynchronous i/o for the log */
-#define OS_AIO_SYNC	24	/*!< Asynchronous i/o where the calling thread
-				will itself wait for the i/o to complete,
-				doing also the job of the i/o-handler thread;
-				can be used for any pages, ibuf or non-ibuf.
-				This is used to save CPU time, as we can do
-				with fewer thread switches. Plain synchronous
-				i/o is not as good, because it must serialize
-				the file seek and read or write, causing a
-				bottleneck for parallelism. */
-
-#define OS_AIO_SIMULATED_WAKE_LATER	512 /*!< This can be ORed to mode
-				in the call of os_aio(...),
-				if the caller wants to post several i/o
-				requests in a batch, and only after that
-				wake the i/o-handler thread; this has
-				effect only in simulated aio */
-/* @} */
+		/** Use ZLib */
+		ZLIB = 1,
 
-#define OS_WIN31	1	/*!< Microsoft Windows 3.x */
-#define OS_WIN95	2	/*!< Microsoft Windows 95 */
-#define OS_WINNT	3	/*!< Microsoft Windows NT 3.x */
-#define OS_WIN2000	4	/*!< Microsoft Windows 2000 */
-#define OS_WINXP	5	/*!< Microsoft Windows XP
-				or Windows Server 2003 */
-#define OS_WINVISTA	6	/*!< Microsoft Windows Vista
-				or Windows Server 2008 */
-#define OS_WIN7		7	/*!< Microsoft Windows 7
-				or Windows Server 2008 R2 */
+		/** Use LZ4 faster variant, usually lower compression. */
+		LZ4 = 2
+	};
 
+	/** Compressed page meta-data */
+	struct meta_t {
 
-extern ulint	os_n_file_reads;
-extern ulint	os_n_file_writes;
-extern ulint	os_n_fsyncs;
+		/** Version number */
+		uint8_t		m_version;
 
-#ifdef UNIV_PFS_IO
-/* Keys to register InnoDB I/O with performance schema */
-extern mysql_pfs_key_t	innodb_file_data_key;
-extern mysql_pfs_key_t	innodb_file_log_key;
-extern mysql_pfs_key_t	innodb_file_temp_key;
+		/** Algorithm type */
+		Type		m_algorithm;
 
-/* Following four macros are instumentations to register
-various file I/O operations with performance schema.
-1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
-used to register file creation, opening, closing and renaming.
-2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
-used to register actual file read, write and flush
-3) register_pfs_file_close_begin() and register_pfs_file_close_end()
-are used to register file deletion operations*/
-# define register_pfs_file_open_begin(state, locker, key, op, name,	\
-				      src_file, src_line)		\
-do {									\
-	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
-		state, key, op, name, &locker);				\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(start_file_open_wait)(			\
-			locker, src_file, src_line);			\
-	}								\
-} while (0)
+		/** Original page type */
+		uint16_t	m_original_type;
 
-# define register_pfs_file_open_end(locker, file)			\
-do {									\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
-			locker, file);					\
-	}								\
-} while (0)
+		/** Original page size, before compression */
+		uint16_t	m_original_size;
 
-# define register_pfs_file_close_begin(state, locker, key, op, name,	\
-				      src_file, src_line)		\
-do {									\
-	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
-		state, key, op, name, &locker);				\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(start_file_close_wait)(			\
-			locker, src_file, src_line);			\
-	}								\
-} while (0)
+		/** Size after compression */
+		uint16_t	m_compressed_size;
+	};
 
-# define register_pfs_file_close_end(locker, result)			\
-do {									\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(end_file_close_wait)(			\
-			locker, result);				\
-	}								\
-} while (0)
+	/** Default constructor */
+	Compression() : m_type(NONE) { };
 
-# define register_pfs_file_io_begin(state, locker, file, count, op,	\
-				    src_file, src_line)			\
-do {									\
-	locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(	\
-		state, file, op);					\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(start_file_wait)(				\
-			locker, count, src_file, src_line);		\
-	}								\
-} while (0)
+	/** Specific constructor; debug builds assert on an unknown type.
+	@param[in]	type		Algorithm type */
+	explicit Compression(Type type)
+		:
+		m_type(type)
+	{
+#ifdef UNIV_DEBUG
+		switch (m_type) {
+		case NONE:
+		case ZLIB:
+		case LZ4:
+			break;	/* supported type: must not reach ut_error */
+		default:
+			ut_error;
+		}
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Check the page header type field.
+	@param[in]	page		Page contents
+	@return true if it is a compressed page */
+	static bool is_compressed_page(const byte* page)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Check whether the compression algorithm is supported.
+        @param[in]      algorithm       Compression algorithm to check
+        @param[out]     type            The type that algorithm maps to
+        @return DB_SUCCESS or error code */
+	static dberr_t check(const char* algorithm, Compression* type)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Validate the algorithm string.
+        @param[in]      algorithm       Compression algorithm to check
+        @return DB_SUCCESS or error code */
+	static dberr_t validate(const char* algorithm)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Convert to a "string".
+        @param[in]      type            The compression type
+        @return the string representation */
+        static const char* to_string(Type type)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Convert the meta data to a std::string.
+        @param[in]      meta		Page Meta data
+        @return the string representation */
+        static std::string to_string(const meta_t& meta)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Deserialise the page header compression meta-data
+	@param[in]	page		Pointer to the page header
+	@param[out]	control		Deserialised data */
+	static void deserialize_header(
+		const byte*	page,
+		meta_t*		control);
+
+        /** Check if the string is "empty" or "none".
+        @param[in]      algorithm       Compression algorithm to check
+        @return true if no algorithm requested */
+	static bool is_none(const char* algorithm)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Decompress the page data contents. Page type must be
+	FIL_PAGE_COMPRESSED, if not then the source contents are
+	left unchanged and DB_SUCCESS is returned.
+	@param[in]	dblwr_recover	true if double write recovery
+					in progress
+	@param[in,out]	src		Data read from disk, decompressed
+					data will be copied to this page
+	@param[in,out]	dst		Scratch area to use for decompression
+	@param[in]	dst_len		Size of the scratch area in bytes
+	@return DB_SUCCESS or error code */
+	static dberr_t deserialize(
+		bool		dblwr_recover,
+		byte*		src,
+		byte*		dst,
+		ulint		dst_len)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Compression type */
+	Type		m_type;
+};
 
-# define register_pfs_file_io_end(locker, count)			\
-do {									\
-	if (UNIV_LIKELY(locker != NULL)) {				\
-		PSI_FILE_CALL(end_file_wait)(locker, count);		\
-	}								\
-} while (0)
-#endif /* UNIV_PFS_IO  */
+/** Encryption key length */
+static const ulint ENCRYPTION_KEY_LEN = 32;
 
-/* Following macros/functions are file I/O APIs that would be performance
-schema instrumented if "UNIV_PFS_IO" is defined. They would point to
-wrapper functions with performance schema instrumentation in such case.
+/** Encryption magic bytes size */
+static const ulint ENCRYPTION_MAGIC_SIZE = 3;
 
-os_file_create
-os_file_create_simple
-os_file_create_simple_no_error_handling
-os_file_close
-os_file_rename
-os_aio
-os_file_read
-os_file_read_no_error_handling
-os_file_write
+/** Encryption magic bytes for 5.7.11, it's for checking the encryption information
+version. */
+static const char ENCRYPTION_KEY_MAGIC_V1[] = "lCA";
 
-The wrapper functions have the prefix of "innodb_". */
+/** Encryption magic bytes for 5.7.12+, it's for checking the encryption information
+version. */
+static const char ENCRYPTION_KEY_MAGIC_V2[] = "lCB";
 
-#ifdef UNIV_PFS_IO
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes)	\
-	pfs_os_file_create_func(key, name, create, purpose,	type,	\
-				success, atomic_writes, __FILE__, __LINE__)
+/** Encryption master key prefix */
+static const char ENCRYPTION_MASTER_KEY_PRIFIX[] = "INNODBKey";
 
-# define os_file_create_simple(key, name, create, access, success)	\
-	pfs_os_file_create_simple_func(key, name, create, access,	\
-				       success, __FILE__, __LINE__)
+/** Encryption master key prefix size */
+static const ulint ENCRYPTION_MASTER_KEY_PRIFIX_LEN = 9;
 
-# define os_file_create_simple_no_error_handling(			\
-	key, name, create_mode, access, success, atomic_writes)		\
-	pfs_os_file_create_simple_no_error_handling_func(		\
-		key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__)
+/** Maximum length of the encryption master key name */
+static const ulint ENCRYPTION_MASTER_KEY_NAME_MAX_LEN = 100;
 
-# define os_file_close(file)						\
-	pfs_os_file_close_func(file, __FILE__, __LINE__)
+/** UUID of server instance, it's needed for composing master key name */
+static const ulint ENCRYPTION_SERVER_UUID_LEN = 36;
 
-# define os_aio(type, is_log, mode, name, file, buf, offset,		\
-	n, page_size, message1, message2, write_size)			\
-	pfs_os_aio_func(type, is_log, mode, name, file, buf, offset,	\
-		        n, page_size, message1, message2, write_size,	\
-		        __FILE__, __LINE__)
+/** Encryption information total size for 5.7.11: magic number + master_key_id +
+key + iv + checksum */
+static const ulint ENCRYPTION_INFO_SIZE_V1 = (ENCRYPTION_MAGIC_SIZE \
+					 + (ENCRYPTION_KEY_LEN * 2) \
+					 + 2 * sizeof(ulint));
 
+/** Encryption information total size: magic number + master_key_id +
+key + iv + server_uuid + checksum */
+static const ulint ENCRYPTION_INFO_SIZE_V2 = (ENCRYPTION_MAGIC_SIZE \
+					 + (ENCRYPTION_KEY_LEN * 2) \
+					 + ENCRYPTION_SERVER_UUID_LEN \
+					 + 2 * sizeof(ulint));
 
-# define os_file_read(file, buf, offset, n)				\
-	pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
+class IORequest;
 
-# define os_file_read_no_error_handling(file, buf, offset, n)		\
-	pfs_os_file_read_no_error_handling_func(file, buf, offset, n,	\
-		                                __FILE__, __LINE__)
+/** Encryption algorithm. */
+struct Encryption {
 
-# define os_file_write(name, file, buf, offset, n)			\
-	pfs_os_file_write_func(name, file, buf, offset,			\
-			       n, __FILE__, __LINE__)
+	/** Algorithm types supported */
+	enum Type {
 
-# define os_file_flush(file)						\
-	pfs_os_file_flush_func(file, __FILE__, __LINE__)
+		/** No encryption */
+		NONE = 0,
 
-# define os_file_rename(key, oldpath, newpath)				\
-	pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+		/** Use AES */
+		AES = 1,
+	};
 
-# define os_file_delete(key, name)					\
-	pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
+	/** Encryption information format version */
+	enum Version {
 
-# define os_file_delete_if_exists(key, name)				\
-	pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
-#else /* UNIV_PFS_IO */
+		/** Version in 5.7.11 */
+		ENCRYPTION_VERSION_1 = 0,
 
-/* If UNIV_PFS_IO is not defined, these I/O APIs point
-to original un-instrumented file I/O APIs */
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes)	\
-	os_file_create_func(name, create, purpose, type, success, atomic_writes)
+		/** Version in > 5.7.11 */
+		ENCRYPTION_VERSION_2 = 1,
+	};
 
-# define os_file_create_simple(key, name, create_mode, access, success) \
-	os_file_create_simple_func(name, create_mode, access, success)
+	/** Default constructor: no encryption and no key material attached */
+	Encryption() : m_type(NONE), m_key(NULL), m_klen(0), m_iv(NULL) { }
 
-# define os_file_create_simple_no_error_handling(			\
-	key, name, create_mode, access, success, atomic_writes)		\
-		os_file_create_simple_no_error_handling_func(		\
-			name, create_mode, access, success, atomic_writes)
+	/** Specific constructor; debug builds assert on an unknown type.
+	@param[in]	type		Algorithm type */
+	explicit Encryption(Type type)
+		:
+		m_type(type), m_key(NULL), m_klen(0), m_iv(NULL)
+	{
+#ifdef UNIV_DEBUG
+		switch (m_type) {
+		case NONE:
+		case AES:
-
-# define os_file_close(file)	os_file_close_func(file)
+			break;	/* supported type: must not reach ut_error */
+		default:
+			ut_error;
+		}
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Copy constructor */
+	Encryption(const Encryption& other)
+		:
+		m_type(other.m_type),
+		m_key(other.m_key),
+		m_klen(other.m_klen),
+		m_iv(other.m_iv)
+	{ };
+
+	/** Check if page is encrypted page or not
+	@param[in]	page	page which need to check
+	@return true if it is a encrypted page */
+	static bool is_encrypted_page(const byte* page)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Check the encryption option and set it
+	@param[in]	option		encryption option
+	@param[in,out]	type		The encryption type
+	@return DB_SUCCESS or DB_UNSUPPORTED */
+	dberr_t set_algorithm(const char* option, Encryption* type)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Validate the algorithm string.
+        @param[in]      algorithm       Encryption algorithm to check
+        @return DB_SUCCESS or error code */
+	static dberr_t validate(const char* algorithm)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Convert to a "string".
+        @param[in]      type            The encryption type
+        @return the string representation */
+        static const char* to_string(Type type)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Check if the string is "empty" or "none".
+        @param[in]      algorithm       Encryption algorithm to check
+        @return true if no algorithm requested */
+	static bool is_none(const char* algorithm)
+		MY_ATTRIBUTE((warn_unused_result));
+
+        /** Generate random encryption value for key and iv.
+        @param[in,out]	value	Encryption value */
+	static void random_value(byte* value);
+
+	/** Create new master key for key rotation.
+        @param[in,out]	master_key	master key */
+	static void create_master_key(byte** master_key);
+
+        /** Get master key by key id.
+        @param[in]	master_key_id	master key id
+	@param[in]	srv_uuid	uuid of server instance
+        @param[in,out]	master_key	master key */
+	static void get_master_key(ulint master_key_id,
+				   char* srv_uuid,
+				   byte** master_key);
+
+        /** Get current master key and key id.
+        @param[in,out]	master_key_id	master key id
+        @param[in,out]	master_key	master key
+        @param[in,out]	version		encryption information version */
+	static void get_master_key(ulint* master_key_id,
+				   byte** master_key,
+				   Encryption::Version*  version);
+
+	/** Encrypt the page data contents. Page type can't be
+	FIL_PAGE_ENCRYPTED, FIL_PAGE_COMPRESSED_AND_ENCRYPTED,
+	FIL_PAGE_ENCRYPTED_RTREE.
+	@param[in]	type		IORequest
+	@param[in,out]	src		page data which need to encrypt
+	@param[in]	src_len		Size of the source in bytes
+	@param[in,out]	dst		destination area
+	@param[in,out]	dst_len		Size of the destination in bytes
+	@return buffer data, dst_len will have the length of the data */
+	byte* encrypt(
+		const IORequest&	type,
+		byte*			src,
+		ulint			src_len,
+		byte*			dst,
+		ulint*			dst_len)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Decrypt the page data contents. Page type must be
+	FIL_PAGE_ENCRYPTED, FIL_PAGE_COMPRESSED_AND_ENCRYPTED,
+	FIL_PAGE_ENCRYPTED_RTREE, if not then the source contents are
+	left unchanged and DB_SUCCESS is returned.
+	@param[in]	type		IORequest
+	@param[in,out]	src		Data read from disk, decrypt
+					data will be copied to this page
+	@param[in]	src_len		source data length
+	@param[in,out]	dst		Scratch area to use for decrypt
+	@param[in]	dst_len		Size of the scratch area in bytes
+	@return DB_SUCCESS or error code */
+	dberr_t decrypt(
+		const IORequest&	type,
+		byte*			src,
+		ulint			src_len,
+		byte*			dst,
+		ulint			dst_len)
+		MY_ATTRIBUTE((warn_unused_result));
+
+	/** Encrypt type */
+	Type			m_type;
+
+	/** Encrypt key */
+	byte*			m_key;
+
+	/** Encrypt key length*/
+	ulint			m_klen;
+
+	/** Encrypt initial vector */
+	byte*			m_iv;
+
+	/** Current master key id */
+	static ulint		master_key_id;
+
+	/** Current uuid of server instance */
+	static char		uuid[ENCRYPTION_SERVER_UUID_LEN + 1];
+};
 
-# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
-	message2, write_size)						\
-	os_aio_func(type, is_log, mode, name, file, buf, offset, n,	\
-		page_size, message1, message2, write_size)
+/** Types for AIO operations @{ */
+
+/** No transformations during read/write, write as is. */
+#define IORequestRead		IORequest(IORequest::READ)
+#define IORequestWrite		IORequest(IORequest::WRITE)
+#define IORequestLogRead	IORequest(IORequest::LOG | IORequest::READ)
+#define IORequestLogWrite	IORequest(IORequest::LOG | IORequest::WRITE)
+
+/**
+The IO Context that is passed down to the low level IO code */
+class IORequest {
+public:
+	/** Flags passed in the request, they can be ORred together. */
+	enum {
+		READ = 1,
+		WRITE = 2,
+
+		/** Double write buffer recovery. */
+		DBLWR_RECOVER = 4,
+
+		/** Enumerations below can be ORed to READ/WRITE above*/
+
+		/** Data file */
+		DATA_FILE = 8,
+
+		/** Log file request*/
+		LOG = 16,
+
+		/** Disable partial read warnings */
+		DISABLE_PARTIAL_IO_WARNINGS = 32,
+
+		/** Do not wake i/o-handler threads, but the caller will do
+		the waking explicitly later, in this way the caller can post
+		several requests in a batch; NOTE that the batch must not be
+		so big that it exhausts the slots in AIO arrays! NOTE that
+		a simulated batch may introduce hidden chances of deadlocks,
+		because I/Os are not actually handled until all
+		have been posted: use with great caution! */
+		DO_NOT_WAKE = 64,
+
+		/** Ignore failed reads of non-existent pages */
+		IGNORE_MISSING = 128,
+
+		/** Use punch hole if available, only makes sense if
+		compression algorithm != NONE. Ignored if not set */
+		PUNCH_HOLE = 256,
+
+		/** Force raw read, do not try to compress/decompress.
+		This can be used to force a read and write without any
+		compression e.g., for redo log, merge sort temporary files
+		and the truncate redo log. */
+		NO_COMPRESSION = 512
+
+	};
+
+	/** Default constructor */
+	IORequest()
+		:
+		m_block_size(UNIV_SECTOR_SIZE),
+		m_type(READ),
+		m_compression()
+	{
+		/* No op */
+	}
+
+	/**
+	@param[in]	type		Request type, can be a value that is
+					ORed from the above enum */
+	explicit IORequest(ulint type)
+		:
+		m_block_size(UNIV_SECTOR_SIZE),
+		m_type(static_cast<uint16_t>(type)),
+		m_compression()
+	{
+		if (is_log()) {
+			disable_compression();
+		}
+
+		if (!is_punch_hole_supported()) {
+			clear_punch_hole();
+		}
+	}
+
+	/** Destructor */
+	~IORequest() { }
+
+	/** @return true if ignore missing flag is set */
+	static bool ignore_missing(ulint type)
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((type & IGNORE_MISSING) == IGNORE_MISSING);
+	}
+
+	/** @return true if it is a read request */
+	bool is_read() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & READ) == READ);
+	}
+
+	/** @return true if it is a write request */
+	bool is_write() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & WRITE) == WRITE);
+	}
+
+	/** @return true if it is a redo log write */
+	bool is_log() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & LOG) == LOG);
+	}
+
+	/** @return true if the simulated AIO thread should be woken up */
+	bool is_wake() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & DO_NOT_WAKE) == 0);
+	}
+
+	/** @return true if partial read warning disabled */
+	bool is_partial_io_warning_disabled() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & DISABLE_PARTIAL_IO_WARNINGS)
+		       == DISABLE_PARTIAL_IO_WARNINGS);
+	}
+
+	/** Disable partial read warnings */
+	void disable_partial_io_warnings()
+	{
+		m_type |= DISABLE_PARTIAL_IO_WARNINGS;
+	}
+
+	/** @return true if missing files should be ignored */
+	bool ignore_missing() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(ignore_missing(m_type));
+	}
+
+	/** @return true if punch hole should be used */
+	bool punch_hole() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & PUNCH_HOLE) == PUNCH_HOLE);
+	}
+
+	/** @return true if the read should be validated */
+	bool validate() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		ut_a(is_read() ^ is_write());
+
+		return(!is_read() || !punch_hole());
+	}
+
+	/** Set the punch hole flag */
+	void set_punch_hole()
+	{
+		if (is_punch_hole_supported()) {
+			m_type |= PUNCH_HOLE;
+		}
+	}
+
+	/** Clear the do not wake flag */
+	void clear_do_not_wake()
+	{
+		m_type &= ~DO_NOT_WAKE;
+	}
+
+	/** Clear the punch hole flag */
+	void clear_punch_hole()
+	{
+		m_type &= ~PUNCH_HOLE;
+	}
+
+	/** @return the block size to use for IO */
+	ulint block_size() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(m_block_size);
+	}
+
+	/** Set the block size for IO
+	@param[in] block_size		Block size to set */
+	void block_size(ulint block_size)
+	{
+		m_block_size = static_cast<uint32_t>(block_size);
+	}
+
+	/** Clear all compression related flags */
+	void clear_compressed()
+	{
+		clear_punch_hole();
+
+		m_compression.m_type  = Compression::NONE;
+	}
+
+	/** Compare the request type flags of two requests
+	@return true if the type flags are equal */
+	bool operator==(const IORequest& rhs) const
+	{
+		return(m_type == rhs.m_type);
+	}
+
+	/** Set compression algorithm; Compression::NONE is ignored
+	@param[in] type		The compression algorithm to use */
+	void compression_algorithm(Compression::Type type)
+	{
+		if (type == Compression::NONE) {
+			return;
+		}
+
+		set_punch_hole();
+
+		m_compression.m_type = type;
+	}
+
+	/** Get the compression algorithm.
+	@return the compression algorithm */
+	Compression compression_algorithm() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(m_compression);
+	}
+
+	/** @return true if the page should be compressed */
+	bool is_compressed() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(compression_algorithm().m_type != Compression::NONE);
+	}
+
+	/** @return true if compression is enabled (NO_COMPRESSION not set) */
+	bool is_compression_enabled() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & NO_COMPRESSION) == 0);
+	}
+
+	/** Disable transformations. */
+	void disable_compression()
+	{
+		m_type |= NO_COMPRESSION;
+	}
+
+	/** Set encryption algorithm
+	@param[in] type		The encryption algorithm to use */
+	void encryption_algorithm(Encryption::Type type)
+	{
+		if (type == Encryption::NONE) {
+			return;
+		}
+
+		m_encryption.m_type = type;
+	}
+
+	/** Set encryption key and iv
+	@param[in] key		The encryption key to use
+	@param[in] key_len	length of the encryption key
+	@param[in] iv		The encryption iv to use */
+	void encryption_key(byte* key,
+			    ulint key_len,
+			    byte* iv)
+	{
+		m_encryption.m_key = key;
+		m_encryption.m_klen = key_len;
+		m_encryption.m_iv = iv;
+	}
+
+	/** Get the encryption algorithm.
+	@return the encryption algorithm */
+	Encryption encryption_algorithm() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(m_encryption);
+	}
+
+	/** @return true if the page should be encrypted. */
+	bool is_encrypted() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return(m_encryption.m_type != Encryption::NONE);
+	}
+
+	/** Clear all encryption related flags */
+	void clear_encrypted()
+	{
+		m_encryption.m_key = NULL;
+		m_encryption.m_klen = 0;
+		m_encryption.m_iv = NULL;
+		m_encryption.m_type = Encryption::NONE;
+	}
+
+	/** Note that the IO is for double write recovery. */
+	void dblwr_recover()
+	{
+		m_type |= DBLWR_RECOVER;
+	}
+
+	/** @return true if the request is from the dblwr recovery */
+	bool is_dblwr_recover() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
+	}
+
+	/** @return true if punch hole is supported */
+	static bool is_punch_hole_supported()
+	{
+
+		/* In this debugging mode, we act as if punch hole is supported,
+		and then skip any calls to actually punch a hole here.
+		In this way, Transparent Page Compression is still being tested. */
+		DBUG_EXECUTE_IF("ignore_punch_hole",
+			return(true);
+		);
+
+#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
+		return(true);
+#else
+		return(false);
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || _WIN32 */
+	}
 
-# define os_file_read(file, buf, offset, n)				\
-	os_file_read_func(file, buf, offset, n)
+private:
+	/* File system best block size */
+	uint32_t		m_block_size;
 
-# define os_file_read_no_error_handling(file, buf, offset, n)		\
-	os_file_read_no_error_handling_func(file, buf, offset, n)
+	/** Request type bit flags */
+	uint16_t		m_type;
 
-# define os_file_write(name, file, buf, offset, n)			\
-	os_file_write_func(name, file, buf, offset, n)
+	/** Compression algorithm */
+	Compression		m_compression;
 
-# define os_file_flush(file)	os_file_flush_func(file)
+	/** Encryption algorithm */
+	Encryption		m_encryption;
+};
 
-# define os_file_rename(key, oldpath, newpath)				\
-	os_file_rename_func(oldpath, newpath)
+/* @} */
 
-# define os_file_delete(key, name)	os_file_delete_func(name)
+/** Sparse file size information. */
+struct os_file_size_t {
+	/** Total size of file in bytes */
+	os_offset_t	m_total_size;
 
-# define os_file_delete_if_exists(key, name)				\
-	os_file_delete_if_exists_func(name)
+	/** If it is a sparse file then this is the number of bytes
+	actually allocated for the file. */
+	os_offset_t	m_alloc_size;
+};
 
-#endif /* UNIV_PFS_IO */
+/** Win NT does not allow more than 64 */
+static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 32;
+
+/** Modes for aio operations @{ */
+/** Normal asynchronous i/o not for ibuf pages or ibuf bitmap pages */
+static const ulint OS_AIO_NORMAL = 21;
+
+/**  Asynchronous i/o for ibuf pages or ibuf bitmap pages */
+static const ulint OS_AIO_IBUF = 22;
+
+/** Asynchronous i/o for the log */
+static const ulint OS_AIO_LOG = 23;
+
+/** Asynchronous i/o where the calling thread will itself wait for
+the i/o to complete, doing also the job of the i/o-handler thread;
+can be used for any pages, ibuf or non-ibuf.  This is used to save
+CPU time, as we can do with fewer thread switches. Plain synchronous
+I/O is not as good, because it must serialize the file seek and read
+or write, causing a bottleneck for parallelism. */
+static const ulint OS_AIO_SYNC = 24;
+/* @} */
+
+extern ulint	os_n_file_reads;
+extern ulint	os_n_file_writes;
+extern ulint	os_n_fsyncs;
 
 /* File types for directory entry data type */
 
 enum os_file_type_t {
 	OS_FILE_TYPE_UNKNOWN = 0,
-	OS_FILE_TYPE_FILE,			/* regular file
-						(or a character/block device) */
+	OS_FILE_TYPE_FILE,			/* regular file */
 	OS_FILE_TYPE_DIR,			/* directory */
-	OS_FILE_TYPE_LINK			/* symbolic link */
+	OS_FILE_TYPE_LINK,			/* symbolic link */
+	OS_FILE_TYPE_BLOCK			/* block device */
 };
 
 /* Maximum path string length in bytes when referring to tables with in the
 './databasename/tablename.ibd' path format; we can allocate at least 2 buffers
 of this size from the thread stack; that is why this should not be made much
-bigger than 4000 bytes */
+bigger than 4000 bytes.  The maximum path length used by any storage engine
+in the server must be at least this big. */
+
+/* MySQL 5.7 my_global.h */
+#ifndef FN_REFLEN_SE
+#define FN_REFLEN_SE        4000
+#endif
+
 #define OS_FILE_MAX_PATH	4000
+#if (FN_REFLEN_SE < OS_FILE_MAX_PATH)
+# error "(FN_REFLEN_SE < OS_FILE_MAX_PATH)"
+#endif
 
 /** Struct used in fetching information of a file in a directory */
 struct os_file_stat_t {
 	char		name[OS_FILE_MAX_PATH];	/*!< path to a file */
 	os_file_type_t	type;			/*!< file type */
-	ib_int64_t	size;			/*!< file size */
+	os_offset_t	size;			/*!< file size in bytes */
+	os_offset_t	alloc_size;		/*!< Allocated size for
+						sparse files in bytes */
+	size_t		block_size;		/*!< Block size to use for IO
+						in bytes*/
 	time_t		ctime;			/*!< creation time */
 	time_t		mtime;			/*!< modification time */
 	time_t		atime;			/*!< access time */
@@ -412,615 +918,818 @@ struct os_file_stat_t {
 						if type == OS_FILE_TYPE_FILE */
 };
 
-#ifdef __WIN__
-typedef HANDLE	os_file_dir_t;	/*!< directory stream */
-#else
-typedef DIR*	os_file_dir_t;	/*!< directory stream */
-#endif
-
-#ifdef __WIN__
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return	OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
-OS_WIN7. */
-UNIV_INTERN
-ulint
-os_get_os_version(void);
-/*===================*/
-#endif /* __WIN__ */
 #ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-UNIV_INTERN
-void
-os_io_init_simple(void);
-/*===================*/
-
-
 /** Create a temporary file. This function is like tmpfile(3), but
 the temporary file is created in the given parameter path. If the path
 is null then it will create the file in the mysql server configuration
 parameter (--tmpdir).
 @param[in]	path	location for creating temporary file
 @return temporary file handle, or NULL on error */
-UNIV_INTERN
 FILE*
 os_file_create_tmpfile(
 	const char*	path);
-
 #endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
+
+/** The os_file_opendir() function opens a directory stream corresponding to the
 directory named by the dirname argument. The directory stream is positioned
 at the first entry. In both Unix and Windows we automatically skip the '.'
 and '..' items at the start of the directory listing.
-@return	directory stream, NULL if error */
-UNIV_INTERN
+
+@param[in]	dirname		directory name; it must not contain a trailing
+				'\' or '/'
+@param[in]	is_fatal	true if we should treat an error as a fatal
+				error; if we try to open symlinks then we do
+				not wish a fatal error if it happens not to be
+				a directory
+@return directory stream, NULL if error */
 os_file_dir_t
 os_file_opendir(
-/*============*/
-	const char*	dirname,	/*!< in: directory name; it must not
-					contain a trailing '\' or '/' */
-	ibool		error_is_fatal);/*!< in: TRUE if we should treat an
-					error as a fatal error; if we try to
-					open symlinks then we do not wish a
-					fatal error if it happens not to be
-					a directory */
-/***********************************************************************//**
+	const char*	dirname,
+	bool		is_fatal);
+
+/**
 Closes a directory stream.
-@return	0 if success, -1 if failure */
-UNIV_INTERN
+@param[in] dir	directory stream
+@return 0 if success, -1 if failure */
 int
 os_file_closedir(
-/*=============*/
-	os_file_dir_t	dir);	/*!< in: directory stream */
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
+	os_file_dir_t	dir);
+
+/** This function returns information of the next file in the directory. We jump
 over the '.' and '..' entries in the directory.
-@return	0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
+@param[in]	dirname		directory name or path
+@param[in]	dir		directory stream
+@param[out]	info		buffer where the info is returned
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
 int
 os_file_readdir_next_file(
-/*======================*/
-	const char*	dirname,/*!< in: directory name or path */
-	os_file_dir_t	dir,	/*!< in: directory stream */
-	os_file_stat_t*	info);	/*!< in/out: buffer where the info is returned */
-/*****************************************************************//**
+	const char*	dirname,
+	os_file_dir_t	dir,
+	os_file_stat_t*	info);
+
+/**
 This function attempts to create a directory named pathname. The new directory
 gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
 directory exists already, nothing is done and the call succeeds, unless the
 fail_if_exists arguments is true.
-@return	TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
+
+@param[in]	pathname	directory name as null-terminated string
+@param[in]	fail_if_exists	if true, pre-existing directory is treated
+				as an error.
+@return true if call succeeds, false on error */
+bool
 os_file_create_directory(
-/*=====================*/
-	const char*	pathname,	/*!< in: directory name as
-					null-terminated string */
-	ibool		fail_if_exists);/*!< in: if TRUE, pre-existing directory
-					is treated as an error. */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create_simple(), not directly
+	const char*	pathname,
+	bool		fail_if_exists);
+
+/** NOTE! Use the corresponding macro os_file_create_simple(), not directly
 this function!
 A simple function to open or create a file.
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeed, false if error
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
+	can be retrieved with os_file_get_last_error */
 os_file_t
 os_file_create_simple_func(
-/*=======================*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
-				OS_FILE_READ_WRITE */
-	ibool*		success);/*!< out: TRUE if succeed, FALSE if error */
-/****************************************************************//**
-NOTE! Use the corresponding macro
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success);
+
+/** NOTE! Use the corresponding macro
 os_file_create_simple_no_error_handling(), not directly this function!
 A simple function to open or create a file.
+@param[in]	name		name of the file or path as a null-terminated string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option
+				is used by a backup program reading the file
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
+	can be retrieved with os_file_get_last_error */
 os_file_t
 os_file_create_simple_no_error_handling_func(
-/*=========================================*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
-				OS_FILE_READ_WRITE, or
-				OS_FILE_READ_ALLOW_DELETE; the last option is
-				used by a backup program reading the file */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes)/*!< in: atomic writes table option
-				      value */
-	__attribute__((nonnull, warn_unused_result));
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. */
-UNIV_INTERN
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Tries to disable OS caching on an opened file descriptor.
+@param[in]	fd		file descriptor to alter
+@param[in]	file_name	file name, used in the diagnostic message
+@param[in]	operation_name	"open" or "create"; used in the diagnostic
+				message */
 void
 os_file_set_nocache(
 /*================*/
 	os_file_t	fd,		/*!< in: file descriptor to alter */
-	const char*	file_name,	/*!< in: file name, used in the
-					diagnostic message */
-	const char*	operation_name);/*!< in: "open" or "create"; used in the
-					diagnostic message */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create(), not directly
+	const char*	file_name,
+	const char*	operation_name);
+
+/** NOTE! Use the corresponding macro os_file_create(), not directly
 this function!
 Opens an existing file or creates a new.
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	purpose		OS_FILE_AIO, if asynchronous, non-buffered I/O
+				is desired, OS_FILE_NORMAL, if any normal file;
+				NOTE that it also depends on type, os_aio_..
+				and srv_.. variables whether we really use
+				async I/O or unbuffered I/O: look in the
+				function source code for the exact rules
+@param[in]	type		OS_DATA_FILE or OS_LOG_FILE
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
+	can be retrieved with os_file_get_last_error */
 os_file_t
 os_file_create_func(
-/*================*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: create mode */
-	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
-				non-buffered i/o is desired,
-				OS_FILE_NORMAL, if any normal file;
-				NOTE that it also depends on type, os_aio_..
-				and srv_.. variables whether we really use
-				async i/o or unbuffered i/o: look in the
-				function source code for the exact rules */
-	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes)/*!< in: atomic writes table option
-				      value */
-	__attribute__((nonnull, warn_unused_result));
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return	TRUE if success */
-UNIV_INTERN
+	const char*	name,
+	ulint		create_mode,
+	ulint		purpose,
+	ulint		type,
+	bool		read_only,
+	bool*		success)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Deletes a file. The file has to be closed before calling this.
+@param[in]	name		file path as a null-terminated string
+@return true if success */
 bool
-os_file_delete_func(
-/*================*/
-	const char*	name);	/*!< in: file path as a null-terminated
-				string */
+os_file_delete_func(const char* name);
 
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return	TRUE if success */
-UNIV_INTERN
+/** Deletes a file if it exists. The file has to be closed before calling this.
+@param[in]	name		file path as a null-terminated string
+@param[out]	exist		indicate if file pre-exist
+@return true if success */
 bool
-os_file_delete_if_exists_func(
-/*==========================*/
-	const char*	name);	/*!< in: file path as a null-terminated
-				string */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_rename(), not directly
+os_file_delete_if_exists_func(const char* name, bool* exist);
+
+/** NOTE! Use the corresponding macro os_file_rename(), not directly
 this function!
 Renames a file (can also move it to another directory). It is safest that the
 file is closed before calling this function.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename_func(
-/*================*/
-	const char*	oldpath,	/*!< in: old file path as a
-					null-terminated string */
-	const char*	newpath);	/*!< in: new file path */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this
+@param[in]	oldpath		old file path as a null-terminated string
+@param[in]	newpath		new file path
+@return true if success */
+bool
+os_file_rename_func(const char* oldpath, const char* newpath);
+
+/** NOTE! Use the corresponding macro os_file_close(), not directly this
 function!
 Closes a file handle. In case of error, error number can be retrieved with
 os_file_get_last_error.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_func(
-/*===============*/
-	os_file_t	file);	/*!< in, own: handle to a file */
+@param[in]	file		own: handle to a file
+@return true if success */
+bool
+os_file_close_func(os_file_t file);
 
 #ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
+
+/* Keys to register InnoDB I/O with performance schema */
+extern mysql_pfs_key_t	innodb_data_file_key;
+extern mysql_pfs_key_t	innodb_log_file_key;
+extern mysql_pfs_key_t	innodb_temp_file_key;
+
+/* The following six macros are instrumentation to register
+various file I/O operations with performance schema.
+1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
+used to register file creation, opening, closing and renaming.
+2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
+used to register actual file read, write and flush.
+3) register_pfs_file_close_begin() and register_pfs_file_close_end()
+are used to register file deletion operations. */
+# define register_pfs_file_open_begin(state, locker, key, op, name,	\
+				      src_file, src_line)		\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
+		state, key, op, name, &locker);				\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(start_file_open_wait)(			\
+			locker, src_file, src_line);			\
+	}								\
+} while (0)
+
+# define register_pfs_file_open_end(locker, file)			\
+do {									\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
+			locker, file);					\
+	}								\
+} while (0)
+
+# define register_pfs_file_close_begin(state, locker, key, op, name,	\
+				      src_file, src_line)		\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
+		state, key, op, name, &locker);				\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(start_file_close_wait)(			\
+			locker, src_file, src_line);			\
+	}								\
+} while (0)
+
+# define register_pfs_file_close_end(locker, result)			\
+do {									\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(end_file_close_wait)(			\
+			locker, result);				\
+	}								\
+} while (0)
+
+# define register_pfs_file_io_begin(state, locker, file, count, op,	\
+				    src_file, src_line)			\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(	\
+		state, file, op);					\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(start_file_wait)(				\
+			locker, count, src_file, src_line);		\
+	}								\
+} while (0)
+
+# define register_pfs_file_io_end(locker, count)			\
+do {									\
+	if (locker != NULL) {						\
+		PSI_FILE_CALL(end_file_wait)(locker, count);		\
+	}								\
+} while (0)
+
+/* Following macros/functions are file I/O APIs that would be performance
+schema instrumented if "UNIV_PFS_IO" is defined. They would point to
+wrapper functions with performance schema instrumentation in such case.
+
+os_file_create
+os_file_create_simple
+os_file_create_simple_no_error_handling
+os_file_close, os_file_flush
+os_file_rename
+os_file_delete, os_file_delete_if_exists
+os_aio
+os_file_read, os_file_read_no_error_handling
+os_file_write
+
+The wrapper functions have the prefix of "pfs_". */
+
+# define os_file_create(key, name, create, purpose, type, read_only,	\
+			success)					\
+	pfs_os_file_create_func(key, name, create, purpose,	type,	\
+		read_only, success, __FILE__, __LINE__)
+
+# define os_file_create_simple(key, name, create, access,		\
+		read_only, success)					\
+	pfs_os_file_create_simple_func(key, name, create, access,	\
+		read_only, success, __FILE__, __LINE__)
+
+# define os_file_create_simple_no_error_handling(			\
+	key, name, create_mode, access, read_only, success)		\
+	pfs_os_file_create_simple_no_error_handling_func(		\
+		key, name, create_mode, access,				\
+		read_only, success, __FILE__, __LINE__)
+
+# define os_file_close(file)						\
+	pfs_os_file_close_func(file, __FILE__, __LINE__)
+
+# define os_aio(type, mode, name, file, buf, offset,			\
+	n, read_only, message1, message2, wsize)			\
+	pfs_os_aio_func(type, mode, name, file, buf, offset,		\
+		n, read_only, message1, message2, wsize,		\
+			__FILE__, __LINE__)
+
+# define os_file_read(type, file, buf, offset, n)			\
+	pfs_os_file_read_func(type, file, buf, offset, n, __FILE__, __LINE__)
+
+# define os_file_read_no_error_handling(type, file, buf, offset, n, o)	\
+	pfs_os_file_read_no_error_handling_func(			\
+		type, file, buf, offset, n, o, __FILE__, __LINE__)
+
+# define os_file_write(type, name, file, buf, offset, n)	\
+	pfs_os_file_write_func(type, name, file, buf, offset,	\
+			       n, __FILE__, __LINE__)
+
+# define os_file_flush(file)						\
+	pfs_os_file_flush_func(file, __FILE__, __LINE__)
+
+# define os_file_rename(key, oldpath, newpath)				\
+	pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+
+# define os_file_delete(key, name)					\
+	pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
+
+# define os_file_delete_if_exists(key, name, exist)			\
+	pfs_os_file_delete_if_exists_func(key, name, exist, __FILE__, __LINE__)
+
+/** NOTE! Please use the corresponding macro os_file_create_simple(),
 not directly this function!
 A performance schema instrumented wrapper function for
 os_file_create_simple() which opens or creates a file.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+	can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_simple_func(
-/*===========================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
-				OS_FILE_READ_WRITE */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Please use the corresponding macro
 os_file_create_simple_no_error_handling(), not directly this function!
 A performance schema instrumented wrapper function for
 os_file_create_simple_no_error_handling(). Add instrumentation to
 monitor file creation/open.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option is
+				used by a backup program reading the file
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+	can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode, /*!< in: file create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
-				OS_FILE_READ_WRITE, or
-				OS_FILE_READ_ALLOW_DELETE; the last option is
-				used by a backup program reading the file */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes,/*!< in: atomic writes table option
-				value */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Please use the corresponding macro os_file_create(), not directly
 this function!
 A performance schema wrapper function for os_file_create().
 Add instrumentation to monitor file creation/open.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	purpose		OS_FILE_AIO, if asynchronous, non-buffered I/O
+				is desired, OS_FILE_NORMAL, if any normal file;
+				NOTE that type, os_aio_.. and srv_.. variables
+				also decide whether async or unbuffered I/O is
+				used: look in the function source for the rules
+@param[in]	type		OS_DATA_FILE or OS_LOG_FILE
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
+	can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_func(
-/*====================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: file create mode */
-	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
-				non-buffered i/o is desired,
-				OS_FILE_NORMAL, if any normal file;
-				NOTE that it also depends on type, os_aio_..
-				and srv_.. variables whether we really use
-				async i/o or unbuffered i/o: look in the
-				function source code for the exact rules */
-	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes,/*!< in: atomic writes table option
-				      value*/
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close(), not directly
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		purpose,
+	ulint		type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Please use the corresponding macro os_file_close(), not directly
 this function!
 A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
+@param[in]	file		handle to a file
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_close_func(
-/*===================*/
-        os_file_t	file,	/*!< in, own: handle to a file */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
+	os_file_t	file,
+	const char*	src_file,
+	ulint		src_line);
+
+/** NOTE! Please use the corresponding macro os_file_read(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_read() which requests a synchronous read operation.
-@return	TRUE if request was successful, FALSE if fail */
+@param[in, out]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_read_func(
-/*==================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n,	/*!< in: number of bytes to read */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	const char*	src_file,
+	ulint		src_line);
+
+/** NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
 not directly this function!
 This is the performance schema instrumented wrapper function for
 os_file_read_no_error_handling_func() which requests a synchronous
 read operation.
-@return	TRUE if request was successful, FALSE if fail */
+@param[in, out]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@param[out]	o		number of bytes actually read
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_read_no_error_handling_func(
-/*====================================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n,	/*!< in: number of bytes to read */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	ulint*		o,
+	const char*	src_file,
+	ulint		src_line);
+
+/** NOTE! Please use the corresponding macro os_aio(), not directly this
 function!
 Performance schema wrapper function of os_aio() which requests
-an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
+an asynchronous I/O operation.
+@param[in]	type		IO request context
+@param[in]	mode		IO mode
+@param[in]	name		Name of the file or path as NUL terminated
+				string
+@param[in]	file		Open file handle
+@param[in,out]	buf		buffer where to read or from which to write
+@param[in]	offset		file offset where to read or write
+@param[in]	n		number of bytes to read or write
+@param[in]	read_only	if true read only mode checks are enforced
+@param[in,out]	m1		Message for the AIO handler, (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@param[in,out]	m2		message for the AIO handler (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was queued successfully, else an error code */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_aio_func(
-/*============*/
-	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		is_log,	/*!< in: 1 is OS_FILE_LOG or 0 */
-	ulint		mode,	/*!< in: OS_AIO_NORMAL etc. I/O mode */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read or from which
-				to write */
-	os_offset_t	offset,	/*!< in: file offset where to read or write */
-	ulint		n,	/*!< in: number of bytes to read or write */
-	ulint           page_size, /*!< in: page size in bytes */
-	fil_node_t*	message1,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-				OS_AIO_SYNC */
-	void*		message2,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-                                OS_AIO_SYNC */
-	ulint*		write_size,/*!< in/out: Actual write size initialized
-			       after fist successfull trim
-			       operation for this page and if
-			       initialized we do not trim again if
-			       actual page size does not decrease. */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
+	IORequest&	type,
+	ulint		mode,
+	const char*	name,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	bool		read_only,
+	fil_node_t*	m1,
+	void*		m2,
+	ulint*		wsize,
+	const char*	src_file,
+	ulint		src_line);
+
+/** NOTE! Please use the corresponding macro os_file_write(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_write() which requests a synchronous write operation.
-@return	TRUE if request was successful, FALSE if fail */
+@param[in, out]	type		IO request context
+@param[in]	name		Name of the file or path as NUL terminated
+				string
+@param[in]	file		Open file handle
+@param[in]	buf		buffer from which to write
+@param[in]	offset		file offset where to write
+@param[in]	n		number of bytes to write
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_write_func(
-/*===================*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	const void*	buf,	/*!< in: buffer from which to write */
-	os_offset_t	offset,	/*!< in: file offset where to write */
-	ulint		n,	/*!< in: number of bytes to write */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
+	IORequest&	type,
+	const char*	name,
+	os_file_t	file,
+	const void*	buf,
+	os_offset_t	offset,
+	ulint		n,
+	const char*	src_file,
+	ulint		src_line);
+
+/** NOTE! Please use the corresponding macro os_file_flush(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_flush() which flushes the write buffers of a given file to the disk.
 Flushes the write buffers of a given file to the disk.
+@param[in]	file		Open file handle
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return TRUE if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_flush_func(
-/*===================*/
-	os_file_t	file,	/*!< in, own: handle to a file */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
+	os_file_t	file,
+	const char*	src_file,
+	ulint		src_line);
 
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
+/** NOTE! Please use the corresponding macro os_file_rename(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_rename()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	oldpath		old file path as a null-terminated string
+@param[in]	newpath		new file path
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_rename_func(
-/*====================*/
-	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
-	const char*	oldpath,/*!< in: old file path as a null-terminated
-				string */
-	const char*	newpath,/*!< in: new file path */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
+	mysql_pfs_key_t	key,
+	const char*	oldpath,
+	const char*	newpath,
+	const char*	src_file,
+	ulint		src_line);
+
+/**
 NOTE! Please use the corresponding macro os_file_delete(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_delete()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	name		file path as a null-terminated string
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
 bool
 pfs_os_file_delete_func(
-/*====================*/
-	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: old file path as a null-terminated
-				string */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
+	mysql_pfs_key_t	key,
+	const char*	name,
+	const char*	src_file,
+	ulint		src_line);
+
+/**
 NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
 directly this function!
 This is the performance schema instrumented wrapper function for
 os_file_delete_if_exists()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	name		file path as a null-terminated string
+@param[in]	exist		indicate if file pre-exist
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
 bool
 pfs_os_file_delete_if_exists_func(
-/*==============================*/
-	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: old file path as a null-terminated
-				string */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line);/*!< in: line where the func invoked */
+	mysql_pfs_key_t	key,
+	const char*	name,
+	bool*		exist,
+	const char*	src_file,
+	ulint		src_line);
+
+#else /* UNIV_PFS_IO */
+
+/* If UNIV_PFS_IO is not defined, these I/O APIs point
+to original un-instrumented file I/O APIs */
+# define os_file_create(key, name, create, purpose, type, read_only,	\
+			success)					\
+	os_file_create_func(name, create, purpose, type, read_only,	\
+			success)
+
+# define os_file_create_simple(key, name, create_mode, access,		\
+		read_only, success)					\
+	os_file_create_simple_func(name, create_mode, access,		\
+		read_only, success)
+
+# define os_file_create_simple_no_error_handling(			\
+	key, name, create_mode, access, read_only, success)		\
+	os_file_create_simple_no_error_handling_func(			\
+		name, create_mode, access, read_only, success)
+
+# define os_file_close(file)	os_file_close_func(file)
+
+# define os_aio(type, mode, name, file, buf, offset,			\
+	n, read_only, message1, message2, wsize)			\
+	os_aio_func(type, mode, name, file, buf, offset,		\
+		n, read_only, message1, message2, wsize)
+
+# define os_file_read(type, file, buf, offset, n)			\
+	os_file_read_func(type, file, buf, offset, n)
+
+# define os_file_read_no_error_handling(type, file, buf, offset, n, o)	\
+	os_file_read_no_error_handling_func(type, file, buf, offset, n, o)
+
+# define os_file_write(type, name, file, buf, offset, n)		\
+	os_file_write_func(type, name, file, buf, offset, n)
+
+# define os_file_flush(file)	os_file_flush_func(file)
+
+# define os_file_rename(key, oldpath, newpath)				\
+	os_file_rename_func(oldpath, newpath)
+
+# define os_file_delete(key, name)	os_file_delete_func(name)
+
+# define os_file_delete_if_exists(key, name, exist)			\
+	os_file_delete_if_exists_func(name, exist)
+
 #endif	/* UNIV_PFS_IO */
 
 #ifdef UNIV_HOTBACKUP
-/***********************************************************************//**
-Closes a file handle.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_no_error_handling(
-/*============================*/
-	os_file_t	file);	/*!< in, own: handle to a file */
+/** Closes a file handle.
+@param[in] file		handle to a file
+@return true if success */
+bool
+os_file_close_no_error_handling(os_file_t file);
 #endif /* UNIV_HOTBACKUP */
-/***********************************************************************//**
-Gets a file size.
-@return	file size, or (os_offset_t) -1 on failure */
-UNIV_INTERN
+
+/** Gets a file size.
+@param[in]	filename	name of the file whose size is queried
+@return file size if OK, else set m_total_size to ~0 and m_alloc_size
+	to errno */
+os_file_size_t
+os_file_get_size(
+	const char*	filename)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets a file size.
+@param[in]	file		handle to a file
+@return file size, or (os_offset_t) -1 on failure */
 os_offset_t
 os_file_get_size(
-/*=============*/
-	os_file_t	file)	/*!< in: handle to a file */
+	os_file_t	file)
 	MY_ATTRIBUTE((warn_unused_result));
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
+
+/** Write the specified number of zeros to a newly created file.
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	file		handle to a file
+@param[in]	size		file size
+@param[in]	read_only	Enable read-only checks if true
+@return true if success */
+bool
 os_file_set_size(
-/*=============*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	os_offset_t	size)	/*!< in: file size */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Truncates a file at its current position.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
+	const char*	name,
+	os_file_t	file,
+	os_offset_t	size,
+	bool		read_only)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Truncates a file at its current position.
+@param[in,out]	file	file to be truncated
+@return true if success */
+bool
 os_file_set_eof(
-/*============*/
 	FILE*		file);	/*!< in: file to be truncated */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_flush(), not directly this function!
+
+/** Truncates a file to a specified size in bytes. Does nothing if the size
+to preserve is greater than or equal to the current size of the file.
+@param[in]	pathname	file path
+@param[in]	file		file to be truncated
+@param[in]	size		size preserved in bytes
+@return true if success */
+bool
+os_file_truncate(
+	const char*	pathname,
+	os_file_t	file,
+	os_offset_t	size);
+
+/** NOTE! Use the corresponding macro os_file_flush(), not directly this
+function!
 Flushes the write buffers of a given file to the disk.
-@return	TRUE if success */
-UNIV_INTERN
-ibool
+@param[in]	file		handle to a file
+@return true if success */
+bool
 os_file_flush_func(
-/*===============*/
-	os_file_t	file);	/*!< in, own: handle to a file */
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
+	os_file_t	file);
+
+/** Retrieves the last error number if an error occurs in a file io function.
 The number should be retrieved before any other OS calls (because they may
 overwrite the error number). If the number is not known to this program,
 the OS error number + 100 is returned.
-@return	error number, or OS error number + 100 */
-UNIV_INTERN
+@param[in]	report		true if we want an error message printed
+				for all errors
+@return error number, or OS error number + 100 */
 ulint
 os_file_get_last_error(
-/*===================*/
-	bool	report_all_errors);	/*!< in: TRUE if we want an error message
-					printed of all errors */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read(), not directly this function!
+	bool		report);
+
+/** NOTE! Use the corresponding macro os_file_read(), not directly this
+function!
 Requests a synchronous read operation.
-@return	TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@return DB_SUCCESS if request was successful */
+dberr_t
 os_file_read_func(
-/*==============*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n);	/*!< in: number of bytes to read */
-/*******************************************************************//**
-Rewind file to its start, read at most size - 1 bytes from it to str, and
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Rewind file to its start, read at most size - 1 bytes from it to str, and
 NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-UNIV_INTERN
+mostly meant to be used with temporary files.
+@param[in,out]	file		file to read from
+@param[in,out]	str		buffer where to read
+@param[in]	size		size of buffer */
 void
 os_file_read_string(
-/*================*/
-	FILE*	file,	/*!< in: file to read from */
-	char*	str,	/*!< in: buffer where to read */
-	ulint	size);	/*!< in: size of buffer */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read_no_error_handling(),
+	FILE*		file,
+	char*		str,
+	ulint		size);
+
+/** NOTE! Use the corresponding macro os_file_read_no_error_handling(),
 not directly this function!
 Requests a synchronous positioned read operation. This function does not do
 any error handling. In case of error it returns FALSE.
-@return	TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@param[out]	o		number of bytes actually read
+@return DB_SUCCESS or error code */
+dberr_t
 os_file_read_no_error_handling_func(
-/*================================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n);	/*!< in: number of bytes to read */
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_write(), not directly this
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	ulint*		o)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** NOTE! Use the corresponding macro os_file_write(), not directly this
 function!
 Requests a synchronous write operation.
-@return	TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in,out]	type		IO request context
+@param[in]	file		Open file handle
+@param[in]	buf		buffer from which to write
+@param[in]	offset		file offset where to write
+@param[in]	n		number of bytes to write
+@return DB_SUCCESS if request was successful */
+dberr_t
 os_file_write_func(
-/*===============*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	const void*	buf,	/*!< in: buffer from which to write */
-	os_offset_t	offset,	/*!< in: file offset where to write */
-	ulint		n);	/*!< in: number of bytes to write */
-
-/*******************************************************************//**
-Check the existence and type of the given file.
-@return	TRUE if call succeeded */
-UNIV_INTERN
-ibool
+	IORequest&	type,
+	const char*	name,
+	os_file_t	file,
+	const void*	buf,
+	os_offset_t	offset,
+	ulint		n)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Check the existence and type of the given file.
+@param[in]	path		pathname of the file
+@param[out]	exists		true if file exists
+@param[out]	type		type of the file (if it exists)
+@return true if call succeeded */
+bool
 os_file_status(
-/*===========*/
-	const char*	path,	/*!< in:	pathname of the file */
-	ibool*		exists,	/*!< out: TRUE if file exists */
-	os_file_type_t* type);	/*!< out: type of the file (if it exists) */
-/****************************************************************//**
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string.  In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'.  Trailing '/' characters
-are not counted as part of the pathname.
-
-If path does not contain a slash, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is  a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
-
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
-
-       path	      dirname	     basename
-       "/usr/lib"     "/usr"	     "lib"
-       "/usr/"	      "/"	     "usr"
-       "usr"	      "."	     "usr"
-       "/"	      "/"	     "/"
-       "."	      "."	     "."
-       ".."	      "."	     ".."
-
-@return	own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
-	const char*	path);	/*!< in: pathname */
-/****************************************************************//**
-This function returns a new path name after replacing the basename
+	const char*	path,
+	bool*		exists,
+	os_file_type_t* type);
+
+/** This function returns a new path name after replacing the basename
 in an old path with a new basename.  The old_path is a full path
 name including the extension.  The tablename is in the normal
 form "databasename/tablename".  The new base name is found after
@@ -1029,35 +1738,15 @@ the forward slash.  Both input strings are null terminated.
 This function allocates memory to be returned.  It is the callers
 responsibility to free the return value after it is no longer needed.
 
-@return	own: new full pathname */
-UNIV_INTERN
+@param[in]	old_path		pathname
+@param[in]	new_name		new file name
+@return own: new full pathname */
 char*
 os_file_make_new_pathname(
-/*======================*/
-	const char*	old_path,	/*!< in: pathname */
-	const char*	new_name);	/*!< in: new file name */
-/****************************************************************//**
-This function returns a remote path name by combining a data directory
-path provided in a DATA DIRECTORY clause with the tablename which is
-in the form 'database/tablename'.  It strips the file basename (which
-is the tablename) found after the last directory in the path provided.
-The full filepath created will include the database name as a directory
-under the path provided.  The filename is the tablename with the '.ibd'
-extension. All input and output strings are null-terminated.
+	const char*	old_path,
+	const char*	new_name);
 
-This function allocates memory to be returned.  It is the callers
-responsibility to free the return value after it is no longer needed.
-
-@return	own: A full pathname; data_dir_path/databasename/tablename.ibd */
-UNIV_INTERN
-char*
-os_file_make_remote_pathname(
-/*=========================*/
-	const char*	data_dir_path,	/*!< in: pathname */
-	const char*	tablename,	/*!< in: tablename */
-	const char*	extention);	/*!< in: file extention; ibd,cfg*/
-/****************************************************************//**
-This function reduces a null-terminated full remote path name into
+/** This function reduces a null-terminated full remote path name into
 the path that is sent by MySQL for DATA DIRECTORY clause.  It replaces
 the 'databasename/tablename.ibd' found at the end of the path with just
 'tablename'.
@@ -1067,278 +1756,270 @@ is allocated. The caller should allocate memory for the path sent in.
 This function manipulates that path in place.
 
 If the path format is not as expected, just return.  The result is used
-to inform a SHOW CREATE TABLE command. */
-UNIV_INTERN
+to inform a SHOW CREATE TABLE command.
+@param[in,out]	data_dir_path		Full path/data_dir_path */
 void
 os_file_make_data_dir_path(
-/*========================*/
-	char*	data_dir_path);	/*!< in/out: full path/data_dir_path */
-/****************************************************************//**
-Creates all missing subdirectories along the given path.
-@return	TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
+	char*	data_dir_path);
+
+/** Create all missing subdirectories along the given path.
+@return DB_SUCCESS if OK, otherwise error code. */
+dberr_t
 os_file_create_subdirs_if_needed(
-/*=============================*/
-	const char*	path);	/*!< in: path name */
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
+	const char*	path);
+
+#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
+/* Test the function os_file_get_parent_dir. */
+void
+unit_test_os_file_get_parent_dir();
+#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
+
+/** Initializes the asynchronous io system. Creates one array each for ibuf
 and log i/o. Also creates one array each for read and write where each
 array is divided logically into n_read_segs and n_write_segs
 respectively. The caller must create an i/o handler thread for each
 segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that */
-UNIV_INTERN
-ibool
+No i/o handler thread needs to be created for that
+@param[in]	n_read_segs	number of reader threads
+@param[in]	n_write_segs	number of writer threads
+@param[in]	n_slots_sync	number of slots in the sync aio array */
+
+bool
 os_aio_init(
-/*========*/
-	ulint	n_per_seg,	/*<! in: maximum number of pending aio
-				operations allowed per segment */
-	ulint	n_read_segs,	/*<! in: number of reader threads */
-	ulint	n_write_segs,	/*<! in: number of writer threads */
-	ulint	n_slots_sync);	/*<! in: number of slots in the sync aio
-				array */
-/***********************************************************************
+	ulint		n_read_segs,
+	ulint		n_write_segs,
+	ulint		n_slots_sync);
+
+/**
 Frees the asynchronous io system. */
-UNIV_INTERN
 void
-os_aio_free(void);
-/*=============*/
+os_aio_free();
 
-/*******************************************************************//**
+/**
 NOTE! Use the corresponding macro os_aio(), not directly this function!
 Requests an asynchronous i/o operation.
-@return	TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
+@param[in]	type		IO request context
+@param[in]	mode		IO mode
+@param[in]	name		Name of the file or path as NUL terminated
+				string
+@param[in]	file		Open file handle
+@param[in,out]	buf		buffer where to read or from which to write
+@param[in]	offset		file offset where to read or write
+@param[in]	n		number of bytes to read or write
+@param[in]	read_only	if true read only mode checks are enforced
+@param[in,out]	m1		Message for the AIO handler, (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@param[in,out]	m2		message for the AIO handler (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@return DB_SUCCESS or error code */
+dberr_t
 os_aio_func(
-/*========*/
-	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		is_log,	/*!< in: 1 is OS_FILE_LOG or 0 */
-	ulint		mode,	/*!< in: OS_AIO_NORMAL, ..., possibly ORed
-				to OS_AIO_SIMULATED_WAKE_LATER: the
-				last flag advises this function not to wake
-				i/o-handler threads, but the caller will
-				do the waking explicitly later, in this
-				way the caller can post several requests in
-				a batch; NOTE that the batch must not be
-				so big that it exhausts the slots in aio
-				arrays! NOTE that a simulated batch
-				may introduce hidden chances of deadlocks,
-				because i/os are not actually handled until
-				all have been posted: use with great
-				caution! */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read or from which
-				to write */
-	os_offset_t	offset,	/*!< in: file offset where to read or write */
-	ulint		n,	/*!< in: number of bytes to read or write */
-	ulint           page_size, /*!< in: page size in bytes */
-	fil_node_t*	message1,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-				OS_AIO_SYNC */
-	void*		message2,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-				OS_AIO_SYNC */
-	ulint*		write_size);/*!< in/out: Actual write size initialized
-			       after fist successfull trim
-			       operation for this page and if
-			       initialized we do not trim again if
-			       actual page size does not decrease. */
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
+	IORequest&	type,
+	ulint		mode,
+	const char*	name,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	bool		read_only,
+	fil_node_t*	m1,
+	void*		m2,
+	ulint*		wsize);
+
+/** Wakes up all async i/o threads so that they know to exit themselves in
 shutdown. */
-UNIV_INTERN
 void
-os_aio_wake_all_threads_at_shutdown(void);
-/*=====================================*/
-/************************************************************************//**
-Waits until there are no pending writes in os_aio_write_array. There can
+os_aio_wake_all_threads_at_shutdown();
+
+/** Waits until there are no pending writes in os_aio_write_array. There can
 be other, synchronous, pending writes. */
-UNIV_INTERN
 void
-os_aio_wait_until_no_pending_writes(void);
-/*=====================================*/
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-UNIV_INTERN
+os_aio_wait_until_no_pending_writes();
+
+/** Wakes up simulated aio i/o-handler threads if they have something to do. */
 void
-os_aio_simulated_wake_handler_threads(void);
-/*=======================================*/
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
+os_aio_simulated_wake_handler_threads();
+
+/** This function can be called if one wants to post a batch of reads and
 prefers an i/o-handler thread to handle them all at once later. You must
 call os_aio_simulated_wake_handler_threads later to ensure the threads
 are not left sleeping! */
-UNIV_INTERN
 void
-os_aio_simulated_put_read_threads_to_sleep(void);
-/*============================================*/
+os_aio_simulated_put_read_threads_to_sleep();
 
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
+/** This is the generic AIO handler interface function.
 Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
+for completed requests. The AIO array of pending requests is divided
 into segments. The thread specifies which segment or slot it wants to wait
 for. NOTE: this function will also take care of freeing the aio slot,
 therefore no other thread is allowed to do the freeing!
-@return	TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
-	ulint	segment,	/*!< in: the number of the segment in the aio
-				arrays to wait for; segment 0 is the ibuf
-				i/o thread, segment 1 the log i/o thread,
-				then follow the non-ibuf read threads, and as
-				the last are the non-ibuf write threads; if
-				this is ULINT_UNDEFINED, then it means that
-				sync aio is used, and this parameter is
-				ignored */
-	ulint	pos,		/*!< this parameter is used only in sync aio:
-				wait for the aio slot at this position */
-	fil_node_t**message1,	/*!< out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2,
-	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
-#endif
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return	TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
-	ulint	segment,	/*!< in: the number of the segment in the aio
-				arrays to wait for; segment 0 is the ibuf
-				i/o thread, segment 1 the log i/o thread,
-				then follow the non-ibuf read threads, and as
-				the last are the non-ibuf write threads */
-	fil_node_t**message1,	/*!< out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2,
-	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
-/**********************************************************************//**
-Validates the consistency of the aio system.
-@return	TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void);
-/*=================*/
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
+@param[in]	segment		the number of the segment in the aio arrays to
+				wait for; segment 0 is the ibuf I/O thread,
+				segment 1 the log I/O thread, then follow the
+				non-ibuf read threads, and as the last are the
+				non-ibuf write threads; if this is
+				ULINT_UNDEFINED, then it means that sync AIO
+				is used, and this parameter is ignored
+@param[out]	m1		the messages passed with the AIO request;
+				note that also in the case where the AIO
+				operation failed, these output parameters
+				are valid and can be used to restart the
+				operation, for example
+@param[out]	m2		callback message
+@param[out]	type		OS_FILE_WRITE or ..._READ
+@return DB_SUCCESS or error code */
+dberr_t
+os_aio_handler(
+	ulint		segment,
+	fil_node_t**	m1,
+	void**		m2,
+	IORequest*	type);
+
+/** Prints info of the aio arrays.
+@param[in,out]	file		file where to print */
 void
-os_aio_print(
-/*=========*/
-	FILE*	file);	/*!< in: file where to print */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
+os_aio_print(FILE* file);
+
+/** Refreshes the statistics used to print per-second averages. */
 void
-os_aio_refresh_stats(void);
-/*======================*/
+os_aio_refresh_stats();
 
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
+/** Checks that all slots in the system have been freed, that is, there are
 no pending io operations. */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void);
-/*=======================*/
+bool
+os_aio_all_slots_free();
+
+#ifdef UNIV_DEBUG
+
+/** Prints all pending IO
+@param[in]	file	file where to print */
+void
+os_aio_print_pending_io(FILE* file);
+
 #endif /* UNIV_DEBUG */
 
-/*******************************************************************//**
-This function returns information about the specified file
-@return	DB_SUCCESS if all OK */
-UNIV_INTERN
+/** This function returns information about the specified file
+@param[in]	path		pathname of the file
+@param[in]	stat_info	information of a file in a directory
+@param[in]	check_rw_perm	for testing whether the file can be opened
+				in RW mode
+@param[in]	read_only	if true read only mode checks are enforced
+@return DB_SUCCESS if all OK */
 dberr_t
 os_file_get_status(
-/*===============*/
-	const char*	path,		/*!< in: pathname of the file */
-	os_file_stat_t* stat_info,	/*!< information of a file in a
-					directory */
-	bool		check_rw_perm);	/*!< in: for testing whether the
-					file can be opened in RW mode */
+	const char*	path,
+	os_file_stat_t* stat_info,
+	bool		check_rw_perm,
+	bool		read_only);
 
 #if !defined(UNIV_HOTBACKUP)
-/** Create a temporary file in the location specified by the parameter
-path. If the path is null, then it will be created in tmpdir.
+/** Creates a temporary file in the location specified by the parameter
+path. If the path is NULL then it will be created in the --tmpdir location.
+This function is defined in ha_innodb.cc.
 @param[in]	path	location for creating temporary file
 @return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
 int
 innobase_mysql_tmpfile(
 	const char*	path);
 #endif /* !UNIV_HOTBACKUP */
 
 
-#if defined(LINUX_NATIVE_AIO)
-/**************************************************************************
-This function is only used in Linux native asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return	TRUE if the IO was successful */
-UNIV_INTERN
-ibool
-os_aio_linux_handle(
-/*================*/
-	ulint	global_seg,	/*!< in: segment number in the aio array
-				to wait for; segment 0 is the ibuf
-				i/o thread, segment 1 is log i/o thread,
-				then follow the non-ibuf read threads,
-				and the last are the non-ibuf write
-				threads. */
-	fil_node_t**message1,	/*!< out: the messages passed with the */
-	void**	message2,	/*!< aio request; note that in case the
-				aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation. */
-	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
-#endif /* LINUX_NATIVE_AIO */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return	TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
-	const char*	name,		/*!< in: name of a file or NULL */
-	const char*	operation,	/*!< in: operation */
-	ibool		on_error_silent,/*!< in: if TRUE then don't print
-					any message to the log. */
-	const char*	file,		/*!< in: file name */
-	const ulint	line);		/*!< in: line */
-
-/***********************************************************************//**
-Try to get number of bytes per sector from file system.
-@return	file block size */
-UNIV_INTERN
+/** If it is a compressed page return the compressed page data + footer size
+@param[in]	buf		Buffer to check, must include header + 10 bytes
+@return ULINT_UNDEFINED if the page is not a compressed page or length
+	of the compressed data (including footer) if it is a compressed page */
 ulint
-os_file_get_block_size(
-/*===================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	const char*	name);	/*!< in: file name */
+os_file_compressed_page_size(const byte* buf);
+
+/** If it is a compressed page return the original page data + footer size
+@param[in]	buf		Buffer to check, must include header + 10 bytes
+@return ULINT_UNDEFINED if the page is not a compressed page or length
+	of the original data + footer if it is a compressed page */
+ulint
+os_file_original_page_size(const byte* buf);
+
+/** Set the file create umask
+@param[in]	umask		The umask to use for file creation. */
+void
+os_file_set_umask(ulint umask);
+
+/** Free storage space associated with a section of the file.
+@param[in]	fh		Open file handle
+@param[in]	off		Starting offset (SEEK_SET)
+@param[in]	len		Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_punch_hole(
+	os_file_t	fh,
+	os_offset_t	off,
+	os_offset_t	len)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Check if the file system supports sparse files.
+
+Warning: On POSIX systems we try and punch a hole from offset 0 to
+the system configured page size. This should only be called on an empty
+file.
+
+Note: On Windows we use the name and on Unices we use the file handle.
+
+@param[in]	path		File name
+@param[in]	fh		File handle for the file - if opened
+@return true if the file system supports sparse files */
+bool
+os_is_sparse_file_supported(
+	const char*	path,
+	os_file_t	fh)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Decompress the page data contents. Page type must be FIL_PAGE_COMPRESSED, if
+not then the source contents are left unchanged and DB_SUCCESS is returned.
+@param[in]	dblwr_recover	true if double write recovery is in progress
+@param[in,out]	src		Data read from disk, decompressed data will be
+				copied to this page
+@param[in,out]	dst		Scratch area to use for decompression
+@param[in]	dst_len		Size of the scratch area in bytes
+@return DB_SUCCESS or error code */
+
+dberr_t
+os_file_decompress_page(
+	bool		dblwr_recover,
+	byte*		src,
+	byte*		dst,
+	ulint		dst_len)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Normalizes a directory path for the current OS:
+On Windows, we convert '/' to '\', else we convert '\' to '/'.
+@param[in,out] str A null-terminated directory and file path */
+void os_normalize_path(char*	str);
+
+/** Determine if a path is an absolute path or not.
+@param[in]	path	OS directory or file path to evaluate
+@retval true if an absolute path
+@retval false if a relative path */
+UNIV_INLINE
+bool
+is_absolute_path(
+	const char*	path)
+{
+	if (path[0] == OS_PATH_SEPARATOR) {
+		return(true);
+	}
+#ifdef _WIN32
+	/* Test path[0] first so that "" is never read past its terminator. */
+	if (path[0] != '\0' && path[1] == ':' && path[2] == OS_PATH_SEPARATOR) {
+		return(true);
+	}
+#endif /* _WIN32 */
+
+	return(false);
+}
 
 #ifndef UNIV_NONINL
 #include "os0file.ic"
-#endif
+#endif /* UNIV_NONINL */
 
-#endif
+#endif /* os0file_h */
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index 6ca8b371093..74d0b2c83a8 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
+Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,40 +27,45 @@ Created 2/20/2010 Jimmy Yang
 #include "univ.i"
 
 #ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
+/** NOTE! Please use the corresponding macro os_file_create_simple(),
 not directly this function!
 A performance schema instrumented wrapper function for
 os_file_create_simple() which opens or creates a file.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
 can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_simple_func(
-/*===========================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
-				OS_FILE_READ_WRITE */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
 {
-	os_file_t	file;
-	struct PSI_file_locker* locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker* locker = NULL;
 
 	/* register a file open or creation depending on "create_mode" */
-	register_pfs_file_open_begin(&state, locker, key,
-				     ((create_mode == OS_FILE_CREATE)
-					? PSI_FILE_CREATE
-					: PSI_FILE_OPEN),
-				     name, src_file, src_line);
+	register_pfs_file_open_begin(
+		&state, locker, key,
+		(create_mode == OS_FILE_CREATE)
+		? PSI_FILE_CREATE : PSI_FILE_OPEN,
+		name, src_file, src_line);
 
-	file = os_file_create_simple_func(name, create_mode,
-					  access_type, success);
+	os_file_t	file = os_file_create_simple_func(
+		name, create_mode, access_type, read_only, success);
 
 	/* Regsiter the returning "file" value with the system */
 	register_pfs_file_open_end(locker, file);
@@ -68,394 +73,441 @@ pfs_os_file_create_simple_func(
 	return(file);
 }
 
-/****************************************************************//**
-NOTE! Please use the corresponding macro
+/** NOTE! Please use the corresponding macro
 os_file_create_simple_no_error_handling(), not directly this function!
 A performance schema instrumented wrapper function for
 os_file_create_simple_no_error_handling(). Add instrumentation to
 monitor file creation/open.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	access_type	OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option is
+				used by a backup program reading the file
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
 can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode, /*!< in: file create mode */
-	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
-				OS_FILE_READ_WRITE, or
-				OS_FILE_READ_ALLOW_DELETE; the last option is
-				used by a backup program reading the file */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes,/*!< in: atomic writes table option
-				value */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		access_type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
 {
-	os_file_t	file;
-	struct PSI_file_locker* locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker* locker = NULL;
 
 	/* register a file open or creation depending on "create_mode" */
-	register_pfs_file_open_begin(&state, locker, key,
-				     ((create_mode == OS_FILE_CREATE)
-					? PSI_FILE_CREATE
-					: PSI_FILE_OPEN),
-				     name, src_file, src_line);
+	register_pfs_file_open_begin(
+		&state, locker, key,
+		create_mode == OS_FILE_CREATE
+		? PSI_FILE_CREATE : PSI_FILE_OPEN,
+		name, src_file, src_line);
 
-	file = os_file_create_simple_no_error_handling_func(
-		name, create_mode, access_type, success, atomic_writes);
+	os_file_t	file = os_file_create_simple_no_error_handling_func(
+		name, create_mode, access_type, read_only, success);
 
 	register_pfs_file_open_end(locker, file);
 
 	return(file);
 }
 
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
+/** NOTE! Please use the corresponding macro os_file_create(), not directly
 this function!
 A performance schema wrapper function for os_file_create().
 Add instrumentation to monitor file creation/open.
+@param[in]	key		Performance Schema Key
+@param[in]	name		name of the file or path as a null-terminated
+				string
+@param[in]	create_mode	create mode
+@param[in]	purpose		OS_FILE_AIO, if asynchronous, non-buffered I/O
+				is desired, OS_FILE_NORMAL, if any normal file;
+				NOTE that it also depends on type, os_aio_..
+				and srv_.. variables whether we really use
+				async I/O or unbuffered I/O: look in the
+				function source code for the exact rules
+@param[in]	read_only	if true read only mode checks are enforced
+@param[out]	success		true if succeeded
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return own: handle to the file, not defined if error, error number
 can be retrieved with os_file_get_last_error */
 UNIV_INLINE
 os_file_t
 pfs_os_file_create_func(
-/*====================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	ulint		create_mode,/*!< in: file create mode */
-	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
-				non-buffered i/o is desired,
-				OS_FILE_NORMAL, if any normal file;
-				NOTE that it also depends on type, os_aio_..
-				and srv_.. variables whether we really use
-				async i/o or unbuffered i/o: look in the
-				function source code for the exact rules */
-	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
-	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
-	ulint		atomic_writes, /*!< in: atomic writes table option
-				       value */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	mysql_pfs_key_t key,
+	const char*	name,
+	ulint		create_mode,
+	ulint		purpose,
+	ulint		type,
+	bool		read_only,
+	bool*		success,
+	const char*	src_file,
+	ulint		src_line)
 {
-	os_file_t	file;
-	struct PSI_file_locker* locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker* locker = NULL;
 
 	/* register a file open or creation depending on "create_mode" */
-	register_pfs_file_open_begin(&state, locker, key,
-				     ((create_mode == OS_FILE_CREATE)
-					? PSI_FILE_CREATE
-					: PSI_FILE_OPEN),
-				     name, src_file, src_line);
+	register_pfs_file_open_begin(
+		&state, locker, key,
+		create_mode == OS_FILE_CREATE
+		? PSI_FILE_CREATE : PSI_FILE_OPEN,
+		name, src_file, src_line);
 
-	file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes);
+	os_file_t	file = os_file_create_func(
+		name, create_mode, purpose, type, read_only, success);
 
 	register_pfs_file_open_end(locker, file);
 
 	return(file);
 }
-
-/***********************************************************************//**
+/**
 NOTE! Please use the corresponding macro os_file_close(), not directly
 this function!
 A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
+@param[in]	file		handle to a file
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_close_func(
-/*===================*/
-        os_file_t	file,	/*!< in, own: handle to a file */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	os_file_t	file,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
 	/* register the file close */
-	register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
-				   src_file, src_line);
+	register_pfs_file_io_begin(
+		&state, locker, file, 0, PSI_FILE_CLOSE, src_file, src_line);
 
-	result = os_file_close_func(file);
+	bool	result = os_file_close_func(file);
 
 	register_pfs_file_io_end(locker, 0);
 
 	return(result);
 }
 
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
+/** NOTE! Please use the corresponding macro os_aio(), not directly this
 function!
-Performance schema instrumented wrapper function of os_aio() which
-requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
+Performance schema wrapper function of os_aio() which requests
+an asynchronous i/o operation.
+@param[in]	type		IO request context
+@param[in]	mode		IO mode
+@param[in]	name		Name of the file or path as NUL terminated
+				string
+@param[in]	file		Open file handle
+@param[in,out]	buf		buffer where to read or from which to write
+@param[in]	offset		file offset where to read or write
+@param[in]	n		number of bytes to read or write
+@param[in]	read_only	if true read only mode checks are enforced
+@param[in,out]	m1		Message for the AIO handler, (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@param[in,out]	m2		message for the AIO handler (can be used to
+				identify a completed AIO operation); ignored
+				if mode is OS_AIO_SYNC
+@param[in,out]	write_size	Actual write size initialized
+				after first successful trim
+				operation for this page and if
+				initialized we do not trim again if
+				actual page size does not decrease
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was queued successfully, else an error code */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_aio_func(
-/*============*/
-	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		is_log,	/*!< in: 1 is OS_FILE_LOG or 0 */
-	ulint		mode,	/*!< in: OS_AIO_NORMAL etc. I/O mode */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read or from which
-				to write */
-	os_offset_t	offset,	/*!< in: file offset where to read or write */
-	ulint		n,	/*!< in: number of bytes to read or write */
-	ulint           page_size, /*!< in: page size in bytes */
-	fil_node_t*	message1,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-				OS_AIO_SYNC */
-	void*		message2,/*!< in: message for the aio handler
-				(can be used to identify a completed
-				aio operation); ignored if mode is
-                                OS_AIO_SYNC */
-	ulint*		write_size,/*!< in/out: Actual write size initialized
-			       after fist successfull trim
-			       operation for this page and if
-			       initialized we do not trim again if
-			       actual page size does not decrease. */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	IORequest&	type,
+	ulint		mode,
+	const char*	name,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	bool		read_only,
+	fil_node_t*	m1,
+	void*		m2,
+	ulint*		write_size,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
+
+	ut_ad(type.validate());
 
 	/* Register the read or write I/O depending on "type" */
-	register_pfs_file_io_begin(&state, locker, file, n,
-				   (type == OS_FILE_WRITE)
-					? PSI_FILE_WRITE
-					: PSI_FILE_READ,
-				   src_file, src_line);
+	register_pfs_file_io_begin(
+		&state, locker, file, n,
+		type.is_write() ? PSI_FILE_WRITE : PSI_FILE_READ,
+		src_file, src_line);
 
-	result = os_aio_func(type, is_log, mode, name, file, buf, offset,
-			     n, page_size, message1, message2, write_size);
+	dberr_t	result = os_aio_func(
+		type, mode, name, file, buf, offset, n, read_only, m1, m2, write_size);
 
 	register_pfs_file_io_end(locker, n);
 
 	return(result);
 }
 
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
+/** NOTE! Please use the corresponding macro os_file_read(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_read() which requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in,out]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_read_func(
-/*==================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n,	/*!< in: number of bytes to read */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
+
+	ut_ad(type.validate());
+
+	register_pfs_file_io_begin(
+		&state, locker, file, n, PSI_FILE_READ, src_file, src_line);
 
-	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
-				   src_file, src_line);
+	dberr_t		result;
 
-	result = os_file_read_func(file, buf, offset, n);
+	result = os_file_read_func(type, file, buf, offset, n);
 
 	register_pfs_file_io_end(locker, n);
 
 	return(result);
 }
 
-/*******************************************************************//**
-NOTE! Please use the corresponding macro
-os_file_read_no_error_handling(), not directly this function!
+/** NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
 This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling() which requests a synchronous
-positioned read operation. This function does not do any error
-handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
+os_file_read_no_error_handling_func() which requests a synchronous
+read operation.
+@param[in,out]	type		IO request context
+@param[in]	file		Open file handle
+@param[out]	buf		buffer where to read
+@param[in]	offset		file offset where to read
+@param[in]	n		number of bytes to read
+@param[out]	o		number of bytes actually read
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_read_no_error_handling_func(
-/*====================================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n,	/*!< in: number of bytes to read */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	IORequest&	type,
+	os_file_t	file,
+	void*		buf,
+	os_offset_t	offset,
+	ulint		n,
+	ulint*		o,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
-	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
-				   src_file, src_line);
+	register_pfs_file_io_begin(
+		&state, locker, file, n, PSI_FILE_READ, src_file, src_line);
 
-	result = os_file_read_no_error_handling_func(file, buf, offset, n);
+	dberr_t	result = os_file_read_no_error_handling_func(
+		type, file, buf, offset, n, o);
 
 	register_pfs_file_io_end(locker, n);
 
 	return(result);
 }
 
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
+/** NOTE! Please use the corresponding macro os_file_write(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
+@param[in,out]	type		IO request context
+@param[in]	name		Name of the file or path as NUL terminated
+				string
+@param[in]	file		Open file handle
+@param[in]	buf		buffer from which to write
+@param[in]	offset		file offset where to write
+@param[in]	n		number of bytes to write
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return DB_SUCCESS if request was successful */
 UNIV_INLINE
-ibool
+dberr_t
 pfs_os_file_write_func(
-/*===================*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	const void*	buf,	/*!< in: buffer from which to write */
-	os_offset_t	offset,	/*!< in: file offset where to write */
-	ulint		n,	/*!< in: number of bytes to write */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	IORequest&	type,
+	const char*	name,
+	os_file_t	file,
+	const void*	buf,
+	os_offset_t	offset,
+	ulint		n,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
+
+	register_pfs_file_io_begin(
+		&state, locker, file, n, PSI_FILE_WRITE, src_file, src_line);
 
-	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
-				   src_file, src_line);
+	dberr_t		result;
 
-	result = os_file_write_func(name, file, buf, offset, n);
+	result = os_file_write_func(type, name, file, buf, offset, n);
 
 	register_pfs_file_io_end(locker, n);
 
 	return(result);
 }
 
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
+/** NOTE! Please use the corresponding macro os_file_flush(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_flush() which flushes the write buffers of a given file to the disk.
+Flushes the write buffers of a given file to the disk.
+@param[in]	file		Open file handle
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
 @return TRUE if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_flush_func(
-/*===================*/
-	os_file_t	file,	/*!< in, own: handle to a file */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	os_file_t	file,
+	const char*	src_file,
+	ulint		src_line)
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
-	register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
-				   src_file, src_line);
-	result = os_file_flush_func(file);
+	register_pfs_file_io_begin(
+		&state, locker, file, 0, PSI_FILE_SYNC, src_file, src_line);
+
+	bool	result = os_file_flush_func(file);
 
 	register_pfs_file_io_end(locker, 0);
 
 	return(result);
 }
 
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
+/** NOTE! Please use the corresponding macro os_file_rename(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_rename()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	oldpath		old file path as a null-terminated string
+@param[in]	newpath		new file path
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
-ibool
+bool
 pfs_os_file_rename_func(
-/*====================*/
-	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
-	const char*	oldpath,/*!< in: old file path as a null-terminated
-				string */
-	const char*	newpath,/*!< in: new file path */
-	const char*	src_file,/*!< in: file name where func invoked */
-	ulint		src_line)/*!< in: line where the func invoked */
+	mysql_pfs_key_t	key,
+	const char*	oldpath,
+	const char*	newpath,
+	const char*	src_file,
+	ulint		src_line)
+
 {
-	ibool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
-	register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath,
-				     src_file, src_line);
+	register_pfs_file_open_begin(
+		&state, locker, key, PSI_FILE_RENAME, newpath,
+		src_file, src_line);
 
-	result = os_file_rename_func(oldpath, newpath);
+	bool	result = os_file_rename_func(oldpath, newpath);
 
 	register_pfs_file_open_end(locker, 0);
 
 	return(result);
 }
 
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete(), not directly
+/** NOTE! Please use the corresponding macro os_file_delete(), not directly
 this function!
 This is the performance schema instrumented wrapper function for
 os_file_delete()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	name		file path as a null-terminated string
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
 bool
 pfs_os_file_delete_func(
-/*====================*/
-	mysql_pfs_key_t key,		/*!< in: Performance Schema Key */
-	const char*	name,		/*!< in: file path as a null-terminated
-					string */
-	const char*	src_file,	/*!< in: file name where func invoked */
-	ulint		src_line)	/*!< in: line where the func invoked */
+	mysql_pfs_key_t	key,
+	const char*	name,
+	const char*	src_file,
+	ulint		src_line)
 {
-	bool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
-	register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
-				      name, src_file, src_line);
+	register_pfs_file_close_begin(
+		&state, locker, key, PSI_FILE_DELETE, name, src_file, src_line);
 
-	result = os_file_delete_func(name);
+	bool	result = os_file_delete_func(name);
 
 	register_pfs_file_close_end(locker, 0);
 
 	return(result);
 }
 
-/***********************************************************************//**
+/**
 NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
 directly this function!
 This is the performance schema instrumented wrapper function for
 os_file_delete_if_exists()
-@return TRUE if success */
+@param[in]	key		Performance Schema Key
+@param[in]	name		file path as a null-terminated string
+@param[out]	exist		set to indicate whether the file pre-existed
+@param[in]	src_file	file name where func invoked
+@param[in]	src_line	line where the func invoked
+@return true if success */
 UNIV_INLINE
 bool
 pfs_os_file_delete_if_exists_func(
-/*==============================*/
-	mysql_pfs_key_t key,		/*!< in: Performance Schema Key */
-	const char*	name,		/*!< in: file path as a null-terminated
-					string */
-	const char*	src_file,	/*!< in: file name where func invoked */
-	ulint		src_line)	/*!< in: line where the func invoked */
+	mysql_pfs_key_t	key,
+	const char*	name,
+	bool*		exist,
+	const char*	src_file,
+	ulint		src_line)
 {
-	bool	result;
-	struct PSI_file_locker*	locker = NULL;
 	PSI_file_locker_state	state;
+	struct PSI_file_locker*	locker = NULL;
 
-	register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
-				      name, src_file, src_line);
+	register_pfs_file_close_begin(
+		&state, locker, key, PSI_FILE_DELETE, name, src_file, src_line);
 
-	result = os_file_delete_if_exists_func(name);
+	bool	result = os_file_delete_if_exists_func(name, exist);
 
 	register_pfs_file_close_end(locker, 0);
 
 	return(result);
 }
 #endif /* UNIV_PFS_IO */
+
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index a8bbaf1d2d4..05a45a69f33 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -29,7 +29,6 @@ Created Feb 20, 2014 Vasil Dimov
 
 #include "univ.i"
 
-#include "os0sync.h"
 #include "ut0ut.h"
 
 /** Execute a given function exactly once in a multi-threaded environment
@@ -67,7 +66,6 @@ public:
 	/** Finished execution. */
 	static const state_t	DONE = 2;
 
-#ifdef HAVE_ATOMIC_BUILTINS
 	/** Call a given function or wait its execution to complete if it is
 	already called by another thread.
 	@param[in,out]	state		control variable
@@ -80,22 +78,19 @@ public:
 		void			(*do_func)(void*),
 		void*			do_func_arg)
 	{
-		/* Avoid calling os_compare_and_swap_uint32() in the most
-		common case. */
+		int32 oldval = NEVER_DONE;
+
+		/* Avoid calling my_atomic_cas32() in the most common case. */
 		if (*state == DONE) {
 			return;
 		}
 
-		if (os_compare_and_swap_uint32(state,
-					       NEVER_DONE, IN_PROGRESS)) {
+		if (my_atomic_cas32((int32*) state, &oldval, IN_PROGRESS)) {
 			/* We are the first. Call the function. */
 
 			do_func(do_func_arg);
 
-			const bool	swapped = os_compare_and_swap_uint32(
-				state, IN_PROGRESS, DONE);
-
-			ut_a(swapped);
+			my_atomic_store32((int32*) state, DONE);
 		} else {
 			/* The state is not NEVER_DONE, so either it is
 			IN_PROGRESS (somebody is calling the function right
@@ -119,7 +114,6 @@ public:
 			}
 		}
 	}
-#endif /* HAVE_ATOMIC_BUILTINS */
 };
 
 #endif /* os0once_h */
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
index 613e3bd6947..af57b5d6a7a 100644
--- a/storage/innobase/include/os0proc.h
+++ b/storage/innobase/include/os0proc.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,38 +37,35 @@ Created 9/30/1995 Heikki Tuuri
 typedef void*			os_process_t;
 typedef unsigned long int	os_process_id_t;
 
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return	process id as a number */
-UNIV_INTERN
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large(). */
+extern ulint	os_total_large_mem_allocated;
+
+/** Whether to use large pages in the buffer pool */
+extern my_bool	os_use_large_pages;
+
+/** Large page size. This may be a boot-time option on some platforms */
+extern uint	os_large_page_size;
+
+/** Converts the current process id to a number.
+@return process id as a number */
 ulint
 os_proc_get_number(void);
-/*====================*/
-/****************************************************************//**
-Allocates large pages memory.
-@return	allocated memory */
-UNIV_INTERN
+
+/** Allocates large pages memory.
+@param[in,out]	n	Number of bytes to allocate
+@return allocated memory */
 void*
 os_mem_alloc_large(
-/*===============*/
-	ulint*	n);			/*!< in/out: number of bytes */
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
+	ulint*	n);
+
+/** Frees large pages memory.
+@param[in]	ptr	pointer returned by os_mem_alloc_large()
+@param[in]	size	size returned by os_mem_alloc_large() */
 void
 os_mem_free_large(
-/*==============*/
-	void	*ptr,			/*!< in: pointer returned by
-					os_mem_alloc_large() */
-	ulint	size);			/*!< in: size returned by
-					os_mem_alloc_large() */
+	void	*ptr,
+	ulint	size);
 
 #ifndef UNIV_NONINL
 #include "os0proc.ic"
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
deleted file mode 100644
index 0754210c47a..00000000000
--- a/storage/innobase/include/os0sync.h
+++ /dev/null
@@ -1,899 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.h
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "sync0types.h"
-
-#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \
-    || defined _M_X64 || defined __WIN__
-
-#define IB_STRONG_MEMORY_MODEL
-
-#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-typedef LONG lock_word_t;	/*!< On Windows, InterlockedExchange operates
-				on LONG variable */
-#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
-typedef ulint lock_word_t;
-#else
-
-#define IB_LOCK_WORD_IS_BYTE
-
-typedef byte lock_word_t;
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-#ifdef __WIN__
-/** Native event (slow)*/
-typedef HANDLE			os_native_event_t;
-/** Native mutex */
-typedef CRITICAL_SECTION	fast_mutex_t;
-/** Native condition variable. */
-typedef CONDITION_VARIABLE	os_cond_t;
-#else
-/** Native mutex */
-typedef pthread_mutex_t		fast_mutex_t;
-/** Native condition variable */
-typedef pthread_cond_t		os_cond_t;
-#endif
-
-/** Structure that includes Performance Schema Probe pfs_psi
-in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
-struct os_fast_mutex_t {
-	fast_mutex_t		mutex;	/*!< os_fast_mutex */
-#ifdef UNIV_PFS_MUTEX
-	struct PSI_mutex*	pfs_psi;/*!< The performance schema
-					instrumentation hook */
-#endif
-};
-
-/** Operating system event handle */
-typedef struct os_event*	os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event {
-#ifdef __WIN__
-	HANDLE		handle;		/*!< kernel event object, slow,
-					used on older Windows */
-#endif
-	os_fast_mutex_t	os_mutex;	/*!< this mutex protects the next
-					fields */
-	ibool		is_set;		/*!< this is TRUE when the event is
-					in the signaled state, i.e., a thread
-					does not stop if it tries to wait for
-					this event */
-	ib_int64_t	signal_count;	/*!< this is incremented each time
-					the event becomes signaled */
-	os_cond_t	cond_var;	/*!< condition variable is used in
-					waiting for the event */
-	UT_LIST_NODE_T(os_event_t) os_event_list;
-					/*!< list of all created events */
-};
-
-/** Denotes an infinite delay for os_event_wait_time() */
-#define OS_SYNC_INFINITE_TIME   ULINT_UNDEFINED
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED   1
-
-/** Operating system mutex handle */
-typedef struct os_mutex_t*	os_ib_mutex_t;
-
-/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_ib_mutex_t	os_sync_mutex;
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint		os_thread_count;
-
-extern ulint		os_event_count;
-extern ulint		os_mutex_count;
-extern ulint		os_fast_mutex_count;
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void);
-/*==============*/
-/*********************************************************//**
-Frees created events and OS 'slow' mutexes. */
-UNIV_INTERN
-void
-os_sync_free(void);
-/*==============*/
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event.
-@return	the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(void);
-/*==================*/
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
-	os_event_t	event);	/*!< in: event to set */
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
-	os_event_t	event);	/*!< in: event to reset */
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
-	os_event_t	event);	/*!< in: event to free */
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state.
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set()   [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait()  [infinite wait!]
-thread C calls os_event_wait()  [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
-	os_event_t	event,		/*!< in: event to wait */
-	ib_int64_t	reset_sig_count);/*!< in: zero or the value
-					returned by previous call of
-					os_event_reset(). */
-
-#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time_low(
-/*===================*/
-	os_event_t	event,			/*!< in: event to wait */
-	ulint		time_in_usec,		/*!< in: timeout in
-						microseconds, or
-						OS_SYNC_INFINITE_TIME */
-	ib_int64_t	reset_sig_count);	/*!< in: zero or the value
-						returned by previous call of
-						os_event_reset(). */
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
-@return	the mutex handle */
-UNIV_INTERN
-os_ib_mutex_t
-os_mutex_create(void);
-/*=================*/
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
-	os_ib_mutex_t	mutex);	/*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
-	os_ib_mutex_t	mutex);	/*!< in: mutex to release */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
-	os_ib_mutex_t	mutex);	/*!< in: mutex to free */
-/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
-@return	0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
-	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to acquire */
-
-/**********************************************************************
-Following os_fast_ mutex APIs would be performance schema instrumented:
-
-os_fast_mutex_init
-os_fast_mutex_lock
-os_fast_mutex_unlock
-os_fast_mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation.
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-#ifdef UNIV_PFS_MUTEX
-# define os_fast_mutex_init(K, M)			\
-	pfs_os_fast_mutex_init(K, M)
-
-# define os_fast_mutex_lock(M)				\
-	pfs_os_fast_mutex_lock(M, __FILE__, __LINE__)
-
-# define os_fast_mutex_unlock(M)	pfs_os_fast_mutex_unlock(M)
-
-# define os_fast_mutex_free(M)		pfs_os_fast_mutex_free(M)
-
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
-	PSI_mutex_key		key,		/*!< in: Performance Schema
-						key */
-	os_fast_mutex_t*	fast_mutex);	/*!< out: fast mutex */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
-	os_fast_mutex_t*	fast_mutex);	/*!< in/out: mutex to free */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
-	os_fast_mutex_t*	fast_mutex,	/*!< in/out: mutex to acquire */
-	const char*		file_name,	/*!< in: file name where
-						 locked */
-	ulint			line);		/*!< in: line where locked */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
-	os_fast_mutex_t*	fast_mutex);	/*!< in/out: mutex to release */
-
-#else /* UNIV_PFS_MUTEX */
-
-# define os_fast_mutex_init(K, M)			\
-	os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_lock(M)				\
-	os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_unlock(M)			\
-	os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_free(M)				\
-	os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex)
-#endif /* UNIV_PFS_MUTEX */
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required.
-@return	0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*==================*/
-	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_func(
-/*======================*/
-	fast_mutex_t*		fast_mutex);	/*!< in: mutex to release */
-/**********************************************************//**
-Releases ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_full_barrier(
-/*=================*/
-	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to release */
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init_func(
-/*====================*/
-	fast_mutex_t*		fast_mutex);	/*!< in: fast mutex */
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock_func(
-/*====================*/
-	fast_mutex_t*		fast_mutex);	/*!< in: mutex to acquire */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free_func(
-/*====================*/
-	fast_mutex_t*		fast_mutex);	/*!< in: mutex to free */
-
-/**********************************************************//**
-Atomic compare-and-swap and increment for InnoDB. */
-
-#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-
-# define HAVE_ATOMIC_BUILTINS
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
-#  define HAVE_ATOMIC_BUILTINS_BYTE
-# endif
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
-#  define HAVE_ATOMIC_BUILTINS_64
-# endif
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap(ptr, old_val, new_val) \
-	__sync_bool_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
-	os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
-	os_compare_and_swap(ptr, old_val, new_val)
-
-#  define os_compare_and_swap_uint32(ptr, old_val, new_val) \
-	os_compare_and_swap(ptr, old_val, new_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
-#  define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
-	os_compare_and_swap(ptr, old_val, new_val)
-#  define INNODB_RW_LOCKS_USE_ATOMICS
-#  define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes and rw_locks use GCC atomic builtins"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-#  define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes use GCC atomic builtins, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment(ptr, amount) \
-	__sync_add_and_fetch(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
-	os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint32(ptr, amount ) \
-	os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
-	os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
-	os_atomic_increment(ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement(ptr, amount) \
-	__sync_sub_and_fetch(ptr, amount)
-
-# define os_atomic_decrement_uint32(ptr, amount) \
-	os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_lint(ptr, amount) \
-	os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
-	os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_uint64(ptr, amount) \
-	os_atomic_decrement(ptr, amount)
-
-# if defined(HAVE_ATOMIC_BUILTINS)
-
-/** Do an atomic test and set.
-@param[in,out]	ptr		Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
-	return(__sync_lock_test_and_set(ptr, 1));
-}
-
-/** Do an atomic release.
-@param[in,out]	ptr		Memory location to write to
-@return the previous value */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
-	__sync_lock_release(ptr);
-}
-
-# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
-
-/** Do an atomic test-and-set.
-@param[in,out]	ptr		Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
-       return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE));
-}
-
-/** Do an atomic clear.
-@param[in,out]	ptr		Memory location to set to zero */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
-	__atomic_clear(ptr, __ATOMIC_RELEASE);
-}
-
-# else
-
-#  error "Unsupported platform"
-
-# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */
-
-#if defined(__powerpc__) || defined(__aarch64__)
-/*
-  os_atomic_test_and_set_byte_release() should imply a release barrier before
-  setting, and a full barrier after. But __sync_lock_test_and_set() is only
-  documented as an aquire barrier. So on PowerPC we need to add the full
-  barrier explicitly.  */
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
-        do { __sync_lock_release(ptr); \
-             __sync_synchronize(); } while (0)
-#else
-/*
-  On x86, __sync_lock_test_and_set() happens to be full barrier, due to
-  LOCK prefix.
-*/
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
-	__sync_lock_test_and_set(ptr, (byte) new_val)
-#endif
-/*
-  os_atomic_test_and_set_byte_acquire() is a full memory barrier on x86. But
-  in general, just an aquire barrier should be sufficient. */
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
-	__sync_lock_test_and_set(ptr, (byte) new_val)
-
-#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_64
-
-/* If not compiling with GCC or GCC doesn't support the atomic
-intrinsics and running on Solaris >= 10 use Solaris atomics */
-
-# include <atomic.h>
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
-	(atomic_cas_32(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
-	(atomic_cas_ulong(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
-	((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
-#  if SIZEOF_PTHREAD_T == 4
-#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
-	((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val)
-#  elif SIZEOF_PTHREAD_T == 8
-#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
-	((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val)
-#  else
-#   error "SIZEOF_PTHREAD_T != 4 or 8"
-#  endif /* SIZEOF_PTHREAD_T CHECK */
-#  define INNODB_RW_LOCKS_USE_ATOMICS
-#  define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes and rw_locks use Solaris atomic functions"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-#  define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes use Solaris atomic functions, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_uint32(ptr, amount) \
-	atomic_add_32_nv(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
-	atomic_add_long_nv(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
-	os_atomic_increment_ulint((ulong_t*) ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
-	atomic_add_64_nv(ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement_uint32(ptr, amount) \
-	os_atomic_increment_uint32(ptr, -(amount))
-
-# define os_atomic_decrement_lint(ptr, amount) \
-	os_atomic_increment_ulint((ulong_t*) ptr, -(amount))
-
-# define os_atomic_decrement_ulint(ptr, amount) \
-	os_atomic_increment_ulint(ptr, -(amount))
-
-# define os_atomic_decrement_uint64(ptr, amount) \
-	os_atomic_increment_uint64(ptr, -(amount))
-
-# ifdef IB_LOCK_WORD_IS_BYTE
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out]	ptr		Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
-	return(atomic_swap_uchar(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out]	ptr		Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
-	return(atomic_swap_uchar(ptr, 0));
-}
-
-# else
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out]	ptr		Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
-	return(atomic_swap_ulong(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out]	ptr		Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
-	return(atomic_swap_ulong(ptr, 0));
-}
-
-# endif /* IB_LOCK_WORD_IS_BYTE */
-
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
-	atomic_swap_uchar(ptr, new_val)
-
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
-	atomic_swap_uchar(ptr, new_val)
-
-#elif defined(HAVE_WINDOWS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-
-# ifndef _WIN32
-#  define HAVE_ATOMIC_BUILTINS_64
-# endif
-
-/**********************************************************//**
-Atomic compare and exchange of signed integers (both 32 and 64 bit).
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
-	volatile lint*	ptr,		/*!< in/out: source/destination */
-	lint		new_val,	/*!< in: exchange value */
-	lint		old_val);	/*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic addition of signed integers.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
-	volatile lint*	ptr,	/*!< in/out: address of destination */
-	lint		val);	/*!< in: number to be added */
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
-	volatile ulint*	ptr,		/*!< in/out: source/destination */
-	ulint		new_val,	/*!< in: exchange value */
-	ulint		old_val);	/*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic compare and exchange of 32 bit unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
-	volatile DWORD*	ptr,		/*!< in/out: source/destination */
-	DWORD		new_val,	/*!< in: exchange value */
-	DWORD		old_val);	/*!< in: value to compare to */
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
-	(InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
-				    new_val, old_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
-	(win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
-	(win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val)
-
-/* windows thread objects can always be passed to windows atomic functions */
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
-	(win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
-
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes and rw_locks use Windows interlocked functions"
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_lint(ptr, amount) \
-	(win_xchg_and_add(ptr, amount) + amount)
-
-# define os_atomic_increment_uint32(ptr, amount) \
-	((ulint) InterlockedExchangeAdd((long*) ptr, amount))
-
-# define os_atomic_increment_ulint(ptr, amount) \
-	((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
-
-# define os_atomic_increment_uint64(ptr, amount)		\
-	((ib_uint64_t) (InterlockedExchangeAdd64(		\
-				(ib_int64_t*) ptr,		\
-				(ib_int64_t) amount) + amount))
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. There is no atomic substract function on Windows */
-
-# define os_atomic_decrement_uint32(ptr, amount) \
-	((ulint) InterlockedExchangeAdd((long*) ptr, (-amount)))
-
-# define os_atomic_decrement_lint(ptr, amount) \
-	(win_xchg_and_add(ptr, -(lint) amount) - amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
-	((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
-
-# define os_atomic_decrement_uint64(ptr, amount)		\
-	((ib_uint64_t) (InterlockedExchangeAdd64(		\
-				(ib_int64_t*) ptr,		\
-				-(ib_int64_t) amount) - amount))
-
-/** Do an atomic test and set.
-InterlockedExchange() operates on LONG, and the LONG will be clobbered
-@param[in,out]	ptr		Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
-	return(InterlockedExchange(ptr, 1));
-}
-
-/** Do an atomic release.
-InterlockedExchange() operates on LONG, and the LONG will be clobbered
-@param[in,out]	ptr		Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
-	return(InterlockedExchange(ptr, 0));
-}
-
-# define os_atomic_lock_release_byte(ptr) \
-	(void) InterlockedExchange(ptr, 0)
-
-#else
-# define IB_ATOMICS_STARTUP_MSG \
-	"Mutexes and rw_locks use InnoDB's own implementation"
-#endif
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_atomic_inc_ulint(m,v,d)	os_atomic_increment_ulint(v, d)
-#define os_atomic_dec_ulint(m,v,d)	os_atomic_decrement_ulint(v, d)
-#else
-#define os_atomic_inc_ulint(m,v,d)	os_atomic_inc_ulint_func(m, v, d)
-#define os_atomic_dec_ulint(m,v,d)	os_atomic_dec_ulint_func(m, v, d)
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/**********************************************************//**
-Following macros are used to update specified counter atomically
-if HAVE_ATOMIC_BUILTINS defined. Otherwise, use mutex passed in
-for synchronization */
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_increment_counter_by_amount(mutex, counter, amount)	\
-	(void) os_atomic_increment_ulint(&counter, amount)
-
-#define os_decrement_counter_by_amount(mutex, counter, amount)	\
-	(void) os_atomic_increment_ulint(&counter, (-((lint) amount)))
-#else
-#define os_increment_counter_by_amount(mutex, counter, amount)	\
-	do {							\
-		mutex_enter(&(mutex));				\
-		(counter) += (amount);				\
-		mutex_exit(&(mutex));				\
-	} while (0)
-
-#define os_decrement_counter_by_amount(mutex, counter, amount)	\
-	do {							\
-		ut_a(counter >= amount);			\
-		mutex_enter(&(mutex));				\
-		(counter) -= (amount);				\
-		mutex_exit(&(mutex));				\
-	} while (0)
-#endif  /* HAVE_ATOMIC_BUILTINS */
-
-#define os_inc_counter(mutex, counter)				\
-	os_increment_counter_by_amount(mutex, counter, 1)
-
-#define os_dec_counter(mutex, counter)				\
-	do {							\
-		os_decrement_counter_by_amount(mutex, counter, 1);\
-	} while (0);
-
-/** barrier definitions for memory ordering */
-#if defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb	__atomic_thread_fence(__ATOMIC_ACQUIRE)
-# define os_wmb	__atomic_thread_fence(__ATOMIC_RELEASE)
-# define os_mb __atomic_thread_fence(__ATOMIC_SEQ_CST)
-
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
-	"GCC builtin __atomic_thread_fence() is used for memory barrier"
-
-#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb	__sync_synchronize()
-# define os_wmb	__sync_synchronize()
-# define os_mb	__sync_synchronize()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
-	"GCC builtin __sync_synchronize() is used for memory barrier"
-
-#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS)
-# define HAVE_MEMORY_BARRIER
-# include <mbarrier.h>
-# define os_rmb	__machine_r_barrier()
-# define os_wmb	__machine_w_barrier()
-# define os_mb __machine_rw_barrier()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
-	"Solaris memory ordering functions are used for memory barrier"
-
-#elif defined(HAVE_WINDOWS_MM_FENCE)
-# define HAVE_MEMORY_BARRIER
-# include <intrin.h>
-# define os_rmb	_mm_lfence()
-# define os_wmb	_mm_sfence()
-# define os_mb	_mm_mfence()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
-	"_mm_lfence() and _mm_sfence() are used for memory barrier"
-
-#else
-# define os_rmb do { } while(0)
-# define os_wmb do { } while(0)
-# define os_mb do { } while(0)
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
-	"Memory barrier is not used"
-#endif
-
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
deleted file mode 100644
index 4ebf84dba98..00000000000
--- a/storage/innobase/include/os0sync.ic
+++ /dev/null
@@ -1,266 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.ic
-The interface to the operating system synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**********************************************************//**
-Acquires ownership of a fast mutex.
-@return	0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
-	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
-{
-	fast_mutex_t*	mutex = &fast_mutex->mutex;
-
-#ifdef __WIN__
-	return(!TryEnterCriticalSection(mutex));
-#else
-	/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
-	so that it returns 0 on success. In the operating system
-	libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
-	returns 1 on success (but MySQL remaps that to 0), while Linux,
-	FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
-	return((ulint) pthread_mutex_trylock(mutex));
-#endif
-}
-
-#ifdef UNIV_PFS_MUTEX
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
-	PSI_mutex_key		key,		/*!< in: Performance Schema
-						key */
-	os_fast_mutex_t*	fast_mutex)	/*!< out: fast mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
-#else
-	fast_mutex->pfs_psi = NULL;
-#endif
-
-	os_fast_mutex_init_func(&fast_mutex->mutex);
-}
-/******************************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
-	os_fast_mutex_t*	fast_mutex)  /*!< in/out: mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	if (fast_mutex->pfs_psi != NULL)
-		PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
-#endif
-	fast_mutex->pfs_psi = NULL;
-
-	os_fast_mutex_free_func(&fast_mutex->mutex);
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Wrapper function of os_fast_mutex_lock_func. Acquires ownership of a fast
-mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
-	os_fast_mutex_t*	fast_mutex,	/*!< in/out: mutex to acquire */
-	const char*		file_name,	/*!< in: file name where
-						 locked */
-	ulint			line)		/*!< in: line where locked */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	if (fast_mutex->pfs_psi != NULL)
-	{
-		PSI_mutex_locker* 	locker;
-		PSI_mutex_locker_state	state;
-
-		locker = PSI_MUTEX_CALL(start_mutex_wait)(
-			&state, fast_mutex->pfs_psi,
-			PSI_MUTEX_LOCK, file_name,
-			static_cast<uint>(line));
-
-		os_fast_mutex_lock_func(&fast_mutex->mutex);
-
-		if (locker != NULL)
-			PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
-	}
-	else
-#endif
-	{
-		os_fast_mutex_lock_func(&fast_mutex->mutex);
-	}
-
-	return;
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a
-fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
-	os_fast_mutex_t*	fast_mutex)	/*!< in/out: mutex to release */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	if (fast_mutex->pfs_psi != NULL)
-		PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
-#endif
-
-	os_fast_mutex_unlock_func(&fast_mutex->mutex);
-}
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-
-/* Use inline functions to make 64 and 32 bit versions of windows atomic
-functions so that typecasts are evaluated at compile time. Take advantage
-that lint is either __int64 or long int and windows atomic functions work
-on __int64 and LONG */
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
-	volatile lint*	ptr,		/*!< in/out: source/destination */
-	lint		new_val,	/*!< in: exchange value */
-	lint		old_val)	/*!< in: value to compare to */
-{
-# ifdef _WIN64
-	return(InterlockedCompareExchange64(ptr, new_val, old_val));
-# else
-	return(InterlockedCompareExchange(ptr, new_val, old_val));
-# endif
-}
-
-/**********************************************************//**
-Atomic addition of signed integers.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
-	volatile lint*	ptr,	/*!< in/out: address of destination */
-	lint		val)	/*!< in: number to be added */
-{
-#ifdef _WIN64
-	return(InterlockedExchangeAdd64(ptr, val));
-#else
-	return(InterlockedExchangeAdd(ptr, val));
-#endif
-}
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
-	volatile ulint*	ptr,		/*!< in/out: source/destination */
-	ulint		new_val,	/*!< in: exchange value */
-	ulint		old_val)	/*!< in: value to compare to */
-{
-	return((ulint) win_cmp_and_xchg_lint(
-		(volatile lint*) ptr,
-		(lint) new_val,
-		(lint) old_val));
-}
-
-/**********************************************************//**
-Atomic compare and exchange of 32-bit unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
-	volatile DWORD*	ptr,		/*!< in/out: source/destination */
-	DWORD		new_val,	/*!< in: exchange value */
-	DWORD		old_val)	/*!< in: value to compare to */
-{
-	ut_ad(sizeof(DWORD) == sizeof(LONG));	/* We assume this. */
-	return(InterlockedCompareExchange(
-		(volatile LONG*) ptr,
-		(LONG) new_val,
-		(LONG) old_val));
-}
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required.
-@return	0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*==================*/
-	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
-{
-#ifdef __WIN__
-	if (TryEnterCriticalSection(&fast_mutex->mutex)) {
-
-		return(0);
-	} else {
-
-		return(1);
-	}
-#else
-	/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
-	so that it returns 0 on success. In the operating system
-	libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
-	returns 1 on success (but MySQL remaps that to 0), while Linux,
-	FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
-#ifdef __powerpc__
-	os_mb;
-#endif
-	return((ulint) pthread_mutex_trylock(&fast_mutex->mutex));
-#endif
-}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index 9a1ada8fa0d..9ba35b6f359 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -41,8 +41,8 @@ can wait inside InnoDB */
 #define OS_THREAD_PRIORITY_NORMAL	2
 #define OS_THREAD_PRIORITY_ABOVE_NORMAL	3
 
-#ifdef __WIN__
-typedef void*			os_thread_t;
+#ifdef _WIN32
+typedef DWORD			os_thread_t;
 typedef DWORD			os_thread_id_t;	/*!< In Windows the thread id
 						is an unsigned long int */
 extern "C"  {
@@ -62,7 +62,7 @@ don't access the arguments and don't return any value, we should be safe. */
 #else
 
 typedef pthread_t		os_thread_t;
-typedef os_thread_t		os_thread_id_t;	/*!< In Unix we use the thread
+typedef pthread_t		os_thread_id_t;	/*!< In Unix we use the thread
 						handle itself as the id of
 						the thread */
 extern "C"  { typedef void*	(*os_thread_func_t)(void*); }
@@ -71,7 +71,7 @@ extern "C"  { typedef void*	(*os_thread_func_t)(void*); }
 #define DECLARE_THREAD(func)	func
 #define os_thread_create(f,a,i)	os_thread_create_func(f, a, i)
 
-#endif /* __WIN__ */
+#endif /* _WIN32 */
 
 /* Define a function pointer type to use in a typecast */
 typedef void* (*os_posix_f_t) (void*);
@@ -79,12 +79,14 @@ typedef void* (*os_posix_f_t) (void*);
 #ifdef HAVE_PSI_INTERFACE
 /* Define for performance schema registration key */
 typedef unsigned int    mysql_pfs_key_t;
-#endif
+#endif /* HAVE_PSI_INTERFACE */
+
+/** Number of threads active. */
+extern	ulint	os_thread_count;
 
 /***************************************************************//**
 Compares two thread ids for equality.
-@return	TRUE if equal */
-UNIV_INTERN
+@return TRUE if equal */
 ibool
 os_thread_eq(
 /*=========*/
@@ -93,20 +95,18 @@ os_thread_eq(
 /****************************************************************//**
 Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
 unique for the thread though!
-@return	thread identifier as a number */
-UNIV_INTERN
+@return thread identifier as a number */
 ulint
 os_thread_pf(
 /*=========*/
 	os_thread_id_t	a);	/*!< in: OS thread identifier */
 /****************************************************************//**
 Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns a ulint.
+the function given.
 NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit.
-@return	handle to the thread */
-UNIV_INTERN
+thread should always use that to exit so that the thread count will be
+decremented.
+We do not return an error code because if there is one, we crash here. */
 os_thread_t
 os_thread_create_func(
 /*==================*/
@@ -117,36 +117,48 @@ os_thread_create_func(
 	os_thread_id_t*		thread_id);	/*!< out: id of the created
 						thread, or NULL */
 
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
+/** Exits the current thread. */
 void
-os_thread_exit(
-/*===========*/
-	void*	exit_value)	/*!< in: exit value; in Windows this void*
-				is cast as a DWORD */
+os_thread_exit()
 	UNIV_COLD MY_ATTRIBUTE((noreturn));
+
 /*****************************************************************//**
 Returns the thread identifier of current thread.
-@return	current thread identifier */
-UNIV_INTERN
+@return current thread identifier */
 os_thread_id_t
 os_thread_get_curr_id(void);
 /*========================*/
 /*****************************************************************//**
 Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
 void
 os_thread_yield(void);
 /*=================*/
 /*****************************************************************//**
 The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
 void
 os_thread_sleep(
 /*============*/
 	ulint	tm);	/*!< in: time in microseconds */
 
+/**
+Initializes OS thread management data structures. */
+void
+os_thread_init();
+/*============*/
+
+/**
+Frees OS thread management data structures. */
+void
+os_thread_free();
+/*============*/
+
+/*****************************************************************//**
+Check if there are threads active.
+@return true if the thread count > 0. */
+bool
+os_thread_active();
+/*==============*/
+
 #ifndef UNIV_NONINL
 #include "os0thread.ic"
 #endif
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index f04667ff29c..94b5896d3ad 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -33,29 +33,15 @@ Created 10/4/1994 Heikki Tuuri
 #include "rem0rec.h"
 #include "data0data.h"
 #include "mtr0mtr.h"
+#include "gis0type.h"
 
 
 #define PAGE_CUR_ADAPT
 
-/* Page cursor search modes; the values must be in this order! */
-
-#define	PAGE_CUR_UNSUPP	0
-#define	PAGE_CUR_G	1
-#define	PAGE_CUR_GE	2
-#define	PAGE_CUR_L	3
-#define	PAGE_CUR_LE	4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
-				 "column LIKE 'abc%' ORDER BY column DESC";
-				 we have to find strings which are <= 'abc' or
-				 which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG	6	/* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
-
 #ifdef UNIV_DEBUG
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
-@return	page */
+@return page */
 UNIV_INLINE
 page_t*
 page_cur_get_page(
@@ -63,7 +49,7 @@ page_cur_get_page(
 	page_cur_t*	cur);	/*!< in: page cursor */
 /*********************************************************//**
 Gets pointer to the buffer block where the cursor is positioned.
-@return	page */
+@return page */
 UNIV_INLINE
 buf_block_t*
 page_cur_get_block(
@@ -71,7 +57,7 @@ page_cur_get_block(
 	page_cur_t*	cur);	/*!< in: page cursor */
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
-@return	page */
+@return page */
 UNIV_INLINE
 page_zip_des_t*
 page_cur_get_page_zip(
@@ -79,7 +65,7 @@ page_cur_get_page_zip(
 	page_cur_t*	cur);	/*!< in: page cursor */
 /*********************************************************//**
 Gets the record where the cursor is positioned.
-@return	record */
+@return record */
 UNIV_INLINE
 rec_t*
 page_cur_get_rec(
@@ -111,7 +97,7 @@ page_cur_set_after_last(
 	page_cur_t*		cur);	/*!< in: cursor */
 /*********************************************************//**
 Returns TRUE if the cursor is before first user record on page.
-@return	TRUE if at start */
+@return TRUE if at start */
 UNIV_INLINE
 ibool
 page_cur_is_before_first(
@@ -119,7 +105,7 @@ page_cur_is_before_first(
 	const page_cur_t*	cur);	/*!< in: cursor */
 /*********************************************************//**
 Returns TRUE if the cursor is after last user record.
-@return	TRUE if at end */
+@return TRUE if at end */
 UNIV_INLINE
 ibool
 page_cur_is_after_last(
@@ -136,13 +122,6 @@ page_cur_position(
 					the record */
 	page_cur_t*		cur);	/*!< out: page cursor */
 /**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
-	page_cur_t*	cur);	/*!< out: page cursor */
-/**********************************************************//**
 Moves the cursor to the next record on page. */
 UNIV_INLINE
 void
@@ -168,7 +147,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
 UNIV_INLINE
 rec_t*
 page_cur_tuple_insert(
@@ -179,7 +158,10 @@ page_cur_tuple_insert(
 	ulint**		offsets,/*!< out: offsets on *rec */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+	mtr_t*		mtr,	/*!< in: mini-transaction handle, or NULL */
+	bool		use_cache = false)
+				/*!< in: if true, then use record cache to
+				hold the tuple converted record. */
 	MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
@@ -193,7 +175,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to record if succeed, NULL otherwise */
+@return pointer to record if succeed, NULL otherwise */
 UNIV_INLINE
 rec_t*
 page_cur_rec_insert(
@@ -207,8 +189,7 @@ page_cur_rec_insert(
 Inserts a record next to page cursor on an uncompressed page.
 Returns pointer to inserted record if succeed, i.e., enough
 space available, NULL otherwise. The cursor stays at the same position.
-@return	pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record if succeed, NULL otherwise */
 rec_t*
 page_cur_insert_rec_low(
 /*====================*/
@@ -219,6 +200,7 @@ page_cur_insert_rec_low(
 	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
 	MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
+
 /***********************************************************//**
 Inserts a record next to page cursor on a compressed and uncompressed
 page. Returns pointer to inserted record if succeed, i.e.,
@@ -230,8 +212,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
+@return pointer to record if succeed, NULL otherwise */
 rec_t*
 page_cur_insert_rec_zip(
 /*====================*/
@@ -249,7 +230,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
 if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
 void
 page_copy_rec_list_end_to_created_page(
 /*===================================*/
@@ -260,7 +240,6 @@ page_copy_rec_list_end_to_created_page(
 /***********************************************************//**
 Deletes a record at the page cursor. The cursor is moved to the
 next record after the deleted one. */
-UNIV_INTERN
 void
 page_cur_delete_rec(
 /*================*/
@@ -270,51 +249,83 @@ page_cur_delete_rec(
 					cursor->rec, index) */
 	mtr_t*			mtr);	/*!< in: mini-transaction handle */
 #ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return	number of matched fields on the left */
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE
+@param[out] cursor page cursor
+@return number of matched fields on the left */
 UNIV_INLINE
 ulint
 page_cur_search(
-/*============*/
-	const buf_block_t*	block,	/*!< in: buffer block */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	const dtuple_t*		tuple,	/*!< in: data tuple */
-	ulint			mode,	/*!< in: PAGE_CUR_L,
-					PAGE_CUR_LE, PAGE_CUR_G, or
-					PAGE_CUR_GE */
-	page_cur_t*		cursor);/*!< out: page cursor */
+	const buf_block_t*	block,
+	const dict_index_t*	index,
+	const dtuple_t*		tuple,
+	page_cur_mode_t		mode,
+	page_cur_t*		cursor);
+
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[out] cursor page cursor
+@return number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+	const buf_block_t*	block,
+	const dict_index_t*	index,
+	const dtuple_t*		tuple,
+	page_cur_t*		cursor);
+
 /****************************************************************//**
 Searches the right position for a page cursor. */
-UNIV_INTERN
 void
 page_cur_search_with_match(
 /*=======================*/
 	const buf_block_t*	block,	/*!< in: buffer block */
 	const dict_index_t*	index,	/*!< in: record descriptor */
 	const dtuple_t*		tuple,	/*!< in: data tuple */
-	ulint			mode,	/*!< in: PAGE_CUR_L,
+	page_cur_mode_t		mode,	/*!< in: PAGE_CUR_L,
 					PAGE_CUR_LE, PAGE_CUR_G, or
 					PAGE_CUR_GE */
 	ulint*			iup_matched_fields,
 					/*!< in/out: already matched
 					fields in upper limit record */
-	ulint*			iup_matched_bytes,
-					/*!< in/out: already matched
-					bytes in a field not yet
-					completely matched */
 	ulint*			ilow_matched_fields,
 					/*!< in/out: already matched
 					fields in lower limit record */
+	page_cur_t*		cursor,	/*!< out: page cursor */
+	rtr_info_t*		rtr_info);/*!< in/out: rtree search stack */
+/** Search the right position for a page cursor.
+@param[in]	block			buffer block
+@param[in]	index			index tree
+@param[in]	tuple			key to be searched for
+@param[in]	mode			search mode
+@param[in,out]	iup_matched_fields	already matched fields in the
+upper limit record
+@param[in,out]	iup_matched_bytes	already matched bytes in the
+first partially matched field in the upper limit record
+@param[in,out]	ilow_matched_fields	already matched fields in the
+lower limit record
+@param[in,out]	ilow_matched_bytes	already matched bytes in the
+first partially matched field in the lower limit record
+@param[out]	cursor			page cursor */
+void
+page_cur_search_with_match_bytes(
+	const buf_block_t*	block,
+	const dict_index_t*	index,
+	const dtuple_t*		tuple,
+	page_cur_mode_t		mode,
+	ulint*			iup_matched_fields,
+	ulint*			iup_matched_bytes,
+	ulint*			ilow_matched_fields,
 	ulint*			ilow_matched_bytes,
-					/*!< in/out: already matched
-					bytes in a field not yet
-					completely matched */
-	page_cur_t*		cursor);/*!< out: page cursor */
+	page_cur_t*		cursor);
 /***********************************************************//**
 Positions a page cursor on a randomly chosen user record on a page. If there
 are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
 void
 page_cur_open_on_rnd_user_rec(
 /*==========================*/
@@ -323,21 +334,19 @@ page_cur_open_on_rnd_user_rec(
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
 Parses a log record of a record insert on a page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_cur_parse_insert_rec(
 /*======================*/
 	ibool		is_short,/*!< in: TRUE if short inserts */
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
 	buf_block_t*	block,	/*!< in: page or NULL */
 	dict_index_t*	index,	/*!< in: record descriptor */
 	mtr_t*		mtr);	/*!< in: mtr or NULL */
 /**********************************************************//**
 Parses a log record of copying a record list end to a new created page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_parse_copy_rec_list_to_created_page(
 /*=====================================*/
@@ -348,8 +357,7 @@ page_parse_copy_rec_list_to_created_page(
 	mtr_t*		mtr);	/*!< in: mtr or NULL */
 /***********************************************************//**
 Parses log record of a record delete on a page.
-@return	pointer to record end or NULL */
-UNIV_INTERN
+@return pointer to record end or NULL */
 byte*
 page_cur_parse_delete_rec(
 /*======================*/
@@ -361,8 +369,7 @@ page_cur_parse_delete_rec(
 /*******************************************************//**
 Removes the record from a leaf page. This function does not log
 any changes. It is used by the IMPORT tablespace functions.
-@return	true if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return true if success, i.e., the page did not become too empty */
 bool
 page_delete_rec(
 /*============*/
@@ -376,7 +383,9 @@ page_delete_rec(
 /** Index page cursor */
 
 struct page_cur_t{
-	byte*		rec;	/*!< pointer to a record on page */
+	const dict_index_t*	index;
+	rec_t*		rec;	/*!< pointer to a record on page */
+	ulint*		offsets;
 	buf_block_t*	block;	/*!< pointer to the block containing rec */
 };
 
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 6e068d9f739..bfd9da47803 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2015, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -32,7 +32,7 @@ Created 10/4/1994 Heikki Tuuri
 
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
-@return	page */
+@return page */
 UNIV_INLINE
 page_t*
 page_cur_get_page(
@@ -50,7 +50,7 @@ page_cur_get_page(
 
 /*********************************************************//**
 Gets pointer to the buffer block where the cursor is positioned.
-@return	page */
+@return buffer block
 UNIV_INLINE
 buf_block_t*
 page_cur_get_block(
@@ -68,7 +68,7 @@ page_cur_get_block(
 
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
-@return	page */
+@return compressed page descriptor
 UNIV_INLINE
 page_zip_des_t*
 page_cur_get_page_zip(
@@ -80,7 +80,7 @@ page_cur_get_page_zip(
 
 /*********************************************************//**
 Gets the record where the cursor is positioned.
-@return	record */
+@return record */
 UNIV_INLINE
 rec_t*
 page_cur_get_rec(
@@ -127,7 +127,7 @@ page_cur_set_after_last(
 
 /*********************************************************//**
 Returns TRUE if the cursor is before first user record on page.
-@return	TRUE if at start */
+@return TRUE if at start */
 UNIV_INLINE
 ibool
 page_cur_is_before_first(
@@ -141,7 +141,7 @@ page_cur_is_before_first(
 
 /*********************************************************//**
 Returns TRUE if the cursor is after last user record.
-@return	TRUE if at end */
+@return TRUE if at end */
 UNIV_INLINE
 ibool
 page_cur_is_after_last(
@@ -172,20 +172,6 @@ page_cur_position(
 }
 
 /**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
-	page_cur_t*	cur)	/*!< out: page cursor */
-{
-	ut_ad(cur);
-
-	cur->rec = NULL;
-	cur->block = NULL;
-}
-
-/**********************************************************//**
 Moves the cursor to the next record on page. */
 UNIV_INLINE
 void
@@ -212,35 +198,47 @@ page_cur_move_to_prev(
 }
 
 #ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return	number of matched fields on the left */
+/** Search the right position for a page cursor.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE
+@param[out] cursor page cursor
+@return number of matched fields on the left */
 UNIV_INLINE
 ulint
 page_cur_search(
-/*============*/
-	const buf_block_t*	block,	/*!< in: buffer block */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	const dtuple_t*		tuple,	/*!< in: data tuple */
-	ulint			mode,	/*!< in: PAGE_CUR_L,
-					PAGE_CUR_LE, PAGE_CUR_G, or
-					PAGE_CUR_GE */
-	page_cur_t*		cursor)	/*!< out: page cursor */
+	const buf_block_t*	block,
+	const dict_index_t*	index,
+	const dtuple_t*		tuple,
+	page_cur_mode_t		mode,
+	page_cur_t*		cursor)
 {
-	ulint		low_matched_fields = 0;
-	ulint		low_matched_bytes = 0;
-	ulint		up_matched_fields = 0;
-	ulint		up_matched_bytes = 0;
+	ulint		low_match = 0;
+	ulint		up_match = 0;
 
 	ut_ad(dtuple_check_typed(tuple));
 
 	page_cur_search_with_match(block, index, tuple, mode,
-				   &up_matched_fields,
-				   &up_matched_bytes,
-				   &low_matched_fields,
-				   &low_matched_bytes,
-				   cursor);
-	return(low_matched_fields);
+				   &up_match, &low_match, cursor, NULL);
+	return(low_match);
+}
+
+/** Search the right position for a page cursor, using mode PAGE_CUR_LE.
+@param[in] block buffer block
+@param[in] index index tree
+@param[in] tuple data tuple
+@param[out] cursor page cursor
+@return number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+	const buf_block_t*	block,
+	const dict_index_t*	index,
+	const dtuple_t*		tuple,
+	page_cur_t*		cursor)
+{
+	return(page_cur_search(block, index, tuple, PAGE_CUR_LE, cursor));
 }
 
 /***********************************************************//**
@@ -254,7 +252,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to record if succeed, NULL otherwise */
+@return pointer to record on success, NULL otherwise */
 UNIV_INLINE
 rec_t*
 page_cur_tuple_insert(
@@ -265,11 +263,13 @@ page_cur_tuple_insert(
 	ulint**		offsets,/*!< out: offsets on *rec */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+	mtr_t*		mtr,	/*!< in: mini-transaction handle, or NULL */
+	bool		use_cache)
+				/*!< in: if true, then use record cache to
+				hold the record converted from the tuple. */
 {
-	ulint		size
-		= rec_get_converted_size(index, tuple, n_ext);
 	rec_t*		rec;
+	ulint		size = rec_get_converted_size(index, tuple, n_ext);
 
 	if (!*heap) {
 		*heap = mem_heap_create(size
@@ -280,8 +280,8 @@ page_cur_tuple_insert(
 
 	rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
 					index, tuple, n_ext);
-	*offsets = rec_get_offsets(
-		rec, index, *offsets, ULINT_UNDEFINED, heap);
+
+	*offsets = rec_get_offsets(rec, index, *offsets, ULINT_UNDEFINED, heap);
 
 	if (buf_block_get_page_zip(cursor->block)) {
 		rec = page_cur_insert_rec_zip(
@@ -307,7 +307,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to record if succeed, NULL otherwise */
+@return pointer to record on success, NULL otherwise */
 UNIV_INLINE
 rec_t*
 page_cur_rec_insert(
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index e8b4265bc68..3bb622127fb 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -36,7 +36,9 @@ Created 2/2/1994 Heikki Tuuri
 #include "data0data.h"
 #include "dict0dict.h"
 #include "rem0rec.h"
+#endif /* !UNIV_INNOCHECKSUM */
 #include "fsp0fsp.h"
+#ifndef UNIV_INNOCHECKSUM
 #include "mtr0mtr.h"
 
 #ifdef UNIV_MATERIALIZE
@@ -44,14 +46,13 @@ Created 2/2/1994 Heikki Tuuri
 #define UNIV_INLINE
 #endif
 
-#endif /* !UNIV_INNOCHECKSUM */
-
 /*			PAGE HEADER
 			===========
 
 Index page header starts at the first offset left free by the FIL-module */
 
 typedef	byte		page_header_t;
+#endif /* !UNIV_INNOCHECKSUM */
 
 #define	PAGE_HEADER	FSEG_PAGE_DATA	/* index page header starts at this
 				offset */
@@ -82,6 +83,9 @@ typedef	byte		page_header_t;
 #define	PAGE_INDEX_ID	 28	/* index id where the page belongs.
 				This field should not be written to after
 				page creation. */
+
+#ifndef UNIV_INNOCHECKSUM
+
 #define PAGE_BTR_SEG_LEAF 36	/* file segment header for the leaf pages in
 				a B-tree: defined only on the root page of a
 				B-tree, but not in the root of an ibuf tree */
@@ -121,8 +125,6 @@ typedef	byte		page_header_t;
 				a new-style compact page */
 /*-----------------------------*/
 
-#ifndef UNIV_INNOCHECKSUM
-
 /* Heap numbers */
 #define PAGE_HEAP_NO_INFIMUM	0	/* page infimum */
 #define PAGE_HEAP_NO_SUPREMUM	1	/* page supremum */
@@ -167,7 +169,7 @@ extern my_bool srv_immediate_scrub_data_uncompressed;
 
 /************************************************************//**
 Gets the start of a page.
-@return	start of the page */
+@return start of the page */
 UNIV_INLINE
 page_t*
 page_align(
@@ -176,7 +178,7 @@ page_align(
 		MY_ATTRIBUTE((const));
 /************************************************************//**
 Gets the offset within a page.
-@return	offset from the start of the page */
+@return offset from the start of the page */
 UNIV_INLINE
 ulint
 page_offset(
@@ -192,7 +194,6 @@ page_get_max_trx_id(
 	const page_t*	page);	/*!< in: page */
 /*************************************************************//**
 Sets the max trx id field value. */
-UNIV_INTERN
 void
 page_set_max_trx_id(
 /*================*/
@@ -213,6 +214,27 @@ page_update_max_trx_id(
 	trx_id_t	trx_id,	/*!< in: transaction id */
 	mtr_t*		mtr);	/*!< in/out: mini-transaction */
 /*************************************************************//**
+Returns the RTREE SPLIT SEQUENCE NUMBER (FIL_RTREE_SPLIT_SEQ_NUM).
+@return SPLIT SEQUENCE NUMBER */
+UNIV_INLINE
+node_seq_t
+page_get_ssn_id(
+/*============*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Sets the RTREE SPLIT SEQUENCE NUMBER field value */
+UNIV_INLINE
+void
+page_set_ssn_id(
+/*============*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	node_seq_t	ssn_id,	/*!< in: split sequence id */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+
+#endif /* !UNIV_INNOCHECKSUM */
+/*************************************************************//**
 Reads the given header field. */
 UNIV_INLINE
 ulint
@@ -220,6 +242,8 @@ page_header_get_field(
 /*==================*/
 	const page_t*	page,	/*!< in: page */
 	ulint		field);	/*!< in: PAGE_N_DIR_SLOTS, ... */
+
+#ifndef UNIV_INNOCHECKSUM
 /*************************************************************//**
 Sets the given header field. */
 UNIV_INLINE
@@ -233,14 +257,14 @@ page_header_set_field(
 	ulint		val);	/*!< in: value */
 /*************************************************************//**
 Returns the offset stored in the given header field.
-@return	offset from the start of the page, or 0 */
+@return offset from the start of the page, or 0 */
 UNIV_INLINE
 ulint
 page_header_get_offs(
 /*=================*/
 	const page_t*	page,	/*!< in: page */
 	ulint		field)	/*!< in: PAGE_FREE, ... */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /*************************************************************//**
 Returns the pointer stored in the given header field, or NULL. */
@@ -273,7 +297,7 @@ page_header_reset_last_insert(
 #endif /* !UNIV_HOTBACKUP */
 /************************************************************//**
 Gets the offset of the first record on the page.
-@return	offset of the first record in record list, relative from page */
+@return offset of the first record in record list, relative from page */
 UNIV_INLINE
 ulint
 page_get_infimum_offset(
@@ -281,7 +305,7 @@ page_get_infimum_offset(
 	const page_t*	page);	/*!< in: page which must have record(s) */
 /************************************************************//**
 Gets the offset of the last record on the page.
-@return	offset of the last record in record list, relative from page */
+@return offset of the last record in record list, relative from page */
 UNIV_INLINE
 ulint
 page_get_supremum_offset(
@@ -293,8 +317,7 @@ page_get_supremum_offset(
 /************************************************************//**
 Returns the nth record of the record list.
 This is the inverse function of page_rec_get_n_recs_before().
-@return	nth record */
-UNIV_INTERN
+@return nth record */
 const rec_t*
 page_rec_get_nth_const(
 /*===================*/
@@ -304,7 +327,7 @@ page_rec_get_nth_const(
 /************************************************************//**
 Returns the nth record of the record list.
 This is the inverse function of page_rec_get_n_recs_before().
-@return	nth record */
+@return nth record */
 UNIV_INLINE
 rec_t*
 page_rec_get_nth(
@@ -312,87 +335,59 @@ page_rec_get_nth(
 	page_t*	page,	/*< in: page */
 	ulint	nth)	/*!< in: nth record */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-
 #ifndef UNIV_HOTBACKUP
 /************************************************************//**
 Returns the middle record of the records on the page. If there is an
 even number of records in the list, returns the first record of the
 upper half-list.
-@return	middle record */
+@return middle record */
 UNIV_INLINE
 rec_t*
 page_get_middle_rec(
 /*================*/
 	page_t*	page)	/*!< in: page */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record on a page; may also
-				be page infimum or supremum, in which case
-				matched-parameter values below are not
-				affected */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /*!< in/out: number of already completely
-				matched fields; when function returns
-				contains the value for current comparison */
-	ulint*		matched_bytes); /*!< in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns contains the
-				value for current comparison */
 #endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
 /*************************************************************//**
 Gets the page number.
-@return	page number */
+@return page number */
 UNIV_INLINE
 ulint
 page_get_page_no(
 /*=============*/
 	const page_t*	page);	/*!< in: page */
-#ifndef UNIV_INNOCHECKSUM
+
 /*************************************************************//**
 Gets the tablespace identifier.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 page_get_space_id(
 /*==============*/
 	const page_t*	page);	/*!< in: page */
-#endif /* !UNIV_INNOCHECKSUM */
+
 /*************************************************************//**
 Gets the number of user records on page (the infimum and supremum records
 are not user records).
-@return	number of user records */
+@return number of user records */
 UNIV_INLINE
 ulint
 page_get_n_recs(
 /*============*/
 	const page_t*	page);	/*!< in: index page */
-#ifndef UNIV_INNOCHECKSUM
+
 /***************************************************************//**
 Returns the number of records before the given record in chain.
 The number includes infimum and supremum records.
 This is the inverse function of page_rec_get_nth().
-@return	number of records */
-UNIV_INTERN
+@return number of records */
 ulint
 page_rec_get_n_recs_before(
 /*=======================*/
 	const rec_t*	rec);	/*!< in: the physical record */
 /*************************************************************//**
 Gets the number of records in the heap.
-@return	number of user records */
+@return number of records in the heap */
 UNIV_INLINE
 ulint
 page_dir_get_n_heap(
@@ -413,7 +408,7 @@ page_dir_set_n_heap(
 	ulint		n_heap);/*!< in: number of records */
 /*************************************************************//**
 Gets the number of dir slots in directory.
-@return	number of slots */
+@return number of slots */
 UNIV_INLINE
 ulint
 page_dir_get_n_slots(
@@ -432,7 +427,7 @@ page_dir_set_n_slots(
 #ifdef UNIV_DEBUG
 /*************************************************************//**
 Gets pointer to nth directory slot.
-@return	pointer to dir slot */
+@return pointer to dir slot */
 UNIV_INLINE
 page_dir_slot_t*
 page_dir_get_nth_slot(
@@ -440,13 +435,13 @@ page_dir_get_nth_slot(
 	const page_t*	page,	/*!< in: index page */
 	ulint		n);	/*!< in: position */
 #else /* UNIV_DEBUG */
-# define page_dir_get_nth_slot(page, n)		\
-	((page) + UNIV_PAGE_SIZE - PAGE_DIR	\
-	 - (n + 1) * PAGE_DIR_SLOT_SIZE)
+# define page_dir_get_nth_slot(page, n)			\
+	((page) + (UNIV_PAGE_SIZE - PAGE_DIR		\
+		   - ((n) + 1) * PAGE_DIR_SLOT_SIZE))
 #endif /* UNIV_DEBUG */
 /**************************************************************//**
 Used to check the consistency of a record on a page.
-@return	TRUE if succeed */
+@return TRUE if the check succeeds */
 UNIV_INLINE
 ibool
 page_rec_check(
@@ -454,7 +449,7 @@ page_rec_check(
 	const rec_t*	rec);	/*!< in: record */
 /***************************************************************//**
 Gets the record pointed to by a directory slot.
-@return	pointer to record */
+@return pointer to record */
 UNIV_INLINE
 const rec_t*
 page_dir_slot_get_rec(
@@ -470,7 +465,7 @@ page_dir_slot_set_rec(
 	rec_t*		 rec);	/*!< in: record on the page */
 /***************************************************************//**
 Gets the number of records owned by a directory slot.
-@return	number of records */
+@return number of records */
 UNIV_INLINE
 ulint
 page_dir_slot_get_n_owned(
@@ -497,8 +492,7 @@ page_dir_calc_reserved_space(
 	ulint	n_recs);	/*!< in: number of records */
 /***************************************************************//**
 Looks for the directory slot which owns the given record.
-@return	the directory slot number */
-UNIV_INTERN
+@return the directory slot number */
 ulint
 page_dir_find_owner_slot(
 /*=====================*/
@@ -514,7 +508,7 @@ page_is_comp(
 	const page_t*	page);	/*!< in: index page */
 /************************************************************//**
 TRUE if the record is on a page in compact format.
-@return	nonzero if in compact format */
+@return nonzero if in compact format */
 UNIV_INLINE
 ulint
 page_rec_is_comp(
@@ -522,44 +516,50 @@ page_rec_is_comp(
 	const rec_t*	rec);	/*!< in: record */
 /***************************************************************//**
 Returns the heap number of a record.
-@return	heap number */
+@return heap number */
 UNIV_INLINE
 ulint
 page_rec_get_heap_no(
 /*=================*/
 	const rec_t*	rec);	/*!< in: the physical record */
-#endif /* !UNIV_INNOCHECKSUM */
 /************************************************************//**
 Determine whether the page is a B-tree leaf.
-@return	true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
 UNIV_INLINE
 bool
 page_is_leaf(
 /*=========*/
 	const page_t*	page)	/*!< in: page */
-	MY_ATTRIBUTE((nonnull, pure));
-#ifndef UNIV_INNOCHECKSUM
+	MY_ATTRIBUTE((warn_unused_result));
 /************************************************************//**
 Determine whether the page is empty.
-@return	true if the page is empty (PAGE_N_RECS = 0) */
+@return true if the page is empty (PAGE_N_RECS = 0) */
 UNIV_INLINE
 bool
 page_is_empty(
 /*==========*/
 	const page_t*	page)	/*!< in: page */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
+/** Determine whether a page is an index root page.
+@param[in]	page	page frame
+@return true if the page is a root page of an index */
+UNIV_INLINE
+bool
+page_is_root(
+	const page_t*	page)
+	MY_ATTRIBUTE((warn_unused_result));
 /************************************************************//**
 Determine whether the page contains garbage.
-@return	true if the page contains garbage (PAGE_GARBAGE is not 0) */
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
 UNIV_INLINE
 bool
 page_has_garbage(
 /*=============*/
 	const page_t*	page)	/*!< in: page */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_low(
@@ -568,7 +568,7 @@ page_rec_get_next_low(
 	ulint		comp);	/*!< in: nonzero=compact page layout */
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 rec_t*
 page_rec_get_next(
@@ -576,7 +576,7 @@ page_rec_get_next(
 	rec_t*	rec);	/*!< in: pointer to record */
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_const(
@@ -586,7 +586,7 @@ page_rec_get_next_const(
 Gets the pointer to the next non delete-marked record on the page.
 If all subsequent records are delete-marked, then this function
 will return the supremum record.
-@return	pointer to next non delete-marked record or pointer to supremum */
+@return pointer to next non delete-marked record or pointer to supremum */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_non_del_marked(
@@ -604,7 +604,7 @@ page_rec_set_next(
 				must not be page infimum */
 /************************************************************//**
 Gets the pointer to the previous record.
-@return	pointer to previous record */
+@return pointer to previous record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_prev_const(
@@ -613,7 +613,7 @@ page_rec_get_prev_const(
 				infimum */
 /************************************************************//**
 Gets the pointer to the previous record.
-@return	pointer to previous record */
+@return pointer to previous record */
 UNIV_INLINE
 rec_t*
 page_rec_get_prev(
@@ -622,7 +622,7 @@ page_rec_get_prev(
 				must not be page infimum */
 /************************************************************//**
 TRUE if the record is a user record on the page.
-@return	TRUE if a user record */
+@return TRUE if a user record */
 UNIV_INLINE
 ibool
 page_rec_is_user_rec_low(
@@ -631,7 +631,7 @@ page_rec_is_user_rec_low(
 	MY_ATTRIBUTE((const));
 /************************************************************//**
 TRUE if the record is the supremum record on a page.
-@return	TRUE if the supremum record */
+@return TRUE if the supremum record */
 UNIV_INLINE
 ibool
 page_rec_is_supremum_low(
@@ -640,7 +640,7 @@ page_rec_is_supremum_low(
 	MY_ATTRIBUTE((const));
 /************************************************************//**
 TRUE if the record is the infimum record on a page.
-@return	TRUE if the infimum record */
+@return TRUE if the infimum record */
 UNIV_INLINE
 ibool
 page_rec_is_infimum_low(
@@ -650,35 +650,80 @@ page_rec_is_infimum_low(
 
 /************************************************************//**
 TRUE if the record is a user record on the page.
-@return	TRUE if a user record */
+@return TRUE if a user record */
 UNIV_INLINE
 ibool
 page_rec_is_user_rec(
 /*=================*/
 	const rec_t*	rec)	/*!< in: record */
-	MY_ATTRIBUTE((const));
+	MY_ATTRIBUTE((warn_unused_result));
 /************************************************************//**
 TRUE if the record is the supremum record on a page.
-@return	TRUE if the supremum record */
+@return TRUE if the supremum record */
 UNIV_INLINE
 ibool
 page_rec_is_supremum(
 /*=================*/
 	const rec_t*	rec)	/*!< in: record */
-	MY_ATTRIBUTE((const));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /************************************************************//**
 TRUE if the record is the infimum record on a page.
-@return	TRUE if the infimum record */
+@return TRUE if the infimum record */
 UNIV_INLINE
 ibool
 page_rec_is_infimum(
 /*================*/
 	const rec_t*	rec)	/*!< in: record */
-	MY_ATTRIBUTE((const));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/************************************************************//**
+true if the record is the first user record on a page.
+@return true if the first user record */
+UNIV_INLINE
+bool
+page_rec_is_first(
+/*==============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/************************************************************//**
+true if the record is the second user record on a page.
+@return true if the second user record */
+UNIV_INLINE
+bool
+page_rec_is_second(
+/*===============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/************************************************************//**
+true if the record is the last user record on a page.
+@return true if the last user record */
+UNIV_INLINE
+bool
+page_rec_is_last(
+/*=============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+	MY_ATTRIBUTE((warn_unused_result));
+
+/************************************************************//**
+true if the record is the second last user record on a page.
+@return true if the second last user record */
+UNIV_INLINE
+bool
+page_rec_is_second_last(
+/*====================*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+	MY_ATTRIBUTE((warn_unused_result));
+
 /***************************************************************//**
 Looks for the record which owns the given record.
-@return	the owner record */
+@return the owner record */
 UNIV_INLINE
 rec_t*
 page_rec_find_owner_rec(
@@ -700,7 +745,7 @@ page_rec_write_field(
 /************************************************************//**
 Returns the maximum combined size of records which can be inserted on top
 of record heap.
-@return	maximum combined size for inserted records */
+@return maximum combined size for inserted records */
 UNIV_INLINE
 ulint
 page_get_max_insert_size(
@@ -710,7 +755,7 @@ page_get_max_insert_size(
 /************************************************************//**
 Returns the maximum combined size of records which can be inserted on top
 of record heap if page is first reorganized.
-@return	maximum combined size for inserted records */
+@return maximum combined size for inserted records */
 UNIV_INLINE
 ulint
 page_get_max_insert_size_after_reorganize(
@@ -719,7 +764,7 @@ page_get_max_insert_size_after_reorganize(
 	ulint		n_recs);/*!< in: number of records */
 /*************************************************************//**
 Calculates free space if a page is emptied.
-@return	free space */
+@return free space */
 UNIV_INLINE
 ulint
 page_get_free_space_of_empty(
@@ -729,7 +774,7 @@ page_get_free_space_of_empty(
 /**********************************************************//**
 Returns the base extra size of a physical record.  This is the
 size of the fixed header, independent of the record size.
-@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
 UNIV_INLINE
 ulint
 page_rec_get_base_extra_size(
@@ -738,7 +783,7 @@ page_rec_get_base_extra_size(
 /************************************************************//**
 Returns the sum of the sizes of the records in the record list
 excluding the infimum and supremum records.
-@return	data in bytes */
+@return data in bytes */
 UNIV_INLINE
 ulint
 page_get_data_size(
@@ -760,8 +805,7 @@ page_mem_alloc_free(
 	ulint		need);	/*!< in: number of bytes allocated */
 /************************************************************//**
 Allocates a block of memory from the heap of an index page.
-@return	pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
+@return pointer to start of allocated buffer, or NULL if allocation fails */
 byte*
 page_mem_alloc_heap(
 /*================*/
@@ -789,32 +833,38 @@ page_mem_free(
 					 rec_get_offsets() */
 /**********************************************************//**
 Create an uncompressed B-tree index page.
-@return	pointer to the page */
-UNIV_INTERN
+@return pointer to the page */
 page_t*
 page_create(
 /*========*/
 	buf_block_t*	block,		/*!< in: a buffer block where the
 					page is created */
 	mtr_t*		mtr,		/*!< in: mini-transaction handle */
-	ulint		comp);		/*!< in: nonzero=compact page format */
+	ulint		comp,		/*!< in: nonzero=compact page format */
+	bool		is_rtree);	/*!< in: if creating R-tree page */
 /**********************************************************//**
 Create a compressed B-tree index page.
-@return	pointer to the page */
-UNIV_INTERN
+@return pointer to the page */
 page_t*
 page_create_zip(
 /*============*/
-	buf_block_t*	block,		/*!< in/out: a buffer frame where the
-					page is created */
-	dict_index_t*	index,		/*!< in: the index of the page */
-	ulint		level,		/*!< in: the B-tree level of the page */
-	trx_id_t	max_trx_id,	/*!< in: PAGE_MAX_TRX_ID */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
-	MY_ATTRIBUTE((nonnull));
+	buf_block_t*		block,		/*!< in/out: a buffer frame
+						where the page is created */
+	dict_index_t*		index,		/*!< in: the index of the
+						page, or NULL when applying
+						TRUNCATE log
+						record during recovery */
+	ulint			level,		/*!< in: the B-tree level of
+						the page */
+	trx_id_t		max_trx_id,	/*!< in: PAGE_MAX_TRX_ID */
+	const redo_page_compress_t* page_comp_info,
+						/*!< in: used for applying
+						TRUNCATE log
+						record during recovery */
+	mtr_t*			mtr);		/*!< in/out: mini-transaction
+						handle */
 /**********************************************************//**
 Empty a previously created B-tree index page. */
-UNIV_INTERN
 void
 page_create_empty(
 /*==============*/
@@ -830,7 +880,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
 if new_block is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
 void
 page_copy_rec_list_end_no_locks(
 /*============================*/
@@ -851,7 +900,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
 
 @return pointer to the original successor of the infimum record on
 new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
 rec_t*
 page_copy_rec_list_end(
 /*===================*/
@@ -873,7 +921,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
 
 @return pointer to the original predecessor of the supremum record on
 new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
 rec_t*
 page_copy_rec_list_start(
 /*=====================*/
@@ -886,7 +933,6 @@ page_copy_rec_list_start(
 /*************************************************************//**
 Deletes records from a page from a given record onward, including that record.
 The infimum and supremum records are not deleted. */
-UNIV_INTERN
 void
 page_delete_rec_list_end(
 /*=====================*/
@@ -903,7 +949,6 @@ page_delete_rec_list_end(
 /*************************************************************//**
 Deletes records from page, up to the given record, NOT including
 that record. Infimum and supremum records are not deleted. */
-UNIV_INTERN
 void
 page_delete_rec_list_start(
 /*=======================*/
@@ -923,7 +968,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit().
 
 @return TRUE on success; FALSE on compression failure (new_block will
 be decompressed) */
-UNIV_INTERN
 ibool
 page_move_rec_list_end(
 /*===================*/
@@ -942,8 +986,7 @@ if new_block is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	TRUE on success; FALSE on compression failure */
-UNIV_INTERN
+@return TRUE on success; FALSE on compression failure */
 ibool
 page_move_rec_list_start(
 /*=====================*/
@@ -955,7 +998,6 @@ page_move_rec_list_start(
 	MY_ATTRIBUTE((nonnull(1, 2, 4, 5)));
 /****************************************************************//**
 Splits a directory slot which owns too many records. */
-UNIV_INTERN
 void
 page_dir_split_slot(
 /*================*/
@@ -969,7 +1011,6 @@ Tries to balance the given directory slot with too few records
 with the upper neighbor, so that there are at least the minimum number
 of records owned by the slot; this may result in the merging of
 two slots. */
-UNIV_INTERN
 void
 page_dir_balance_slot(
 /*==================*/
@@ -979,12 +1020,11 @@ page_dir_balance_slot(
 	MY_ATTRIBUTE((nonnull(1)));
 /**********************************************************//**
 Parses a log record of a record list end or start deletion.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_parse_delete_rec_list(
 /*=======================*/
-	byte		type,	/*!< in: MLOG_LIST_END_DELETE,
+	mlog_id_t	type,	/*!< in: MLOG_LIST_END_DELETE,
 				MLOG_LIST_START_DELETE,
 				MLOG_COMP_LIST_END_DELETE or
 				MLOG_COMP_LIST_START_DELETE */
@@ -993,23 +1033,19 @@ page_parse_delete_rec_list(
 	buf_block_t*	block,	/*!< in/out: buffer block or NULL */
 	dict_index_t*	index,	/*!< in: record descriptor */
 	mtr_t*		mtr);	/*!< in: mtr or NULL */
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return	end of log record or NULL */
-UNIV_INTERN
-byte*
+/** Parses a redo log record of creating a page.
+@param[in,out]	block	buffer block, or NULL
+@param[in]	comp	nonzero=compact page format
+@param[in]	is_rtree whether it is rtree page */
+void
 page_parse_create(
-/*==============*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	ulint		comp,	/*!< in: nonzero=compact page format */
-	buf_block_t*	block,	/*!< in: block or NULL */
-	mtr_t*		mtr);	/*!< in: mtr or NULL */
+	buf_block_t*	block,
+	ulint		comp,
+	bool		is_rtree);
 #ifndef UNIV_HOTBACKUP
 /************************************************************//**
 Prints record contents including the data relevant only in
 the index page context. */
-UNIV_INTERN
 void
 page_rec_print(
 /*===========*/
@@ -1019,7 +1055,6 @@ page_rec_print(
 /***************************************************************//**
 This is used to print the contents of the directory for
 debugging purposes. */
-UNIV_INTERN
 void
 page_dir_print(
 /*===========*/
@@ -1028,7 +1063,6 @@ page_dir_print(
 /***************************************************************//**
 This is used to print the contents of the page record list for
 debugging purposes. */
-UNIV_INTERN
 void
 page_print_list(
 /*============*/
@@ -1037,7 +1071,6 @@ page_print_list(
 	ulint		pr_n);	/*!< in: print n first and n last entries */
 /***************************************************************//**
 Prints the info in a page header. */
-UNIV_INTERN
 void
 page_header_print(
 /*==============*/
@@ -1045,7 +1078,6 @@ page_header_print(
 /***************************************************************//**
 This is used to print the contents of the page for
 debugging purposes. */
-UNIV_INTERN
 void
 page_print(
 /*=======*/
@@ -1061,28 +1093,27 @@ page_print(
 The following is used to validate a record on a page. This function
 differs from rec_validate as it can also check the n_owned field and
 the heap_no field.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 page_rec_validate(
 /*==============*/
 	const rec_t*	rec,	/*!< in: physical record */
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+#ifdef UNIV_DEBUG
 /***************************************************************//**
 Checks that the first directory slot points to the infimum record and
 the last to the supremum. This function is intended to track if the
 bug fixed in 4.0.14 has caused corruption to users' databases. */
-UNIV_INTERN
 void
 page_check_dir(
 /*===========*/
 	const page_t*	page);	/*!< in: index page */
+#endif /* UNIV_DEBUG */
 /***************************************************************//**
 This function checks the consistency of an index page when we do not
 know the index. This is also resilient so that this should never crash
 even if the page is total garbage.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 page_simple_validate_old(
 /*=====================*/
@@ -1091,16 +1122,14 @@ page_simple_validate_old(
 This function checks the consistency of an index page when we do not
 know the index. This is also resilient so that this should never crash
 even if the page is total garbage.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 page_simple_validate_new(
 /*=====================*/
 	const page_t*	page);	/*!< in: index page in ROW_FORMAT!=REDUNDANT */
 /***************************************************************//**
 This function checks the consistency of an index page.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 page_validate(
 /*==========*/
@@ -1109,8 +1138,7 @@ page_validate(
 				the page record type definition */
 /***************************************************************//**
 Looks in the page record list for a record with the given heap number.
-@return	record, NULL if not found */
-
+@return record, NULL if not found */
 const rec_t*
 page_find_rec_with_heap_no(
 /*=======================*/
@@ -1120,7 +1148,6 @@ page_find_rec_with_heap_no(
 @param[in]	page	index tree leaf page
 @return the last record, not delete-marked
 @retval infimum record if all records are delete-marked */
-
 const rec_t*
 page_find_rec_max_not_deleted(
 	const page_t*	page);
@@ -1129,14 +1156,12 @@ page_find_rec_max_not_deleted(
 but different than the global setting innodb_checksum_algorithm.
 @param[in]	current_algo	current checksum algorithm
 @param[in]	page_checksum	page valid checksum
-@param[in]	space_id	tablespace id
-@param[in]	page_no		page number */
+@param[in]	page_id		page identifier */
 void
 page_warn_strict_checksum(
 	srv_checksum_algorithm_t	curr_algo,
 	srv_checksum_algorithm_t	page_checksum,
-	ulint				space_id,
-	ulint				page_no);
+	const page_id_t&		page_id);
 
 #ifdef UNIV_MATERIALIZE
 #undef UNIV_INLINE
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index d7f1db82858..0a0ff41774c 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,9 +23,11 @@ Index page routines
 Created 2/2/1994 Heikki Tuuri
 *******************************************************/
 
-#ifndef UNIV_INNOCHECKSUM
+#ifndef page0page_ic
+#define page0page_ic
 
 #include "mach0data.h"
+#ifndef UNIV_INNOCHECKSUM
 #ifdef UNIV_DEBUG
 # include "log0recv.h"
 #endif /* !UNIV_DEBUG */
@@ -40,10 +42,9 @@ Created 2/2/1994 Heikki Tuuri
 #define UNIV_INLINE
 #endif
 
-#endif /* !UNIV_INNOCHECKSUM */
 /************************************************************//**
 Gets the start of a page.
-@return	start of the page */
+@return start of the page */
 UNIV_INLINE
 page_t*
 page_align(
@@ -53,10 +54,9 @@ page_align(
 	return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
 }
 
-#ifndef UNIV_INNOCHECKSUM
 /************************************************************//**
 Gets the offset within a page.
-@return	offset from the start of the page */
+@return offset from the start of the page */
 UNIV_INLINE
 ulint
 page_offset(
@@ -108,7 +108,55 @@ page_update_max_trx_id(
 	}
 }
 
+/*************************************************************//**
+Returns the RTREE SPLIT SEQUENCE NUMBER (FIL_RTREE_SPLIT_SEQ_NUM).
+@return SPLIT SEQUENCE NUMBER */
+UNIV_INLINE
+node_seq_t
+page_get_ssn_id(
+/*============*/
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page);
+
+	return(static_cast<node_seq_t>(
+		mach_read_from_8(page + FIL_RTREE_SPLIT_SEQ_NUM)));
+}
+
+/*************************************************************//**
+Sets the RTREE SPLIT SEQUENCE NUMBER field value */
+UNIV_INLINE
+void
+page_set_ssn_id(
+/*============*/
+	buf_block_t*	block,	/*!< in/out: buffer block of the page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	node_seq_t	ssn_id,	/*!< in: split sequence number */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	page_t*	page = buf_block_get_frame(block);
+#ifndef UNIV_HOTBACKUP
+	ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_SX_FIX)
+	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#endif /* !UNIV_HOTBACKUP */
+
+	if (page_zip) {
+		mach_write_to_8(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id);
+		page_zip_write_header(page_zip,
+				      page + FIL_RTREE_SPLIT_SEQ_NUM,
+				      8, mtr);
+#ifndef UNIV_HOTBACKUP
+	} else if (mtr) {
+		mlog_write_ull(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id, mtr);
+#endif /* !UNIV_HOTBACKUP */
+	} else {
+		mach_write_to_8(page + FIL_RTREE_SPLIT_SEQ_NUM, ssn_id);
+	}
+}
+
 #endif /* !UNIV_INNOCHECKSUM */
+
 /*************************************************************//**
 Reads the given header field. */
 UNIV_INLINE
@@ -125,6 +173,7 @@ page_header_get_field(
 }
 
 #ifndef UNIV_INNOCHECKSUM
+
 /*************************************************************//**
 Sets the given header field. */
 UNIV_INLINE
@@ -151,7 +200,7 @@ page_header_set_field(
 
 /*************************************************************//**
 Returns the offset stored in the given header field.
-@return	offset from the start of the page, or 0 */
+@return offset from the start of the page, or 0 */
 UNIV_INLINE
 ulint
 page_header_get_offs(
@@ -216,7 +265,8 @@ page_header_reset_last_insert(
 				uncompressed part will be updated, or NULL */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ut_ad(page && mtr);
+	ut_ad(page != NULL);
+	ut_ad(mtr != NULL);
 
 	if (page_zip) {
 		mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
@@ -230,12 +280,22 @@ page_header_reset_last_insert(
 }
 #endif /* !UNIV_HOTBACKUP */
 
-#endif /* !UNIV_INNOCHECKSUM */
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
+UNIV_INLINE
+ulint
+page_is_comp(
+/*=========*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
+}
 
-#ifndef UNIV_INNOCHECKSUM
 /************************************************************//**
 TRUE if the record is on a page in compact format.
-@return	nonzero if in compact format */
+@return nonzero if in compact format */
 UNIV_INLINE
 ulint
 page_rec_is_comp(
@@ -247,7 +307,7 @@ page_rec_is_comp(
 
 /***************************************************************//**
 Returns the heap number of a record.
-@return	heap number */
+@return heap number */
 UNIV_INLINE
 ulint
 page_rec_get_heap_no(
@@ -261,10 +321,9 @@ page_rec_get_heap_no(
 	}
 }
 
-#endif /* !UNIV_INNOCHECKSUM */
 /************************************************************//**
 Determine whether the page is a B-tree leaf.
-@return	true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
+@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
 UNIV_INLINE
 bool
 page_is_leaf(
@@ -274,10 +333,9 @@ page_is_leaf(
 	return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
 }
 
-#ifndef UNIV_INNOCHECKSUM
 /************************************************************//**
 Determine whether the page is empty.
-@return	true if the page is empty (PAGE_N_RECS = 0) */
+@return true if the page is empty (PAGE_N_RECS = 0) */
 UNIV_INLINE
 bool
 page_is_empty(
@@ -287,9 +345,34 @@ page_is_empty(
 	return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
 }
 
+/** Determine whether a page is an index root page.
+@param[in]	page	page frame
+@return true if the page is a root page of an index */
+UNIV_INLINE
+bool
+page_is_root(
+	const page_t*	page)
+{
+#if FIL_PAGE_PREV % 8
+# error FIL_PAGE_PREV must be 64-bit aligned
+#endif
+#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
+# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
+#endif
+#if FIL_NULL != 0xffffffff
+# error FIL_NULL != 0xffffffff
+#endif
+	/* Check that this is an index page and both the PREV and NEXT
+	pointers are FIL_NULL, because the root page does not have any
+	siblings. */
+	return(fil_page_index_page_check(page)
+	       && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV)
+	       == IB_UINT64_MAX);
+}
+
 /************************************************************//**
 Determine whether the page contains garbage.
-@return	true if the page contains garbage (PAGE_GARBAGE is not 0) */
+@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
 UNIV_INLINE
 bool
 page_has_garbage(
@@ -301,7 +384,7 @@ page_has_garbage(
 
 /************************************************************//**
 Gets the offset of the first record on the page.
-@return	offset of the first record in record list, relative from page */
+@return offset of the first record in record list, relative from page */
 UNIV_INLINE
 ulint
 page_get_infimum_offset(
@@ -320,7 +403,7 @@ page_get_infimum_offset(
 
 /************************************************************//**
 Gets the offset of the last record on the page.
-@return	offset of the last record in record list, relative from page */
+@return offset of the last record in record list, relative from page */
 UNIV_INLINE
 ulint
 page_get_supremum_offset(
@@ -339,7 +422,7 @@ page_get_supremum_offset(
 
 /************************************************************//**
 TRUE if the record is a user record on the page.
-@return	TRUE if a user record */
+@return TRUE if a user record */
 UNIV_INLINE
 ibool
 page_rec_is_user_rec_low(
@@ -375,7 +458,7 @@ page_rec_is_user_rec_low(
 
 /************************************************************//**
 TRUE if the record is the supremum record on a page.
-@return	TRUE if the supremum record */
+@return TRUE if the supremum record */
 UNIV_INLINE
 ibool
 page_rec_is_supremum_low(
@@ -391,7 +474,7 @@ page_rec_is_supremum_low(
 
 /************************************************************//**
 TRUE if the record is the infimum record on a page.
-@return	TRUE if the infimum record */
+@return TRUE if the infimum record */
 UNIV_INLINE
 ibool
 page_rec_is_infimum_low(
@@ -406,7 +489,7 @@ page_rec_is_infimum_low(
 
 /************************************************************//**
 TRUE if the record is a user record on the page.
-@return	TRUE if a user record */
+@return TRUE if a user record */
 UNIV_INLINE
 ibool
 page_rec_is_user_rec(
@@ -420,7 +503,7 @@ page_rec_is_user_rec(
 
 /************************************************************//**
 TRUE if the record is the supremum record on a page.
-@return	TRUE if the supremum record */
+@return TRUE if the supremum record */
 UNIV_INLINE
 ibool
 page_rec_is_supremum(
@@ -434,7 +517,7 @@ page_rec_is_supremum(
 
 /************************************************************//**
 TRUE if the record is the infimum record on a page.
-@return	TRUE if the infimum record */
+@return TRUE if the infimum record */
 UNIV_INLINE
 ibool
 page_rec_is_infimum(
@@ -447,9 +530,72 @@ page_rec_is_infimum(
 }
 
 /************************************************************//**
+true if the record is the first user record on a page.
+@return true if the first user record */
+UNIV_INLINE
+bool
+page_rec_is_first(
+/*==============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page_get_n_recs(page) > 0);
+
+	return(page_rec_get_next_const(page_get_infimum_rec(page)) == rec);
+}
+
+/************************************************************//**
+true if the record is the second user record on a page.
+@return true if the second user record */
+UNIV_INLINE
+bool
+page_rec_is_second(
+/*===============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page_get_n_recs(page) > 1);
+
+	return(page_rec_get_next_const(
+		page_rec_get_next_const(page_get_infimum_rec(page))) == rec);
+}
+
+/************************************************************//**
+true if the record is the last user record on a page.
+@return true if the last user record */
+UNIV_INLINE
+bool
+page_rec_is_last(
+/*=============*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page_get_n_recs(page) > 0);
+
+	return(page_rec_get_next_const(rec) == page_get_supremum_rec(page));
+}
+
+/************************************************************//**
+true if the record is the second last user record on a page.
+@return true if the second last user record */
+UNIV_INLINE
+bool
+page_rec_is_second_last(
+/*====================*/
+	const rec_t*	rec,	/*!< in: record */
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page_get_n_recs(page) > 1);
+	ut_ad(!page_rec_is_last(rec, page));
+
+	return(page_rec_get_next_const(
+		page_rec_get_next_const(rec)) == page_get_supremum_rec(page));
+}
+
+/************************************************************//**
 Returns the nth record of the record list.
 This is the inverse function of page_rec_get_n_recs_before().
-@return	nth record */
+@return nth record */
 UNIV_INLINE
 rec_t*
 page_rec_get_nth(
@@ -465,7 +611,7 @@ page_rec_get_nth(
 Returns the middle record of the records on the page. If there is an
 even number of records in the list, returns the first record of the
 upper half-list.
-@return	middle record */
+@return middle record */
 UNIV_INLINE
 rec_t*
 page_get_middle_rec(
@@ -476,62 +622,11 @@ page_get_middle_rec(
 
 	return(page_rec_get_nth(page, middle));
 }
-
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record on a page; may also
-				be page infimum or supremum, in which case
-				matched-parameter values below are not
-				affected */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /*!< in/out: number of already completely
-				matched fields; when function returns
-				contains the value for current comparison */
-	ulint*		matched_bytes) /*!< in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns contains the
-				value for current comparison */
-{
-	ulint	rec_offset;
-
-	ut_ad(dtuple_check_typed(dtuple));
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
-	rec_offset = page_offset(rec);
-
-	if (rec_offset == PAGE_NEW_INFIMUM
-	    || rec_offset == PAGE_OLD_INFIMUM) {
-
-		return(1);
-
-	} else if (rec_offset == PAGE_NEW_SUPREMUM
-		   || rec_offset == PAGE_OLD_SUPREMUM) {
-
-		return(-1);
-	}
-
-	return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
-					 matched_fields,
-					 matched_bytes));
-}
 #endif /* !UNIV_HOTBACKUP */
 
-#endif /* !UNIV_INNOCHECKSUM */
 /*************************************************************//**
 Gets the page number.
-@return	page number */
+@return page number */
 UNIV_INLINE
 ulint
 page_get_page_no(
@@ -542,10 +637,9 @@ page_get_page_no(
 	return(mach_read_from_4(page + FIL_PAGE_OFFSET));
 }
 
-#ifndef UNIV_INNOCHECKSUM
 /*************************************************************//**
 Gets the tablespace identifier.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 page_get_space_id(
@@ -556,11 +650,10 @@ page_get_space_id(
 	return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
 }
 
-#endif /* !UNIV_INNOCHECKSUM */
 /*************************************************************//**
 Gets the number of user records on page (infimum and supremum records
 are not user records).
-@return	number of user records */
+@return number of user records */
 UNIV_INLINE
 ulint
 page_get_n_recs(
@@ -570,10 +663,9 @@ page_get_n_recs(
 	return(page_header_get_field(page, PAGE_N_RECS));
 }
 
-#ifndef UNIV_INNOCHECKSUM
 /*************************************************************//**
 Gets the number of dir slots in directory.
-@return	number of slots */
+@return number of slots */
 UNIV_INLINE
 ulint
 page_dir_get_n_slots(
@@ -598,7 +690,7 @@ page_dir_set_n_slots(
 
 /*************************************************************//**
 Gets the number of records in the heap.
-@return	number of user records */
+@return number of user records */
 UNIV_INLINE
 ulint
 page_dir_get_n_heap(
@@ -634,7 +726,7 @@ page_dir_set_n_heap(
 #ifdef UNIV_DEBUG
 /*************************************************************//**
 Gets pointer to nth directory slot.
-@return	pointer to dir slot */
+@return pointer to dir slot */
 UNIV_INLINE
 page_dir_slot_t*
 page_dir_get_nth_slot(
@@ -652,7 +744,7 @@ page_dir_get_nth_slot(
 
 /**************************************************************//**
 Used to check the consistency of a record on a page.
-@return	TRUE if succeed */
+@return TRUE if succeed */
 UNIV_INLINE
 ibool
 page_rec_check(
@@ -671,7 +763,7 @@ page_rec_check(
 
 /***************************************************************//**
 Gets the record pointed to by a directory slot.
-@return	pointer to record */
+@return pointer to record */
 UNIV_INLINE
 const rec_t*
 page_dir_slot_get_rec(
@@ -697,7 +789,7 @@ page_dir_slot_set_rec(
 
 /***************************************************************//**
 Gets the number of records owned by a directory slot.
-@return	number of records */
+@return number of records */
 UNIV_INLINE
 ulint
 page_dir_slot_get_n_owned(
@@ -747,7 +839,7 @@ page_dir_calc_reserved_space(
 
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_low(
@@ -773,8 +865,6 @@ page_rec_get_next_low(
 			(void*) rec,
 			(ulong) page_get_space_id(page),
 			(ulong) page_get_page_no(page));
-		buf_page_print(page, 0, 0);
-
 		ut_error;
 	} else if (offs == 0) {
 
@@ -786,7 +876,7 @@ page_rec_get_next_low(
 
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 rec_t*
 page_rec_get_next(
@@ -798,7 +888,7 @@ page_rec_get_next(
 
 /************************************************************//**
 Gets the pointer to the next record on the page.
-@return	pointer to next record */
+@return pointer to next record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_const(
@@ -812,7 +902,7 @@ page_rec_get_next_const(
 Gets the pointer to the next non delete-marked record on the page.
 If all subsequent records are delete-marked, then this function
 will return the supremum record.
-@return	pointer to next non delete-marked record or pointer to supremum */
+@return pointer to next non delete-marked record or pointer to supremum */
 UNIV_INLINE
 const rec_t*
 page_rec_get_next_non_del_marked(
@@ -863,7 +953,7 @@ page_rec_set_next(
 
 /************************************************************//**
 Gets the pointer to the previous record.
-@return	pointer to previous record */
+@return pointer to previous record */
 UNIV_INLINE
 const rec_t*
 page_rec_get_prev_const(
@@ -910,7 +1000,7 @@ page_rec_get_prev_const(
 
 /************************************************************//**
 Gets the pointer to the previous record.
-@return	pointer to previous record */
+@return pointer to previous record */
 UNIV_INLINE
 rec_t*
 page_rec_get_prev(
@@ -923,7 +1013,7 @@ page_rec_get_prev(
 
 /***************************************************************//**
 Looks for the record which owns the given record.
-@return	the owner record */
+@return the owner record */
 UNIV_INLINE
 rec_t*
 page_rec_find_owner_rec(
@@ -948,7 +1038,7 @@ page_rec_find_owner_rec(
 /**********************************************************//**
 Returns the base extra size of a physical record.  This is the
 size of the fixed header, independent of the record size.
-@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
 UNIV_INLINE
 ulint
 page_rec_get_base_extra_size(
@@ -961,25 +1051,10 @@ page_rec_get_base_extra_size(
 	return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
 }
 
-#endif /* !UNIV_INNOCHECKSUM */
-
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
-	const page_t*	page)	/*!< in: index page */
-{
-	return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
-}
-
 /************************************************************//**
 Returns the sum of the sizes of the records in the record list, excluding
 the infimum and supremum records.
-@return	data in bytes */
+@return data in bytes */
 UNIV_INLINE
 ulint
 page_get_data_size(
@@ -999,7 +1074,6 @@ page_get_data_size(
 	return(ret);
 }
 
-#ifndef UNIV_INNOCHECKSUM
 /************************************************************//**
 Allocates a block of memory from the free list of an index page. */
 UNIV_INLINE
@@ -1035,7 +1109,7 @@ page_mem_alloc_free(
 
 /*************************************************************//**
 Calculates free space if a page is emptied.
-@return	free space */
+@return free space */
 UNIV_INLINE
 ulint
 page_get_free_space_of_empty(
@@ -1085,7 +1159,7 @@ PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
 value of page_get_free_space_of_empty, the insert is impossible, otherwise
 it is allowed. This function returns the maximum combined size of records
 which can be inserted on top of the record heap.
-@return	maximum combined size for inserted records */
+@return maximum combined size for inserted records */
 UNIV_INLINE
 ulint
 page_get_max_insert_size(
@@ -1127,7 +1201,7 @@ page_get_max_insert_size(
 /************************************************************//**
 Returns the maximum combined size of records which can be inserted on top
 of the record heap if a page is first reorganized.
-@return	maximum combined size for inserted records */
+@return maximum combined size for inserted records */
 UNIV_INLINE
 ulint
 page_get_max_insert_size_after_reorganize(
@@ -1201,3 +1275,5 @@ page_mem_free(
 #undef UNIV_INLINE
 #define UNIV_INLINE	UNIV_INLINE_ORIGINAL
 #endif
+
+#endif
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
new file mode 100644
index 00000000000..ab917e1ff05
--- /dev/null
+++ b/storage/innobase/include/page0size.h
@@ -0,0 +1,202 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0size.h
+A class describing a page size.
+
+Created Nov 14, 2013 Vasil Dimov
+*******************************************************/
+
+#ifndef page0size_t
+#define page0size_t
+
+#include "univ.i"
+#include "fsp0types.h"
+
+#define FIELD_REF_SIZE 20
+
+/** A BLOB field reference full of zero, for use in assertions and
+tests. Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+extern const byte field_ref_zero[FIELD_REF_SIZE];
+
+#define PAGE_SIZE_T_SIZE_BITS	17
+
+/** Page size descriptor. Contains the physical and logical page size, as well
+as whether the page is compressed or not. */
+class page_size_t {
+public:
+	/** Constructor from (physical, logical, is_compressed).
+	@param[in]	physical	physical (on-disk/zipped) size; 0=UNIV_PAGE_SIZE_ORIG
+	@param[in]	logical		logical (in-memory/unzipped) size; 0=UNIV_PAGE_SIZE_ORIG
+	@param[in]	is_compressed	whether the page is compressed */
+	page_size_t(ulint physical, ulint logical, bool is_compressed)
+	{
+		if (physical == 0) {
+			physical = UNIV_PAGE_SIZE_ORIG;
+		}
+		if (logical == 0) {
+			logical = UNIV_PAGE_SIZE_ORIG;
+		}
+
+		m_physical = static_cast<unsigned>(physical);
+		m_logical = static_cast<unsigned>(logical);
+		m_is_compressed = static_cast<unsigned>(is_compressed);
+		/* An N-bit field holds at most (1 << N) - 1, hence '<'. */
+		ut_ad(physical < (1U << PAGE_SIZE_T_SIZE_BITS));
+		ut_ad(logical < (1U << PAGE_SIZE_T_SIZE_BITS));
+
+		ut_ad(ut_is_2pow(physical));
+		ut_ad(ut_is_2pow(logical));
+
+		ut_ad(logical <= UNIV_PAGE_SIZE_MAX);
+		ut_ad(logical >= physical);
+		ut_ad(!is_compressed || physical <= UNIV_ZIP_SIZE_MAX);
+	}
+
+	/** Constructor from (fsp_flags).
+	@param[in]	fsp_flags	filespace flags */
+	explicit page_size_t(ulint fsp_flags)
+	{
+		ulint	ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags);
+
+		/* If the logical page size is zero in fsp_flags, then use the
+		legacy 16k page size. */
+		ssize = (0 == ssize) ? UNIV_PAGE_SSIZE_ORIG : ssize;
+
+		/* Convert from a 'log2 minus 9' to a page size in bytes. */
+		const ulint	size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+
+		ut_ad(size <= UNIV_PAGE_SIZE_MAX);
+		ut_ad(size < (1U << PAGE_SIZE_T_SIZE_BITS));
+
+		m_logical = static_cast<unsigned>(size);
+
+		ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
+
+		/* If the fsp_flags have zero in the zip_ssize field, then it means
+		that the tablespace does not have compressed pages and the physical
+		page size is the same as the logical page size. */
+		if (ssize == 0) {
+			m_is_compressed = false;
+			m_physical = m_logical;
+		} else {
+			m_is_compressed = true;
+
+			/* Convert from a 'log2 minus 9' to a page size
+			in bytes. */
+			const ulint	phy
+				= ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+
+			ut_ad(phy <= UNIV_ZIP_SIZE_MAX);
+			ut_ad(phy < (1U << PAGE_SIZE_T_SIZE_BITS));
+
+			m_physical = static_cast<unsigned>(phy);
+		}
+	}
+
+	/** Retrieve the physical page size (on-disk).
+	@return physical page size in bytes */
+	inline ulint physical() const
+	{
+		ut_ad(m_physical > 0);
+
+		return(m_physical);
+	}
+
+	/** Retrieve the logical page size (in-memory).
+	@return logical page size in bytes */
+	inline ulint logical() const
+	{
+		ut_ad(m_logical > 0);
+		return(m_logical);
+	}
+
+	/** Check whether the page is compressed on disk.
+	@return true if compressed */
+	inline bool is_compressed() const
+	{
+		return(m_is_compressed);
+	}
+
+	/** Copy the values from a given page_size_t object.
+	@param[in]	src	page size object whose values to fetch */
+	inline void copy_from(const page_size_t& src)
+	{
+		m_physical = src.physical();
+		m_logical = src.logical();
+		m_is_compressed = src.is_compressed();
+	}
+
+	/** Check if a given page_size_t object is equal to the current one.
+	@param[in]	a	page_size_t object to compare
+	@return true if equal */
+	inline bool equals_to(const page_size_t& a) const
+	{
+		return(a.physical() == m_physical
+		       && a.logical() == m_logical
+		       && a.is_compressed() == m_is_compressed);
+	}
+
+private:
+
+	/* Disable implicit copy assignment; use copy_from() instead. */
+	void operator=(const page_size_t&);
+
+	/* For non compressed tablespaces, physical page size is equal to
+	the logical page size and the data is stored in buf_page_t::frame
+	(and is also always equal to univ_page_size (--innodb-page-size=)).
+
+	For compressed tablespaces, physical page size is the compressed
+	page size as stored on disk and in buf_page_t::zip::data. The logical
+	page size is the uncompressed page size in memory - the size of
+	buf_page_t::frame (currently also always equal to univ_page_size
+	(--innodb-page-size=)). */
+
+	/** Physical page size. */
+	unsigned	m_physical:PAGE_SIZE_T_SIZE_BITS;
+
+	/** Logical page size. */
+	unsigned	m_logical:PAGE_SIZE_T_SIZE_BITS;
+
+	/** Flag designating whether the physical page is compressed, which is
+	true IFF the whole tablespace where the page belongs is compressed. */
+	unsigned	m_is_compressed:1;
+};
+
+/** Stream-insertion overload that prints a page_size_t object as
+"[page size: physical=P, logical=L, compressed=C]".
+@param[in,out]	out	the output stream
+@param[in]	obj	an object of type page_size_t to be printed
+@return the output stream */
+inline
+std::ostream&
+operator<<(
+	std::ostream&		out,
+	const page_size_t&	obj)
+{
+	out << "[page size: physical=" << obj.physical();
+	out << ", logical=" << obj.logical();
+	out << ", compressed=" << obj.is_compressed() << "]";
+	return(out);
+}
+
+extern page_size_t	univ_page_size;
+
+#endif /* page0size_t */
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index 2892e860875..fe56468c454 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -26,20 +26,18 @@ Created 2/2/1994 Heikki Tuuri
 #ifndef page0types_h
 #define page0types_h
 
-using namespace std;
-
-#include <map>
-
 #include "univ.i"
 #include "dict0types.h"
 #include "mtr0types.h"
-#include "sync0types.h"
-#include "os0thread.h"
+#include "ut0new.h"
+
+#include <map>
 
 /** Eliminates a name collision on HP-UX */
 #define page_t	   ib_page_t
 /** Type of the index page */
 typedef	byte		page_t;
+#ifndef UNIV_INNOCHECKSUM
 /** Index page cursor */
 struct page_cur_t;
 
@@ -63,6 +61,42 @@ ssize, which is the number of shifts from 512. */
 # error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)"
 #endif
 
+/** Page cursor search modes; the values must be in this order! */
+enum page_cur_mode_t {
+	PAGE_CUR_UNSUPP	= 0,
+	PAGE_CUR_G	= 1,
+	PAGE_CUR_GE	= 2,
+	PAGE_CUR_L	= 3,
+	PAGE_CUR_LE	= 4,
+
+/*      PAGE_CUR_LE_OR_EXTENDS = 5,*/ /* This is a search mode used in
+				 "column LIKE 'abc%' ORDER BY column DESC";
+				 we have to find strings which are <= 'abc' or
+				 which extend it */
+
+/* Values 5 and 6 are unassigned. The modes below search an R-tree index. */
+	PAGE_CUR_CONTAIN		= 7,
+	PAGE_CUR_INTERSECT		= 8,
+	PAGE_CUR_WITHIN			= 9,
+	PAGE_CUR_DISJOINT		= 10,
+	PAGE_CUR_MBR_EQUAL		= 11,
+	PAGE_CUR_RTREE_INSERT		= 12,
+	PAGE_CUR_RTREE_LOCATE		= 13,
+	PAGE_CUR_RTREE_GET_FATHER	= 14
+};
+
+
+/** The information used for compressing a page when applying
+a TRUNCATE log record during recovery */
+struct redo_page_compress_t {
+	ulint		type;		/*!< index type */
+	index_id_t	index_id;	/*!< index id */
+	ulint		n_fields;	/*!< number of index fields */
+	ulint		field_len;	/*!< the length of index field */
+	const byte*	fields;		/*!< index field information */
+	ulint		trx_id_pos;	/*!< position of the trx-id column */
+};
+
 /** Compressed page descriptor */
 struct page_zip_des_t
 {
@@ -110,21 +144,21 @@ struct page_zip_stat_t {
 };
 
 /** Compression statistics types */
-typedef map<index_id_t, page_zip_stat_t>	page_zip_stat_per_index_t;
+typedef std::map<
+	index_id_t,
+	page_zip_stat_t,
+	std::less<index_id_t>,
+	ut_allocator<std::pair<const index_id_t, page_zip_stat_t> > >
+	page_zip_stat_per_index_t;
 
 /** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-extern page_zip_stat_t				page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+extern page_zip_stat_t			page_zip_stat[PAGE_ZIP_SSIZE_MAX];
 /** Statistics on compression, indexed by dict_index_t::id */
-extern page_zip_stat_per_index_t		page_zip_stat_per_index;
-extern ib_mutex_t				page_zip_stat_per_index_mutex;
-#ifdef HAVE_PSI_INTERFACE
-extern mysql_pfs_key_t				page_zip_stat_per_index_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
+extern page_zip_stat_per_index_t	page_zip_stat_per_index;
 
 /**********************************************************************//**
 Write the "deleted" flag of a record on a compressed page.  The flag must
 already have been written on the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_rec_set_deleted(
 /*=====================*/
@@ -136,7 +170,6 @@ page_zip_rec_set_deleted(
 /**********************************************************************//**
 Write the "owned" flag of a record on a compressed page.  The n_owned field
 must already have been written on the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_rec_set_owned(
 /*===================*/
@@ -147,7 +180,6 @@ page_zip_rec_set_owned(
 
 /**********************************************************************//**
 Shift the dense page directory when a record is deleted. */
-UNIV_INTERN
 void
 page_zip_dir_delete(
 /*================*/
@@ -160,7 +192,6 @@ page_zip_dir_delete(
 
 /**********************************************************************//**
 Add a slot to the dense page directory. */
-UNIV_INTERN
 void
 page_zip_dir_add_slot(
 /*==================*/
@@ -168,4 +199,5 @@ page_zip_dir_add_slot(
 	ulint		is_clustered)	/*!< in: nonzero for clustered index,
 					zero for others */
 	MY_ATTRIBUTE((nonnull));
+#endif /* !UNIV_INNOCHECKSUM */
 #endif
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 4e362cec641..7b5df3d306b 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -1,9 +1,7 @@
-
 /*****************************************************************************
 
 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2016, MariaDB Corporation
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,36 +32,62 @@ Created June 2005 by Marko Makela
 # define UNIV_INLINE
 #endif
 
+#ifdef UNIV_INNOCHECKSUM
+#include "univ.i"
+#include "buf0buf.h"
+#include "ut0crc32.h"
+#include "buf0checksum.h"
+#include "mach0data.h"
+#include "zlib.h"
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifndef UNIV_INNOCHECKSUM
+#include "mtr0types.h"
 #include "page0types.h"
+#endif /* !UNIV_INNOCHECKSUM */
+
 #include "buf0types.h"
+
 #ifndef UNIV_INNOCHECKSUM
-#include "mtr0types.h"
 #include "dict0types.h"
 #include "srv0srv.h"
 #include "trx0types.h"
 #include "mem0mem.h"
-#endif /* !UNIV_INNOCHECKSUM */
 
 /* Compression level to be used by zlib. Settable by user. */
 extern uint	page_zip_level;
 
 /* Default compression level. */
 #define DEFAULT_COMPRESSION_LEVEL	6
+/** Start offset of the area that will be compressed */
+#define PAGE_ZIP_START			PAGE_NEW_SUPREMUM_END
+/** Size of a compressed page directory entry */
+#define PAGE_ZIP_DIR_SLOT_SIZE		2
+/** Predefine the sum of DIR_SLOT, TRX_ID & ROLL_PTR */
+#define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE		\
+		(PAGE_ZIP_DIR_SLOT_SIZE		\
+		+ DATA_TRX_ID_LEN		\
+		+ DATA_ROLL_PTR_LEN)
+/** Mask of record offsets */
+#define PAGE_ZIP_DIR_SLOT_MASK		0x3fff
+/** 'owned' flag */
+#define PAGE_ZIP_DIR_SLOT_OWNED		0x4000
+/** 'deleted' flag */
+#define PAGE_ZIP_DIR_SLOT_DEL		0x8000
 
 /* Whether or not to log compressed page images to avoid possible
 compression algorithm changes in zlib. */
 extern my_bool	page_zip_log_pages;
 
-#ifndef UNIV_INNOCHECKSUM
 /**********************************************************************//**
 Determine the size of a compressed page in bytes.
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
 page_zip_get_size(
 /*==============*/
 	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************************//**
 Set the size of a compressed page in bytes. */
 UNIV_INLINE
@@ -74,30 +98,40 @@ page_zip_set_size(
 	ulint		size);		/*!< in: size in bytes */
 
 #ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine if a record is so big that it needs to be stored externally.
-@return	FALSE if the entire record can be stored locally on the page */
+/** Determine if a record is so big that it needs to be stored externally.
+@param[in]	rec_size	length of the record in bytes
+@param[in]	comp		nonzero=compact format
+@param[in]	n_fields	number of fields in the record; ignored if
+tablespace is not compressed
+@param[in]	page_size	page size
+@return FALSE if the entire record can be stored locally on the page */
 UNIV_INLINE
 ibool
 page_zip_rec_needs_ext(
-/*===================*/
-	ulint	rec_size,	/*!< in: length of the record in bytes */
-	ulint	comp,		/*!< in: nonzero=compact format */
-	ulint	n_fields,	/*!< in: number of fields in the record;
-				ignored if zip_size == 0 */
-	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
-	MY_ATTRIBUTE((const));
+	ulint			rec_size,
+	ulint			comp,
+	ulint			n_fields,
+	const page_size_t&	page_size)
+	MY_ATTRIBUTE((warn_unused_result));
 
 /**********************************************************************//**
 Determine the guaranteed free space on an empty page.
-@return	minimum payload size on the page */
-UNIV_INTERN
+@return minimum payload size on the page */
 ulint
 page_zip_empty_size(
 /*================*/
 	ulint	n_fields,	/*!< in: number of columns in the index */
 	ulint	zip_size)	/*!< in: compressed page size in bytes */
 	MY_ATTRIBUTE((const));
+
+/** Check whether a tuple is too big for a compressed table
+@param[in]	index	dict index object
+@param[in]	entry	entry for the index
+@return true if it's too big, otherwise false */
+bool
+page_zip_is_too_big(
+	const dict_index_t*	index,
+	const dtuple_t*		entry);
 #endif /* !UNIV_HOTBACKUP */
 
 /**********************************************************************//**
@@ -111,36 +145,54 @@ page_zip_des_init(
 
 /**********************************************************************//**
 Configure the zlib allocator to use the given memory heap. */
-UNIV_INTERN
 void
 page_zip_set_alloc(
 /*===============*/
 	void*		stream,		/*!< in/out: zlib stream */
 	mem_heap_t*	heap);		/*!< in: memory heap to use */
-#endif /* !UNIV_INNOCHECKSUM */
 
 /**********************************************************************//**
 Compress a page.
 @return TRUE on success, FALSE on failure; page_zip will be left
 intact on failure. */
-UNIV_INTERN
 ibool
 page_zip_compress(
 /*==============*/
-	page_zip_des_t*	page_zip,/*!< in: size; out: data, n_blobs,
-				m_start, m_end, m_nonempty */
-	const page_t*	page,	/*!< in: uncompressed page */
-	dict_index_t*	index,	/*!< in: index of the B-tree node */
-	ulint		level,	/*!< in: compression level */
-	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
-	MY_ATTRIBUTE((nonnull(1,2,3)));
+	page_zip_des_t*		page_zip,	/*!< in: size; out: data,
+						n_blobs, m_start, m_end,
+						m_nonempty */
+	const page_t*		page,		/*!< in: uncompressed page */
+	dict_index_t*		index,		/*!< in: index of the B-tree
+						node */
+	ulint			level,		/*!< in: compression level */
+	const redo_page_compress_t* page_comp_info,
+						/*!< in: used for applying
+						TRUNCATE log
+						record during recovery */
+	mtr_t*			mtr);		/*!< in/out: mini-transaction,
+						or NULL */
+
+/**********************************************************************//**
+Write the index information for the compressed page.
+@return used size of buf */
+ulint
+page_zip_fields_encode(
+/*===================*/
+	ulint			n,	/*!< in: number of fields
+					to compress */
+	const dict_index_t*	index,	/*!< in: index comprising
+					at least n fields */
+	ulint			trx_id_pos,
+					/*!< in: position of the trx_id column
+					in the index, or ULINT_UNDEFINED if
+					this is a non-leaf page */
+	byte*			buf);	/*!< out: buffer of (n + 1) * 2 bytes */
 
 /**********************************************************************//**
 Decompress a page.  This function should tolerate errors on the compressed
 page.  Instead of letting assertions fail, it will return FALSE if an
 inconsistency is detected.
-@return	TRUE on success, FALSE on failure */
-UNIV_INTERN
+@return TRUE on success, FALSE on failure */
 ibool
 page_zip_decompress(
 /*================*/
@@ -153,11 +205,10 @@ page_zip_decompress(
 				after page creation */
 	MY_ATTRIBUTE((nonnull(1,2)));
 
-#ifndef UNIV_INNOCHECKSUM
 #ifdef UNIV_DEBUG
 /**********************************************************************//**
 Validate a compressed page descriptor.
-@return	TRUE if ok */
+@return TRUE if ok */
 UNIV_INLINE
 ibool
 page_zip_simple_validate(
@@ -165,13 +216,11 @@ page_zip_simple_validate(
 	const page_zip_des_t*	page_zip);	/*!< in: compressed page
 						descriptor */
 #endif /* UNIV_DEBUG */
-#endif /* !UNIV_INNOCHECKSUM */
 
 #ifdef UNIV_ZIP_DEBUG
 /**********************************************************************//**
 Check that the compressed and decompressed pages match.
-@return	TRUE if valid, FALSE if not */
-UNIV_INTERN
+@return TRUE if valid, FALSE if not */
 ibool
 page_zip_validate_low(
 /*==================*/
@@ -183,7 +232,6 @@ page_zip_validate_low(
 	MY_ATTRIBUTE((nonnull(1,2)));
 /**********************************************************************//**
 Check that the compressed and decompressed pages match. */
-UNIV_INTERN
 ibool
 page_zip_validate(
 /*==============*/
@@ -193,7 +241,6 @@ page_zip_validate(
 	MY_ATTRIBUTE((nonnull(1,2)));
 #endif /* UNIV_ZIP_DEBUG */
 
-#ifndef UNIV_INNOCHECKSUM
 /**********************************************************************//**
 Determine how big record can be inserted without recompressing the page.
 @return a positive number indicating the maximum size of a record
@@ -204,11 +251,11 @@ page_zip_max_ins_size(
 /*==================*/
 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
 	ibool			is_clust)/*!< in: TRUE if clustered index */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /**********************************************************************//**
 Determine if enough space is available in the modification log.
-@return	TRUE if page_zip_write_rec() will succeed */
+@return TRUE if page_zip_write_rec() will succeed */
 UNIV_INLINE
 ibool
 page_zip_available(
@@ -218,7 +265,7 @@ page_zip_available(
 	ulint			length,	/*!< in: combined size of the record */
 	ulint			create)	/*!< in: nonzero=add the record to
 					the heap */
-	MY_ATTRIBUTE((nonnull, pure));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /**********************************************************************//**
 Write data to the uncompressed header portion of a page.  The data must
@@ -236,7 +283,6 @@ page_zip_write_header(
 /**********************************************************************//**
 Write an entire record on the compressed page.  The data must already
 have been written to the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_write_rec(
 /*===============*/
@@ -249,8 +295,7 @@ page_zip_write_rec(
 
 /***********************************************************//**
 Parses a log record of writing a BLOB pointer of a record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_zip_parse_write_blob_ptr(
 /*==========================*/
@@ -262,7 +307,6 @@ page_zip_parse_write_blob_ptr(
 /**********************************************************************//**
 Write a BLOB pointer of a record on the leaf page of a clustered index.
 The information must already have been updated on the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_write_blob_ptr(
 /*====================*/
@@ -272,14 +316,12 @@ page_zip_write_blob_ptr(
 	dict_index_t*	index,	/*!< in: index of the page */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
 	ulint		n,	/*!< in: column index */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle,
+	mtr_t*		mtr);	/*!< in: mini-transaction handle,
 				or NULL if no logging is needed */
-	MY_ATTRIBUTE((nonnull(1,2,3,4)));
 
 /***********************************************************//**
 Parses a log record of writing the node pointer of a record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_zip_parse_write_node_ptr(
 /*==========================*/
@@ -290,7 +332,6 @@ page_zip_parse_write_node_ptr(
 
 /**********************************************************************//**
 Write the node pointer of a record on a non-leaf compressed page. */
-UNIV_INTERN
 void
 page_zip_write_node_ptr(
 /*====================*/
@@ -298,12 +339,10 @@ page_zip_write_node_ptr(
 	byte*		rec,	/*!< in/out: record */
 	ulint		size,	/*!< in: data size of rec */
 	ulint		ptr,	/*!< in: node pointer */
-	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
-	MY_ATTRIBUTE((nonnull(1,2)));
+	mtr_t*		mtr);	/*!< in: mini-transaction, or NULL */
 
 /**********************************************************************//**
 Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
-UNIV_INTERN
 void
 page_zip_write_trx_id_and_roll_ptr(
 /*===============================*/
@@ -318,7 +357,6 @@ page_zip_write_trx_id_and_roll_ptr(
 /**********************************************************************//**
 Write the "deleted" flag of a record on a compressed page.  The flag must
 already have been written on the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_rec_set_deleted(
 /*=====================*/
@@ -330,7 +368,6 @@ page_zip_rec_set_deleted(
 /**********************************************************************//**
 Write the "owned" flag of a record on a compressed page.  The n_owned field
 must already have been written on the uncompressed page. */
-UNIV_INTERN
 void
 page_zip_rec_set_owned(
 /*===================*/
@@ -341,7 +378,6 @@ page_zip_rec_set_owned(
 
 /**********************************************************************//**
 Insert a record to the dense page directory. */
-UNIV_INTERN
 void
 page_zip_dir_insert(
 /*================*/
@@ -354,7 +390,6 @@ page_zip_dir_insert(
 /**********************************************************************//**
 Shift the dense page directory and the array of BLOB pointers
 when a record is deleted. */
-UNIV_INTERN
 void
 page_zip_dir_delete(
 /*================*/
@@ -368,7 +403,6 @@ page_zip_dir_delete(
 
 /**********************************************************************//**
 Add a slot to the dense page directory. */
-UNIV_INTERN
 void
 page_zip_dir_add_slot(
 /*==================*/
@@ -379,8 +413,7 @@ page_zip_dir_add_slot(
 
 /***********************************************************//**
 Parses a log record of writing to the header of a page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_zip_parse_write_header(
 /*========================*/
@@ -416,7 +449,6 @@ bits in the same mini-transaction in such a way that the modification
 will be redo-logged.
 @return TRUE on success, FALSE on failure; page_zip will be left
 intact on failure, but page will be overwritten. */
-UNIV_INTERN
 ibool
 page_zip_reorganize(
 /*================*/
@@ -427,15 +459,12 @@ page_zip_reorganize(
 	dict_index_t*	index,	/*!< in: index of the B-tree node */
 	mtr_t*		mtr)	/*!< in: mini-transaction */
 	MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_INNOCHECKSUM */
-
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Copy the records of a page byte for byte.  Do not copy the page header
 or trailer, except those B-tree header fields that are directly
 related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
 NOTE: The caller must update the lock table and the adaptive hash index. */
-UNIV_INTERN
 void
 page_zip_copy_recs(
 /*===============*/
@@ -446,45 +475,58 @@ page_zip_copy_recs(
 	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
 	const page_t*		src,		/*!< in: page */
 	dict_index_t*		index,		/*!< in: index of the B-tree */
-	mtr_t*			mtr)		/*!< in: mini-transaction */
-	MY_ATTRIBUTE((nonnull));
+	mtr_t*			mtr);		/*!< in: mini-transaction */
 #endif /* !UNIV_HOTBACKUP */
 
 /**********************************************************************//**
 Parses a log record of compressing an index page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 page_zip_parse_compress(
 /*====================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	page_t*		page,	/*!< out: uncompressed page */
-	page_zip_des_t*	page_zip)/*!< out: compressed page */
-	MY_ATTRIBUTE((nonnull(1,2)));
+	byte*		ptr,		/*!< in: buffer */
+	byte*		end_ptr,	/*!< in: buffer end */
+	page_t*		page,		/*!< out: uncompressed page */
+	page_zip_des_t*	page_zip);	/*!< out: compressed page */
 
-/**********************************************************************//**
-Calculate the compressed page checksum.
-@return	page checksum */
-UNIV_INTERN
-ulint
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** Calculate the compressed page checksum.
+@param[in]	data			compressed page
+@param[in]	size			size of compressed page
+@param[in]	algo			algorithm to use
+@param[in]	use_legacy_big_endian	only used if algo is
+SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
+then use big endian byteorder when converting byte strings to integers.
+@return page checksum */
+uint32_t
 page_zip_calc_checksum(
-/*===================*/
-        const void*     data,   /*!< in: compressed page */
-        ulint           size,   /*!< in: size of compressed page */
-	srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
-	MY_ATTRIBUTE((nonnull));
+	const void*			data,
+	ulint				size,
+	srv_checksum_algorithm_t	algo,
+	bool				use_legacy_big_endian = false);
 
 /**********************************************************************//**
 Verify a compressed page's checksum.
-@return	TRUE if the stored checksum is valid according to the value of
+@return TRUE if the stored checksum is valid according to the value of
 innodb_checksum_algorithm */
-UNIV_INTERN
 ibool
 page_zip_verify_checksum(
 /*=====================*/
 	const void*	data,	/*!< in: compressed page */
-	ulint		size);	/*!< in: size of compressed page */
+	ulint		size	/*!< in: size of compressed page */
+#ifdef UNIV_INNOCHECKSUM
+	/* these variables are used only for innochecksum tool. */
+	,uintmax_t	page_no,	/*!< in: page number of
+					given read_buf */
+	bool		strict_check,	/*!< in: true if strict-check
+					option is enabled */
+	bool		is_log_enabled, /*!< in: true if log option is
+					enabled */
+	FILE*		log_file	/*!< in: file pointer to
+					log_file */
+#endif /* UNIV_INNOCHECKSUM */
+);
 
 #ifndef UNIV_INNOCHECKSUM
 /**********************************************************************//**
@@ -499,7 +541,7 @@ page_zip_compress_write_log_no_data(
 	mtr_t*		mtr);	/*!< in: mtr */
 /**********************************************************************//**
 Parses a log record of compressing an index page without the data.
-@return	end of log record or NULL */
+@return end of log record or NULL */
 UNIV_INLINE
 byte*
 page_zip_parse_compress_no_data(
@@ -519,49 +561,11 @@ void
 page_zip_reset_stat_per_index();
 /*===========================*/
 
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_HOTBACKUP
-/** Check if a pointer to an uncompressed page matches a compressed page.
-When we IMPORT a tablespace the blocks and accompanying frames are allocted
-from outside the buffer pool.
-@param ptr	pointer to an uncompressed page frame
-@param page_zip	compressed page descriptor
-@return		TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip)					\
-	(((page_zip)->m_external					\
-	  && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data))	\
-	  || buf_frame_get_page_zip(ptr) == (page_zip))
-#else /* !UNIV_HOTBACKUP */
-/** Check if a pointer to an uncompressed page matches a compressed page.
-@param ptr	pointer to an uncompressed page frame
-@param page_zip	compressed page descriptor
-@return		TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip)				\
-	(page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)
-#endif /* !UNIV_HOTBACKUP */
-
 #ifdef UNIV_MATERIALIZE
 # undef UNIV_INLINE
 # define UNIV_INLINE	UNIV_INLINE_ORIGINAL
 #endif
 
-#ifdef UNIV_INNOCHECKSUM
-/** Issue a warning when the checksum that is stored in the page is valid,
-but different than the global setting innodb_checksum_algorithm.
-@param[in]	current_algo	current checksum algorithm
-@param[in]	page_checksum	page valid checksum
-@param[in]	space_id	tablespace id
-@param[in]	page_no		page number */
-void
-page_warn_strict_checksum(
-	srv_checksum_algorithm_t	curr_algo,
-	srv_checksum_algorithm_t	page_checksum,
-	ulint				space_id,
-	ulint				page_no);
-#endif /* UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_INNOCHECKSUM
 #ifndef UNIV_NONINL
 # include "page0zip.ic"
 #endif
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index 6c7d8cd32c7..9963fe01c82 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -32,6 +32,7 @@ Created June 2005 by Marko Makela
 #include "page0zip.h"
 #include "mtr0log.h"
 #include "page0page.h"
+#include "srv0srv.h"
 
 /* The format of compressed pages is as follows.
 
@@ -100,20 +101,9 @@ In summary, the compressed page looks like this:
   - deleted records (free list) in link order
 */
 
-/** Start offset of the area that will be compressed */
-#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
-/** Size of an compressed page directory entry */
-#define PAGE_ZIP_DIR_SLOT_SIZE	2
-/** Mask of record offsets */
-#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
-/** 'owned' flag */
-#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
-/** 'deleted' flag */
-#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
-
 /**********************************************************************//**
 Determine the size of a compressed page in bytes.
-@return	size in bytes */
+@return size in bytes */
 UNIV_INLINE
 ulint
 page_zip_get_size(
@@ -159,22 +149,23 @@ page_zip_set_size(
 }
 
 #ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine if a record is so big that it needs to be stored externally.
-@return	FALSE if the entire record can be stored locally on the page */
+/** Determine if a record is so big that it needs to be stored externally.
+@param[in]	rec_size	length of the record in bytes
+@param[in]	comp		nonzero=compact format
+@param[in]	n_fields	number of fields in the record; ignored if
+tablespace is not compressed
+@param[in]	page_size	page size
+@return FALSE if the entire record can be stored locally on the page */
 UNIV_INLINE
 ibool
 page_zip_rec_needs_ext(
-/*===================*/
-	ulint	rec_size,	/*!< in: length of the record in bytes */
-	ulint	comp,		/*!< in: nonzero=compact format */
-	ulint	n_fields,	/*!< in: number of fields in the record;
-				ignored if zip_size == 0 */
-	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
+	ulint			rec_size,
+	ulint			comp,
+	ulint			n_fields,
+	const page_size_t&	page_size)
 {
 	ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(comp || !zip_size);
+	ut_ad(comp || !page_size.is_compressed());
 
 #if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
 	if (rec_size >= REC_MAX_DATA_SIZE) {
@@ -182,7 +173,7 @@ page_zip_rec_needs_ext(
 	}
 #endif
 
-	if (zip_size) {
+	if (page_size.is_compressed()) {
 		ut_ad(comp);
 		/* On a compressed page, there is a two-byte entry in
 		the dense page directory for every record.  But there
@@ -191,7 +182,7 @@ page_zip_rec_needs_ext(
 		the encoded heap number.  Check also the available space
 		on the uncompressed page. */
 		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
-		       >= page_zip_empty_size(n_fields, zip_size)
+		       >= page_zip_empty_size(n_fields, page_size.physical())
 		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
 	}
 
@@ -202,7 +193,7 @@ page_zip_rec_needs_ext(
 #ifdef UNIV_DEBUG
 /**********************************************************************//**
 Validate a compressed page descriptor.
-@return	TRUE if ok */
+@return TRUE if ok */
 UNIV_INLINE
 ibool
 page_zip_simple_validate(
@@ -286,7 +277,7 @@ page_zip_max_ins_size(
 
 /**********************************************************************//**
 Determine if enough space is available in the modification log.
-@return	TRUE if enough space is available */
+@return TRUE if enough space is available */
 UNIV_INLINE
 ibool
 page_zip_available(
@@ -336,7 +327,6 @@ page_zip_des_init(
 
 /**********************************************************************//**
 Write a log record of writing to the uncompressed header portion of a page. */
-UNIV_INTERN
 void
 page_zip_write_header_log(
 /*======================*/
@@ -361,7 +351,6 @@ page_zip_write_header(
 {
 	ulint	pos;
 
-	ut_ad(PAGE_ZIP_MATCH(str, page_zip));
 	ut_ad(page_zip_simple_validate(page_zip));
 	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
@@ -403,7 +392,7 @@ page_zip_compress_write_log_no_data(
 
 /**********************************************************************//**
 Parses a log record of compressing an index page without the data.
-@return	end of log record or NULL */
+@return end of log record or NULL */
 UNIV_INLINE
 byte*
 page_zip_parse_compress_no_data(
@@ -426,7 +415,7 @@ page_zip_parse_compress_no_data(
 	was successful. Crash in this case. */
 
 	if (page
-	    && !page_zip_compress(page_zip, page, index, level, NULL)) {
+	    && !page_zip_compress(page_zip, page, index, level, NULL, NULL)) {
 		ut_error;
 	}
 
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
index 1084d644c90..cb1ce60ac22 100644
--- a/storage/innobase/include/pars0opt.h
+++ b/storage/innobase/include/pars0opt.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,7 +37,6 @@ Created 12/21/1997 Heikki Tuuri
 Optimizes a select. Decides which indexes to tables to use. The tables
 are accessed in the order that they were written to the FROM part in the
 select statement. */
-UNIV_INTERN
 void
 opt_search_plan(
 /*============*/
@@ -49,7 +48,6 @@ already exist in the list. If the column is already in the list, puts a value
 indirection to point to the occurrence in the column list, except if the
 column occurrence we are looking at is in the column list, in which case
 nothing is done. */
-UNIV_INTERN
 void
 opt_find_all_cols(
 /*==============*/
@@ -60,13 +58,14 @@ opt_find_all_cols(
 					to add new found columns */
 	plan_t*		plan,		/*!< in: plan or NULL */
 	que_node_t*	exp);		/*!< in: expression or condition */
+#ifdef UNIV_SQL_DEBUG
 /********************************************************************//**
 Prints info of a query plan. */
-UNIV_INTERN
 void
 opt_print_query_plan(
 /*=================*/
 	sel_node_t*	sel_node);	/*!< in: select node */
+#endif /* UNIV_SQL_DEBUG */
 
 #ifndef UNIV_NONINL
 #include "pars0opt.ic"
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 73585c78a6a..7e153d0c19b 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -33,6 +33,7 @@ Created 11/19/1996 Heikki Tuuri
 #include "row0types.h"
 #include "trx0types.h"
 #include "ut0vec.h"
+#include "row0mysql.h"
 
 /** Type of the user functions. The first argument is always InnoDB-supplied
 and varies in type, while 'user_arg' is a user-supplied argument. The
@@ -44,12 +45,6 @@ typedef ibool	(*pars_user_func_cb_t)(void* arg, void* user_arg);
 information */
 extern int	yydebug;
 
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-extern ibool	pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
 /* Global variable used while parsing a single procedure or query : the code is
 NOT re-entrant */
 extern sym_tab_t*	pars_sym_tab_global;
@@ -98,8 +93,7 @@ yyparse(void);
 
 /*************************************************************//**
 Parses an SQL string returning the query graph.
-@return	own: the query graph */
-UNIV_INTERN
+@return own: the query graph */
 que_t*
 pars_sql(
 /*=====*/
@@ -108,7 +102,6 @@ pars_sql(
 /*************************************************************//**
 Retrieves characters to the lexical analyzer.
 @return number of characters copied or 0 on EOF */
-UNIV_INTERN
 int
 pars_get_lex_chars(
 /*===============*/
@@ -117,15 +110,13 @@ pars_get_lex_chars(
 				in the buffer */
 /*************************************************************//**
 Called by yyparse on error. */
-UNIV_INTERN
 void
 yyerror(
 /*====*/
 	const char*	s);	/*!< in: error message string */
 /*********************************************************************//**
 Parses a variable declaration.
-@return	own: symbol table node of type SYM_VAR */
-UNIV_INTERN
+@return own: symbol table node of type SYM_VAR */
 sym_node_t*
 pars_variable_declaration(
 /*======================*/
@@ -134,8 +125,7 @@ pars_variable_declaration(
 	pars_res_word_t* type);	/*!< in: pointer to a type token */
 /*********************************************************************//**
 Parses a function expression.
-@return	own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
 func_node_t*
 pars_func(
 /*======*/
@@ -144,8 +134,7 @@ pars_func(
 /*************************************************************************
 Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
 within the search string.
-@return	own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
 int
 pars_like_rebind(
 /*=============*/
@@ -154,8 +143,7 @@ pars_like_rebind(
         ulint           len);   /* in: length of literal to (re) bind*/
 /*********************************************************************//**
 Parses an operator expression.
-@return	own: function node in a query tree */
-UNIV_INTERN
+@return own: function node in a query tree */
 func_node_t*
 pars_op(
 /*====*/
@@ -165,8 +153,7 @@ pars_op(
 				operator */
 /*********************************************************************//**
 Parses an ORDER BY clause. Order by a single column only is supported.
-@return	own: order-by node in a query tree */
-UNIV_INTERN
+@return own: order-by node in a query tree */
 order_node_t*
 pars_order_by(
 /*==========*/
@@ -175,8 +162,7 @@ pars_order_by(
 /*********************************************************************//**
 Parses a select list; creates a query graph node for the whole SELECT
 statement.
-@return	own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
 sel_node_t*
 pars_select_list(
 /*=============*/
@@ -184,8 +170,7 @@ pars_select_list(
 	sym_node_t*	into_list);	/*!< in: variables list or NULL */
 /*********************************************************************//**
 Parses a cursor declaration.
-@return	sym_node */
-UNIV_INTERN
+@return sym_node */
 que_node_t*
 pars_cursor_declaration(
 /*====================*/
@@ -194,8 +179,7 @@ pars_cursor_declaration(
 	sel_node_t*	select_node);	/*!< in: select node */
 /*********************************************************************//**
 Parses a function declaration.
-@return	sym_node */
-UNIV_INTERN
+@return sym_node */
 que_node_t*
 pars_function_declaration(
 /*======================*/
@@ -203,8 +187,7 @@ pars_function_declaration(
 					table */
 /*********************************************************************//**
 Parses a select statement.
-@return	own: select node in a query tree */
-UNIV_INTERN
+@return own: select node in a query tree */
 sel_node_t*
 pars_select_statement(
 /*==================*/
@@ -218,8 +201,7 @@ pars_select_statement(
 	order_node_t*	order_by);	/*!< in: NULL or an order-by node */
 /*********************************************************************//**
 Parses a column assignment in an update.
-@return	column assignment node */
-UNIV_INTERN
+@return column assignment node */
 col_assign_node_t*
 pars_column_assignment(
 /*===================*/
@@ -227,8 +209,7 @@ pars_column_assignment(
 	que_node_t*	exp);	/*!< in: value to assign */
 /*********************************************************************//**
 Parses a delete or update statement start.
-@return	own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
 upd_node_t*
 pars_update_statement_start(
 /*========================*/
@@ -238,8 +219,7 @@ pars_update_statement_start(
 					if delete */
 /*********************************************************************//**
 Parses an update or delete statement.
-@return	own: update node in a query tree */
-UNIV_INTERN
+@return own: update node in a query tree */
 upd_node_t*
 pars_update_statement(
 /*==================*/
@@ -249,8 +229,7 @@ pars_update_statement(
 	que_node_t*	search_cond);	/*!< in: search condition or NULL */
 /*********************************************************************//**
 Parses an insert statement.
-@return	own: update node in a query tree */
-UNIV_INTERN
+@return own: insert node in a query tree */
 ins_node_t*
 pars_insert_statement(
 /*==================*/
@@ -259,8 +238,7 @@ pars_insert_statement(
 	sel_node_t*	select);	/*!< in: select condition or NULL */
 /*********************************************************************//**
 Parses a procedure parameter declaration.
-@return	own: symbol table node of type SYM_VAR */
-UNIV_INTERN
+@return own: symbol table node of type SYM_VAR */
 sym_node_t*
 pars_parameter_declaration(
 /*=======================*/
@@ -271,8 +249,7 @@ pars_parameter_declaration(
 	pars_res_word_t* type);	/*!< in: pointer to a type token */
 /*********************************************************************//**
 Parses an elsif element.
-@return	elsif node */
-UNIV_INTERN
+@return elsif node */
 elsif_node_t*
 pars_elsif_element(
 /*===============*/
@@ -280,8 +257,7 @@ pars_elsif_element(
 	que_node_t*	stat_list);	/*!< in: statement list */
 /*********************************************************************//**
 Parses an if-statement.
-@return	if-statement node */
-UNIV_INTERN
+@return if-statement node */
 if_node_t*
 pars_if_statement(
 /*==============*/
@@ -290,8 +266,7 @@ pars_if_statement(
 	que_node_t*	else_part);	/*!< in: else-part statement list */
 /*********************************************************************//**
 Parses a for-loop-statement.
-@return	for-statement node */
-UNIV_INTERN
+@return for-statement node */
 for_node_t*
 pars_for_statement(
 /*===============*/
@@ -301,8 +276,7 @@ pars_for_statement(
 	que_node_t*	stat_list);	/*!< in: statement list */
 /*********************************************************************//**
 Parses a while-statement.
-@return	while-statement node */
-UNIV_INTERN
+@return while-statement node */
 while_node_t*
 pars_while_statement(
 /*=================*/
@@ -310,22 +284,19 @@ pars_while_statement(
 	que_node_t*	stat_list);	/*!< in: statement list */
 /*********************************************************************//**
 Parses an exit statement.
-@return	exit statement node */
-UNIV_INTERN
+@return exit statement node */
 exit_node_t*
 pars_exit_statement(void);
 /*=====================*/
 /*********************************************************************//**
 Parses a return-statement.
-@return	return-statement node */
-UNIV_INTERN
+@return return-statement node */
 return_node_t*
 pars_return_statement(void);
 /*=======================*/
 /*********************************************************************//**
 Parses a procedure call.
-@return	function node */
-UNIV_INTERN
+@return function node */
 func_node_t*
 pars_procedure_call(
 /*================*/
@@ -333,8 +304,7 @@ pars_procedure_call(
 	que_node_t*	args);	/*!< in: argument list */
 /*********************************************************************//**
 Parses an assignment statement.
-@return	assignment statement node */
-UNIV_INTERN
+@return assignment statement node */
 assign_node_t*
 pars_assignment_statement(
 /*======================*/
@@ -343,8 +313,7 @@ pars_assignment_statement(
 /*********************************************************************//**
 Parses a fetch statement. into_list or user_func (but not both) must be
 non-NULL.
-@return	fetch statement node */
-UNIV_INTERN
+@return fetch statement node */
 fetch_node_t*
 pars_fetch_statement(
 /*=================*/
@@ -353,8 +322,7 @@ pars_fetch_statement(
 	sym_node_t*	user_func);	/*!< in: user function name, or NULL */
 /*********************************************************************//**
 Parses an open or close cursor statement.
-@return	fetch statement node */
-UNIV_INTERN
+@return open or close statement node */
 open_node_t*
 pars_open_statement(
 /*================*/
@@ -363,30 +331,26 @@ pars_open_statement(
 	sym_node_t*	cursor);	/*!< in: cursor node */
 /*********************************************************************//**
 Parses a row_printf-statement.
-@return	row_printf-statement node */
-UNIV_INTERN
+@return row_printf-statement node */
 row_printf_node_t*
 pars_row_printf_statement(
 /*======================*/
 	sel_node_t*	sel_node);	/*!< in: select node */
 /*********************************************************************//**
 Parses a commit statement.
-@return	own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
 commit_node_t*
 pars_commit_statement(void);
 /*=======================*/
 /*********************************************************************//**
 Parses a rollback statement.
-@return	own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
 roll_node_t*
 pars_rollback_statement(void);
 /*=========================*/
 /*********************************************************************//**
 Parses a column definition at a table creation.
-@return	column sym table node */
-UNIV_INTERN
+@return column sym table node */
 sym_node_t*
 pars_column_def(
 /*============*/
@@ -401,8 +365,7 @@ pars_column_def(
 						is of type NOT NULL. */
 /*********************************************************************//**
 Parses a table creation operation.
-@return	table create subgraph */
-UNIV_INTERN
+@return table create subgraph */
 tab_node_t*
 pars_create_table(
 /*==============*/
@@ -424,8 +387,7 @@ pars_create_table(
 					from disk */
 /*********************************************************************//**
 Parses an index creation operation.
-@return	index create subgraph */
-UNIV_INTERN
+@return index create subgraph */
 ind_node_t*
 pars_create_index(
 /*==============*/
@@ -438,8 +400,7 @@ pars_create_index(
 	sym_node_t*	column_list);	/*!< in: list of column names */
 /*********************************************************************//**
 Parses a procedure definition.
-@return	query fork node */
-UNIV_INTERN
+@return query fork node */
 que_fork_t*
 pars_procedure_definition(
 /*======================*/
@@ -453,38 +414,37 @@ Parses a stored procedure call, when this is not within another stored
 procedure, that is, the client issues a procedure call directly.
 In MySQL/InnoDB, stored InnoDB procedures are invoked via the
 parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return	query graph */
-UNIV_INTERN
+@return query graph */
 que_fork_t*
 pars_stored_procedure_call(
 /*=======================*/
 	sym_node_t*	sym_node);	/*!< in: stored procedure name */
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
+/** Completes a query graph by adding query thread and fork nodes
 above it and prepares the graph for running. The fork created is of
 type QUE_FORK_MYSQL_INTERFACE.
-@return	query thread node to run */
-UNIV_INTERN
+@param[in]	node		root node for an incomplete query
+				graph, or NULL for dummy graph
+@param[in]	trx		transaction handle
+@param[in]	heap		memory heap from which allocated
+@param[in]	prebuilt	row prebuilt structure
+@return query thread node to run */
 que_thr_t*
 pars_complete_graph_for_exec(
-/*=========================*/
-	que_node_t*	node,	/*!< in: root node for an incomplete
-				query graph, or NULL for dummy graph */
-	trx_t*		trx,	/*!< in: transaction handle */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	que_node_t*	node,
+	trx_t*		trx,
+	mem_heap_t*	heap,
+	row_prebuilt_t*	prebuilt)
 	MY_ATTRIBUTE((nonnull(2,3), warn_unused_result));
 
 /****************************************************************//**
 Create parser info struct.
-@return	own: info struct */
-UNIV_INTERN
+@return own: info struct */
 pars_info_t*
 pars_info_create(void);
 /*==================*/
 
 /****************************************************************//**
 Free info struct and everything it contains. */
-UNIV_INTERN
 void
 pars_info_free(
 /*===========*/
@@ -492,7 +452,6 @@ pars_info_free(
 
 /****************************************************************//**
 Add bound literal. */
-UNIV_INTERN
 void
 pars_info_add_literal(
 /*==================*/
@@ -507,7 +466,6 @@ pars_info_add_literal(
 /****************************************************************//**
 Equivalent to pars_info_add_literal(info, name, str, strlen(str),
 DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
 void
 pars_info_add_str_literal(
 /*======================*/
@@ -517,7 +475,6 @@ pars_info_add_str_literal(
 /********************************************************************
 If the literal value already exists then it rebinds otherwise it
 creates a new entry.*/
-UNIV_INTERN
 void
 pars_info_bind_literal(
 /*===================*/
@@ -530,7 +487,6 @@ pars_info_bind_literal(
 /********************************************************************
 If the literal value already exists then it rebinds otherwise it
 creates a new entry.*/
-UNIV_INTERN
 void
 pars_info_bind_varchar_literal(
 /*===========================*/
@@ -547,7 +503,6 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
 
 except that the buffer is dynamically allocated from the info struct's
 heap. */
-UNIV_INTERN
 void
 pars_info_bind_int4_literal(
 /*=======================*/
@@ -557,7 +512,6 @@ pars_info_bind_int4_literal(
 /********************************************************************
 If the literal value already exists then it rebinds otherwise it
 creates a new entry. */
-UNIV_INTERN
 void
 pars_info_bind_int8_literal(
 /*=======================*/
@@ -566,7 +520,6 @@ pars_info_bind_int8_literal(
 	const ib_uint64_t*	val);		/*!< in: value */
 /****************************************************************//**
 Add user function. */
-UNIV_INTERN
 void
 pars_info_bind_function(
 /*===================*/
@@ -576,7 +529,6 @@ pars_info_bind_function(
 	void*			arg);	/*!< in: user-supplied argument */
 /****************************************************************//**
 Add bound id. */
-UNIV_INTERN
 void
 pars_info_bind_id(
 /*=============*/
@@ -593,7 +545,6 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
 
 except that the buffer is dynamically allocated from the info struct's
 heap. */
-UNIV_INTERN
 void
 pars_info_add_int4_literal(
 /*=======================*/
@@ -610,7 +561,6 @@ pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
 
 except that the buffer is dynamically allocated from the info struct's
 heap. */
-UNIV_INTERN
 void
 pars_info_add_ull_literal(
 /*======================*/
@@ -621,7 +571,6 @@ pars_info_add_ull_literal(
 /****************************************************************//**
 If the literal value already exists then it rebinds otherwise it
 creates a new entry. */
-UNIV_INTERN
 void
 pars_info_bind_ull_literal(
 /*=======================*/
@@ -632,7 +581,6 @@ pars_info_bind_ull_literal(
 
 /****************************************************************//**
 Add bound id. */
-UNIV_INTERN
 void
 pars_info_add_id(
 /*=============*/
@@ -642,8 +590,7 @@ pars_info_add_id(
 
 /****************************************************************//**
 Get bound literal with the given name.
-@return	bound literal, or NULL if not found */
-UNIV_INTERN
+@return bound literal, or NULL if not found */
 pars_bound_lit_t*
 pars_info_get_bound_lit(
 /*====================*/
@@ -652,8 +599,7 @@ pars_info_get_bound_lit(
 
 /****************************************************************//**
 Get bound id with the given name.
-@return	bound id, or NULL if not found */
-UNIV_INTERN
+@return bound id, or NULL if not found */
 pars_bound_id_t*
 pars_info_get_bound_id(
 /*===================*/
@@ -662,7 +608,6 @@ pars_info_get_bound_id(
 
 /******************************************************************//**
 Release any resources used by the lexer. */
-UNIV_INTERN
 void
 pars_lexer_close(void);
 /*==================*/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
index bcf73639228..abd0c5cd4c8 100644
--- a/storage/innobase/include/pars0sym.h
+++ b/storage/innobase/include/pars0sym.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -35,8 +35,7 @@ Created 12/15/1997 Heikki Tuuri
 
 /******************************************************************//**
 Creates a symbol table for a single stored procedure or query.
-@return	own: symbol table */
-UNIV_INTERN
+@return own: symbol table */
 sym_tab_t*
 sym_tab_create(
 /*===========*/
@@ -45,15 +44,13 @@ sym_tab_create(
 Frees the memory allocated dynamically AFTER parsing phase for variables
 etc. in the symbol table. Does not free the mem heap where the table was
 originally created. Frees also SQL explicit cursor definitions. */
-UNIV_INTERN
 void
 sym_tab_free_private(
 /*=================*/
 	sym_tab_t*	sym_tab);	/*!< in, own: symbol table */
 /******************************************************************//**
 Adds an integer literal to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_int_lit(
 /*================*/
@@ -61,8 +58,7 @@ sym_tab_add_int_lit(
 	ulint		val);		/*!< in: integer value */
 /******************************************************************//**
 Adds an string literal to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_str_lit(
 /*================*/
@@ -72,8 +68,7 @@ sym_tab_add_str_lit(
 	ulint		len);		/*!< in: string length */
 /******************************************************************//**
 Add a bound literal to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_bound_lit(
 /*==================*/
@@ -82,7 +77,6 @@ sym_tab_add_bound_lit(
 	ulint*		lit_type);	/*!< out: type of literal (PARS_*_LIT) */
 /**********************************************************************
 Rebind literal to a node in the symbol table. */
-
 sym_node_t*
 sym_tab_rebind_lit(
 /*===============*/
@@ -92,16 +86,14 @@ sym_tab_rebind_lit(
         ulint           length);        /* in: length of data */
 /******************************************************************//**
 Adds an SQL null literal to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_null_lit(
 /*=================*/
 	sym_tab_t*	sym_tab);	/*!< in: symbol table */
 /******************************************************************//**
 Adds an identifier to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_id(
 /*===========*/
@@ -111,8 +103,7 @@ sym_tab_add_id(
 
 /******************************************************************//**
 Add a bound identifier to a symbol table.
-@return	symbol table node */
-UNIV_INTERN
+@return symbol table node */
 sym_node_t*
 sym_tab_add_bound_id(
 /*===========*/
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index ba8828623af..3e90e0b25e3 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,17 +37,12 @@ Created 5/27/1996 Heikki Tuuri
 #include "row0types.h"
 #include "pars0types.h"
 
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-extern ibool	que_trace_on;
-
 /** Mutex protecting the query threads. */
 extern ib_mutex_t	que_thr_mutex;
 
 /***********************************************************************//**
 Creates a query graph fork node.
-@return	own: fork node */
-UNIV_INTERN
+@return own: fork node */
 que_fork_t*
 que_fork_create(
 /*============*/
@@ -79,26 +74,25 @@ que_node_set_parent(
 /*================*/
 	que_node_t*	node,	/*!< in: graph node */
 	que_node_t*	parent);/*!< in: parent */
-/***********************************************************************//**
-Creates a query graph thread node.
-@return	own: query thread node */
-UNIV_INTERN
+/** Creates a query graph thread node.
+@param[in]	parent		parent node, i.e., a fork node
+@param[in]	heap		memory heap where created
+@param[in]	prebuilt	row prebuilt structure
+@return own: query thread node */
 que_thr_t*
 que_thr_create(
-/*===========*/
-	que_fork_t*	parent,	/*!< in: parent node, i.e., a fork node */
-	mem_heap_t*	heap);	/*!< in: memory heap where created */
+	que_fork_t*	parent,
+	mem_heap_t*	heap,
+	row_prebuilt_t*	prebuilt);
 /**********************************************************************//**
 Frees a query graph, but not the heap where it was created. Does not free
 explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
 void
 que_graph_free_recursive(
 /*=====================*/
 	que_node_t*	node);	/*!< in: query graph node */
 /**********************************************************************//**
 Frees a query graph. */
-UNIV_INTERN
 void
 que_graph_free(
 /*===========*/
@@ -111,8 +105,7 @@ que_graph_free(
 Stops a query thread if graph or trx is in a state requiring it. The
 conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex
 has to be reserved.
-@return	TRUE if stopped */
-UNIV_INTERN
+@return TRUE if stopped */
 ibool
 que_thr_stop(
 /*=========*/
@@ -120,7 +113,6 @@ que_thr_stop(
 /**********************************************************************//**
 Moves a thread from another state to the QUE_THR_RUNNING state. Increments
 the n_active_thrs counters of the query graph and transaction. */
-UNIV_INTERN
 void
 que_thr_move_to_run_state_for_mysql(
 /*================================*/
@@ -129,7 +121,6 @@ que_thr_move_to_run_state_for_mysql(
 /**********************************************************************//**
 A patch for MySQL used to 'stop' a dummy query thread used in MySQL
 select, when there is no error or lock wait. */
-UNIV_INTERN
 void
 que_thr_stop_for_mysql_no_error(
 /*============================*/
@@ -140,14 +131,12 @@ A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
 query thread is stopped and made inactive, except in the case where
 it was put to the lock wait state in lock0lock.cc, but the lock has already
 been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
 void
 que_thr_stop_for_mysql(
 /*===================*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Run a query thread. Handles lock waits. */
-UNIV_INTERN
 void
 que_run_threads(
 /*============*/
@@ -157,8 +146,7 @@ Moves a suspended query thread to the QUE_THR_RUNNING state and release
 a worker thread to execute it. This function should be used to end
 the wait state of a query thread waiting for a lock or a stored procedure
 completion.
-@return query thread instance of thread to wakeup or NULL  */
-UNIV_INTERN
+@return query thread instance of thread to wakeup or NULL */
 que_thr_t*
 que_thr_end_lock_wait(
 /*==================*/
@@ -172,7 +160,6 @@ is returned.
 @return a query thread of the graph moved to QUE_THR_RUNNING state, or
 NULL; the query thread should be executed by que_run_threads by the
 caller */
-UNIV_INTERN
 que_thr_t*
 que_fork_start_command(
 /*===================*/
@@ -200,7 +187,7 @@ UNIV_INLINE
 ulint
 que_node_get_type(
 /*==============*/
-	que_node_t*	node);	/*!< in: graph node */
+	const que_node_t*	node);	/*!< in: graph node */
 /***********************************************************************//**
 Gets pointer to the value data type field of a graph node. */
 UNIV_INLINE
@@ -217,7 +204,7 @@ que_node_get_val(
 	que_node_t*	node);	/*!< in: graph node */
 /***********************************************************************//**
 Gets the value buffer size of a graph node.
-@return	val buffer size, not defined if val.data == NULL in node */
+@return val buffer size, not defined if val.data == NULL in node */
 UNIV_INLINE
 ulint
 que_node_get_val_buf_size(
@@ -240,7 +227,7 @@ que_node_get_next(
 	que_node_t*	node);	/*!< in: node in a list */
 /*********************************************************************//**
 Gets the parent node of a query graph node.
-@return	parent node or NULL */
+@return parent node or NULL */
 UNIV_INLINE
 que_node_t*
 que_node_get_parent(
@@ -249,15 +236,14 @@ que_node_get_parent(
 /****************************************************************//**
 Get the first containing loop node (e.g. while_node_t or for_node_t) for the
 given node, or NULL if the node is not within a loop.
-@return	containing loop node, or NULL. */
-UNIV_INTERN
+@return containing loop node, or NULL. */
 que_node_t*
 que_node_get_containing_loop_node(
 /*==============================*/
 	que_node_t*	node);	/*!< in: node */
 /*********************************************************************//**
 Catenates a query graph node to a list of them, possible empty list.
-@return	one-way list of nodes */
+@return one-way list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_list_add_last(
@@ -274,7 +260,7 @@ que_node_list_get_last(
 	que_node_t*	node_list);	/* in: node list, or NULL */
 /*********************************************************************//**
 Gets a query graph node list length.
-@return	length, for NULL list 0 */
+@return length, for NULL list 0 */
 UNIV_INLINE
 ulint
 que_node_list_get_len(
@@ -293,7 +279,7 @@ que_thr_peek_stop(
 	que_thr_t*	thr);	/*!< in: query thread */
 /***********************************************************************//**
 Returns TRUE if the query graph is for a SELECT statement.
-@return	TRUE if a select */
+@return TRUE if a select */
 UNIV_INLINE
 ibool
 que_graph_is_select(
@@ -301,15 +287,13 @@ que_graph_is_select(
 	que_t*		graph);		/*!< in: graph */
 /**********************************************************************//**
 Prints info of an SQL query graph node. */
-UNIV_INTERN
 void
 que_node_print_info(
 /*================*/
 	que_node_t*	node);	/*!< in: query graph node */
 /*********************************************************************//**
 Evaluate the given SQL
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 que_eval_sql(
 /*=========*/
@@ -325,7 +309,6 @@ Round robin scheduler.
 @return a query thread of the graph moved to QUE_THR_RUNNING state, or
 NULL; the query thread should be executed by que_run_threads by the
 caller */
-UNIV_INTERN
 que_thr_t*
 que_fork_scheduler_round_robin(
 /*===========================*/
@@ -334,18 +317,43 @@ que_fork_scheduler_round_robin(
 
 /*********************************************************************//**
 Initialise the query sub-system. */
-UNIV_INTERN
 void
 que_init(void);
 /*==========*/
 
 /*********************************************************************//**
 Close the query sub-system. */
-UNIV_INTERN
 void
 que_close(void);
 /*===========*/
 
+/** Query thread states. Enumerators are implicitly numbered from 0, so a zero-filled que_thr_t::state reads as QUE_THR_RUNNING. */
+enum que_thr_state_t {
+	QUE_THR_RUNNING,
+	QUE_THR_PROCEDURE_WAIT,
+	/** in selects this means that the thread is at the end of its
+	result set (or start, in case of a scroll cursor); in other
+	statements, this means the thread has done its task */
+	QUE_THR_COMPLETED,
+	QUE_THR_COMMAND_WAIT,
+	QUE_THR_LOCK_WAIT,
+	QUE_THR_SUSPENDED
+};
+
+/** Query thread lock states; implicitly numbered from 0 (QUE_THR_LOCK_NOLOCK == 0) */
+enum que_thr_lock_t {
+	QUE_THR_LOCK_NOLOCK,
+	QUE_THR_LOCK_ROW,
+	QUE_THR_LOCK_TABLE
+};
+
+/** From where the cursor position is counted; implicitly numbered from 0 (QUE_CUR_NOT_DEFINED == 0) */
+enum que_cur_t {
+	QUE_CUR_NOT_DEFINED,
+	QUE_CUR_START,
+	QUE_CUR_END
+};
+
 /* Query graph query thread node: the fields are protected by the
 trx_t::mutex with the exceptions named below */
 
@@ -355,7 +363,7 @@ struct que_thr_t{
 					corruption */
 	que_node_t*	child;		/*!< graph child node */
 	que_t*		graph;		/*!< graph where this node belongs */
-	ulint		state;		/*!< state of the query thread */
+	que_thr_state_t	state;		/*!< state of the query thread */
 	ibool		is_active;	/*!< TRUE if the thread has been set
 					to the run state in
 					que_thr_move_to_run_state, but not
@@ -392,6 +400,8 @@ struct que_thr_t{
 	ulint		fk_cascade_depth; /*!< maximum cascading call depth
 					supported for foreign key constraint
 					related delete/updates */
+	row_prebuilt_t*	prebuilt;	/*!< prebuilt structure processed by
+					the query thread */
 };
 
 #define QUE_THR_MAGIC_N		8476583
@@ -500,29 +510,6 @@ struct que_fork_t{
 #define QUE_NODE_CALL		31
 #define QUE_NODE_EXIT		32
 
-/* Query thread states */
-#define QUE_THR_RUNNING		1
-#define QUE_THR_PROCEDURE_WAIT	2
-#define	QUE_THR_COMPLETED	3	/* in selects this means that the
-					thread is at the end of its result set
-					(or start, in case of a scroll cursor);
-					in other statements, this means the
-					thread has done its task */
-#define QUE_THR_COMMAND_WAIT	4
-#define QUE_THR_LOCK_WAIT	5
-#define QUE_THR_SUSPENDED	7
-#define QUE_THR_ERROR		8
-
-/* Query thread lock states */
-#define QUE_THR_LOCK_NOLOCK	0
-#define QUE_THR_LOCK_ROW	1
-#define QUE_THR_LOCK_TABLE	2
-
-/* From where the cursor position is counted */
-#define QUE_CUR_NOT_DEFINED	1
-#define QUE_CUR_START		2
-#define	QUE_CUR_END		3
-
 #ifndef UNIV_NONINL
 #include "que0que.ic"
 #endif
diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic
index eff5a86d958..ec61081cfe2 100644
--- a/storage/innobase/include/que0que.ic
+++ b/storage/innobase/include/que0que.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -84,11 +84,9 @@ UNIV_INLINE
 ulint
 que_node_get_type(
 /*==============*/
-	que_node_t*	node)	/*!< in: graph node */
+	const que_node_t*	node)	/*!< in: graph node */
 {
-	ut_ad(node);
-
-	return(((que_common_t*) node)->type);
+	return(reinterpret_cast<const que_common_t*>(node)->type);
 }
 
 /***********************************************************************//**
@@ -106,7 +104,7 @@ que_node_get_val(
 
 /***********************************************************************//**
 Gets the value buffer size of a graph node.
-@return	val buffer size, not defined if val.data == NULL in node */
+@return val buffer size, not defined if val.data == NULL in node */
 UNIV_INLINE
 ulint
 que_node_get_val_buf_size(
@@ -161,7 +159,7 @@ que_node_get_data_type(
 
 /*********************************************************************//**
 Catenates a query graph node to a list of them, possible empty list.
-@return	one-way list of nodes */
+@return one-way list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_list_add_last(
@@ -216,7 +214,7 @@ que_node_list_get_last(
 }
 /*********************************************************************//**
 Gets the next list node in a list of query graph nodes.
-@return	next node in a list of nodes */
+@return next node in a list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_get_next(
@@ -228,7 +226,7 @@ que_node_get_next(
 
 /*********************************************************************//**
 Gets a query graph node list length.
-@return	length, for NULL list 0 */
+@return length, for NULL list 0 */
 UNIV_INLINE
 ulint
 que_node_list_get_len(
@@ -251,7 +249,7 @@ que_node_list_get_len(
 
 /*********************************************************************//**
 Gets the parent node of a query graph node.
-@return	parent node or NULL */
+@return parent node or NULL */
 UNIV_INLINE
 que_node_t*
 que_node_get_parent(
@@ -292,7 +290,7 @@ que_thr_peek_stop(
 
 /***********************************************************************//**
 Returns TRUE if the query graph is for a SELECT statement.
-@return	TRUE if a select */
+@return TRUE if a select */
 UNIV_INLINE
 ibool
 que_graph_is_select(
@@ -307,3 +305,4 @@ que_graph_is_select(
 
 	return(FALSE);
 }
+
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
index ae75cfac6f5..129341be77c 100644
--- a/storage/innobase/include/read0read.h
+++ b/storage/innobase/include/read0read.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,166 +28,98 @@ Created 2/16/1997 Heikki Tuuri
 
 #include "univ.i"
 
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "trx0trx.h"
 #include "read0types.h"
 
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return	own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
-	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
-					transaction, or 0 used in purge */
-	mem_heap_t*	heap);		/*!< in: memory heap from which
-					allocated */
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close.
-@return	own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_purge_open(
-/*=================*/
-	mem_heap_t*	heap);		/*!< in: memory heap from which
-					allocated */
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
-	read_view_t*	view,		/*!< in: read view, can be 0 */
-	bool		own_mutex);	/*!< in: true if caller owns the
-					trx_sys_t::mutex */
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
-	trx_t*	trx);	/*!< in: trx which has a read view */
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return	true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
-	const read_view_t*	view,	/*!< in: read view */
-	trx_id_t		trx_id)	/*!< in: trx id */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Prints a read view to stderr. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
-	const read_view_t*	view);	/*!< in: read view */
-/*********************************************************************//**
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
-	trx_t*		cr_trx);/*!< in: trx where cursor view is created */
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
-	trx_t*		trx,		/*!< in: trx */
-	cursor_view_t*	curview);	/*!< in: cursor view to be closed */
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
-	trx_t*		trx,	/*!< in: transaction where cursor is set */
-	cursor_view_t*	curview);/*!< in: consistent cursor view to be set */
-
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_t{
-	ulint		type;	/*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
-	undo_no_t	undo_no;/*!< 0 or if type is
-				VIEW_HIGH_GRANULARITY
-				transaction undo_no when this high-granularity
-				consistent read view was created */
-	trx_id_t	low_limit_no;
-				/*!< The view does not need to see the undo
-				logs for transactions whose transaction number
-				is strictly smaller (<) than this value: they
-				can be removed in purge if not needed by other
-				views */
-	trx_id_t	low_limit_id;
-				/*!< The read should not see any transaction
-				with trx id >= this value. In other words,
-				this is the "high water mark". */
-	trx_id_t	up_limit_id;
-				/*!< The read should see all trx ids which
-				are strictly smaller (<) than this value.
-				In other words,
-				this is the "low water mark". */
-	ulint		n_trx_ids;
-				/*!< Number of cells in the trx_ids array */
-	trx_id_t*	trx_ids;/*!< Additional trx ids which the read should
-				not see: typically, these are the read-write
-				active transactions at the time when the read
-				is serialized, except the reading transaction
-				itself; the trx ids in this array are in a
-				descending order. These trx_ids should be
-				between the "low" and "high" water marks,
-				that is, up_limit_id and low_limit_id. */
-	trx_id_t	creator_trx_id;
-				/*!< trx id of creating transaction, or
-				0 used in purge */
-	UT_LIST_NODE_T(read_view_t) view_list;
-				/*!< List of read views in trx_sys */
-};
-
-/** Read view types @{ */
-#define VIEW_NORMAL		1	/*!< Normal consistent read view
-					where transaction does not see changes
-					made by active transactions except
-					creating transaction. */
-#define VIEW_HIGH_GRANULARITY	2	/*!< High-granularity read view where
-					transaction does not see changes
-					made by active transactions and own
-					changes after a point in time when this
-					read view was created. */
-/* @} */
-
-/** Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_t{
-	mem_heap_t*	heap;
-				/*!< Memory heap for the cursor view */
-	read_view_t*	read_view;
-				/*!< Consistent read view of the cursor*/
-	ulint		n_mysql_tables_in_use;
-				/*!< number of Innobase tables used in the
-				processing of this cursor */
+#include <algorithm>
+
+/** The MVCC read view manager */
+class MVCC {
+public:
+	/** Constructor
+	@param size		Number of views to pre-allocate */
+	explicit MVCC(ulint size);
+
+	/** Destructor.
+	Free all the views in the m_free list */
+	~MVCC();
+
+	/**
+	Allocate and create a view.
+	@param view		view owned by this class created for the
+				caller. Must be freed by calling view_close()
+	@param trx		transaction creating the view */
+	void view_open(ReadView*& view, trx_t* trx);
+
+	/**
+	Close a view created by the above function.
+	@param view		view allocated by view_open().
+	@param own_mutex	true if caller owns trx_sys_t::mutex */
+	void view_close(ReadView*& view, bool own_mutex);
+
+	/**
+	Release a view that is inactive but not closed. Caller must own
+	the trx_sys_t::mutex.
+	@param view		View to release */
+	void view_release(ReadView*& view);
+
+	/** Clones the oldest view and stores it in view. No need to
+	call view_close(). The caller owns the view that is passed in.
+	It will also move the closed views from the m_views list to the
+	m_free list. This function is called by Purge to create its view.
+	@param view		Preallocated view, owned by the caller */
+	void clone_oldest_view(ReadView* view);
+
+	/**
+	@return the number of active views */
+	ulint size() const;
+
+	/** A view pointer counts as active only if it is non-NULL and its
+	lowest bit is clear. @return true if the view is active and valid */
+	static bool is_view_active(ReadView* view)
+	{
+		ut_a(view != reinterpret_cast<ReadView*>(0x1));
+
+		return(view != NULL && !(intptr_t(view) & 0x1));
+	}
+
+	/**
+	Set the view creator transaction id. Note: This should be set only
+	for views created by RW transactions. */
+	static void set_view_creator_trx_id(ReadView* view, trx_id_t id);
+
+private:
+
+	/**
+	Validates a read view list. */
+	bool validate() const;
+
+	/**
+	Find a free view from the active list, if none found then allocate
+	a new view. This function will also attempt to move delete marked
+	views from the active list to the freed list.
+	@return a view to use */
+	inline ReadView* get_view();
+
+	/**
+	Get the oldest view in the system. It will also move the delete
+	marked read views from the views list to the freed list.
+	@return oldest view if found or NULL */
+	inline ReadView* get_oldest_view() const;
+
+private:
+	// Prevent copying
+	MVCC(const MVCC&);
+	MVCC& operator=(const MVCC&);
+
+private:
+	typedef UT_LIST_BASE_NODE_T(ReadView) view_list_t;
+
+	/** Free views ready for reuse. */
+	view_list_t		m_free;
+
+	/** Active and closed views, the closed views will have the
+	creator trx id set to TRX_ID_MAX */
+	view_list_t		m_views;
+};
 
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
+#endif /* read0read_h */
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
deleted file mode 100644
index 82c1028f12e..00000000000
--- a/storage/innobase/include/read0read.ic
+++ /dev/null
@@ -1,148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.ic
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates a read view object. */
-static
-bool
-read_view_validate(
-/*===============*/
-	const read_view_t*	view)	/*!< in: view to validate */
-{
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	/* Check that the view->trx_ids array is in descending order. */
-	for (ulint i = 1; i < view->n_trx_ids; ++i) {
-
-		ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
-	}
-
-	return(true);
-}
-
-/** Functor to validate the view list. */
-struct	ViewCheck {
-
-	ViewCheck() : m_prev_view(0) { }
-
-	void	operator()(const read_view_t* view)
-	{
-		ut_a(m_prev_view == NULL
-		     || m_prev_view->low_limit_no >= view->low_limit_no);
-
-		m_prev_view = view;
-	}
-
-	const read_view_t*	m_prev_view;
-};
-
-/*********************************************************************//**
-Validates a read view list. */
-static
-bool
-read_view_list_validate(void)
-/*=========================*/
-{
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
-
-	return(true);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return	true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
-	const read_view_t*	view,	/*!< in: read view */
-	trx_id_t		trx_id)	/*!< in: trx id */
-{
-	if (trx_id < view->up_limit_id) {
-
-		return(true);
-	} else if (trx_id >= view->low_limit_id) {
-
-		return(false);
-	} else {
-		ulint	lower = 0;
-		ulint	upper = view->n_trx_ids - 1;
-
-		ut_a(view->n_trx_ids > 0);
-
-		do {
-			ulint		mid	= (lower + upper) >> 1;
-			trx_id_t	mid_id	= view->trx_ids[mid];
-
-			if (mid_id == trx_id) {
-				return(FALSE);
-			} else if (mid_id < trx_id) {
-				if (mid > 0) {
-					upper = mid - 1;
-				} else {
-					break;
-				}
-			} else {
-				lower = mid + 1;
-			}
-		} while (lower <= upper);
-	}
-
-	return(true);
-}
-
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
-	read_view_t*	view,		/*!< in: read view, can be 0 */
-	bool		own_mutex)	/*!< in: true if caller owns the
-					trx_sys_t::mutex */
-{
-	if (view != 0) {
-		if (!own_mutex) {
-			mutex_enter(&trx_sys->mutex);
-		}
-
-		ut_ad(read_view_validate(view));
-
-		UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-
-		ut_ad(read_view_list_validate());
-
-		if (!own_mutex) {
-			mutex_exit(&trx_sys->mutex);
-		}
-	}
-}
-
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 969f4ebb637..c83c7e04f11 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,306 @@ Created 2/16/1997 Heikki Tuuri
 #ifndef read0types_h
 #define read0types_h
 
-struct read_view_t;
-struct cursor_view_t;
+#include <algorithm>
+#include "dict0mem.h"
+
+#include "trx0types.h"
+
+// Friend declaration
+class MVCC;
+
+/** Read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+class ReadView {
+	/** This is similar to a std::vector but it is not a drop
+	in replacement. It is specific to ReadView. */
+	class ids_t {
+		typedef trx_ids_t::value_type value_type;
+
+		/**
+		Constructor */
+		ids_t() : m_ptr(), m_size(), m_reserved() { }
+
+		/**
+		Destructor */
+		~ids_t() { UT_DELETE_ARRAY(m_ptr); }
+
+		/**
+		Try and increase the size of the array. Old elements are
+		copied across. It is a no-op if n is < current size.
+
+		@param n		Make space for n elements */
+		void reserve(ulint n);
+
+		/**
+		Set the current element count; nothing is allocated here, so n must not exceed capacity() (checked with ut_ad).
+		@param n		new size of the array, in elements */
+		void resize(ulint n)
+		{
+			ut_ad(n <= capacity());
+
+			m_size = n;
+		}
+
+		/**
+		Reset the size to 0 */
+		void clear() { resize(0); }
+
+		/**
+		@return the capacity of the array in elements */
+		ulint capacity() const { return(m_reserved); }
+
+		/**
+		Copy and overwrite the current array contents
+
+		@param start		Source array
+		@param end		Pointer to end of array */
+		void assign(const value_type* start, const value_type* end);
+
+		/**
+		Insert the value in the correct slot, preserving the order.
+		Doesn't check for duplicates. */
+		void insert(value_type value);
+
+		/**
+		@return the value of the first element in the array */
+		value_type front() const
+		{
+			ut_ad(!empty());
+
+			return(m_ptr[0]);
+		}
+
+		/**
+		@return the value of the last element in the array */
+		value_type back() const
+		{
+			ut_ad(!empty());
+
+			return(m_ptr[m_size - 1]);
+		}
+
+		/**
+		Append a value to the array.
+		@param value		the value to append */
+		void push_back(value_type value);
+
+		/**
+		@return a pointer to the start of the array */
+		trx_id_t* data() { return(m_ptr); };
+
+		/**
+		@return a const pointer to the start of the array */
+		const trx_id_t* data() const { return(m_ptr); };
+
+		/**
+		@return the number of elements in the array */
+		ulint size() const { return(m_size); }
+
+		/**
+		@return true if size() == 0 */
+		bool empty() const { return(size() == 0); }
+
+	private:
+		// Prevent copying
+		ids_t(const ids_t&);
+		ids_t& operator=(const ids_t&);
+
+	private:
+		/** Memory for the array */
+		value_type*	m_ptr;
+
+		/** Number of active elements in the array */
+		ulint		m_size;
+
+		/** Size of m_ptr in elements */
+		ulint		m_reserved;
+
+		friend class ReadView;
+	};
+public:
+	ReadView();
+	~ReadView();
+	/** Check whether transaction id is valid.
+	@param[in]	id		transaction id to check
+	@param[in]	name		table name */
+	static void check_trx_id_sanity(
+		trx_id_t		id,
+		const table_name_t&	name);
+
+	/** Check whether the changes by id are visible.
+	@param[in]	id	transaction id to check against the view
+	@param[in]	name	table name
+	@return whether the view sees the modifications of id. */
+	bool changes_visible(
+		trx_id_t		id,
+		const table_name_t&	name) const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		ut_ad(id > 0);
+
+		if (id < m_up_limit_id || id == m_creator_trx_id) {
+
+			return(true);
+		}
+
+		check_trx_id_sanity(id, name);
+
+		if (id >= m_low_limit_id) {
+
+			return(false);
+
+		} else if (m_ids.empty()) {
+
+			return(true);
+		}
+		/* Here m_up_limit_id <= id < m_low_limit_id: visible unless listed in m_ids (std::binary_search needs m_ids sorted ascending). */
+		const ids_t::value_type*	p = m_ids.data();
+
+		return(!std::binary_search(p, p + m_ids.size(), id));
+	}
+
+	/**
+	@param id		transaction to check
+	@return true if view sees transaction id, i.e. id < m_up_limit_id */
+	bool sees(trx_id_t id) const
+	{
+		return(id < m_up_limit_id);
+	}
+
+	/** Mark the view as closed by setting m_creator_trx_id to TRX_ID_MAX.
+	NOTE(review): m_closed, which is_closed() returns, is not set here. */
+	void close()
+	{
+		ut_ad(m_creator_trx_id != TRX_ID_MAX);
+		m_creator_trx_id = TRX_ID_MAX;
+	}
+
+	/**
+	@return true if the view is closed */
+	bool is_closed() const
+	{
+		return(m_closed);
+	}
+
+	/**
+	Write the limits to the file.
+	@param file		file to write to */
+	void print_limits(FILE* file) const
+	{
+		fprintf(file,
+			"Trx read view will not see trx with"
+			" id >= " TRX_ID_FMT ", sees < " TRX_ID_FMT "\n",
+			m_low_limit_id, m_up_limit_id);
+	}
+
+	/**
+	@return the low limit no */
+	trx_id_t low_limit_no() const
+	{
+		return(m_low_limit_no);
+	}
+
+	/**
+	@return the low limit id */
+	trx_id_t low_limit_id() const
+	{
+		return(m_low_limit_id);
+	}
+
+	/**
+	@return true if there are no transaction ids in the snapshot */
+	bool empty() const
+	{
+		return(m_ids.empty());
+	}
+
+#ifdef UNIV_DEBUG
+	/**
+	@param rhs		view to compare with
+	@return true if this view's m_low_limit_no is less than or equal to that of rhs */
+	bool le(const ReadView* rhs) const
+	{
+		return(m_low_limit_no <= rhs->m_low_limit_no);
+	}
+
+	trx_id_t up_limit_id() const
+	{
+		return(m_up_limit_id);
+	}
+#endif /* UNIV_DEBUG */
+private:
+	/**
+	Copy the transaction ids from the source vector */
+	inline void copy_trx_ids(const trx_ids_t& trx_ids);
+
+	/**
+	Opens a read view where exactly the transactions serialized before this
+	point in time are seen in the view.
+	@param id		Creator transaction id */
+	inline void prepare(trx_id_t id);
+
+	/**
+	Complete the read view creation */
+	inline void complete();
+
+	/**
+	Copy state from another view. Must call copy_complete() to finish.
+	@param other		view to copy from */
+	inline void copy_prepare(const ReadView& other);
+
+	/**
+	Complete the copy, insert the creator transaction id into
+	m_ids too and adjust m_up_limit_id, if required */
+	inline void copy_complete();
+
+	/**
+	Set the creator transaction id, existing id must be 0 */
+	void creator_trx_id(trx_id_t id)
+	{
+		ut_ad(m_creator_trx_id == 0);
+		m_creator_trx_id = id;
+	}
+
+	friend class MVCC;
+
+private:
+	// Disable copying
+	ReadView(const ReadView&);
+	ReadView& operator=(const ReadView&);
+
+private:
+	/** The read should not see any transaction with trx id >= this
+	value. In other words, this is the "high water mark". */
+	trx_id_t	m_low_limit_id;
+
+	/** The read should see all trx ids which are strictly
+	smaller (<) than this value.  In other words, this is the
+	"low water mark". */
+	trx_id_t	m_up_limit_id;
+
+	/** trx id of creating transaction, set to TRX_ID_MAX for free
+	views. */
+	trx_id_t	m_creator_trx_id;
+
+	/** Set of RW transactions that were active when this snapshot
+	was taken */
+	ids_t		m_ids;
+
+	/** The view does not need to see the undo logs for transactions
+	whose transaction number is strictly smaller (<) than this value:
+	they can be removed in purge if not needed by other views */
+	trx_id_t	m_low_limit_no;
+
+	/** AC-NL-RO transaction view that has been "closed". */
+	bool		m_closed;
+
+	typedef UT_LIST_NODE_T(ReadView) node_t;
+
+	/** Padding, followed by the node for the list of read views in trx_sys */
+	byte		pad1[64 - sizeof(node_t)];
+	node_t		m_view_list;
+};
 
 #endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index 65116229fdc..a59479849a8 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -26,16 +26,16 @@ Created 7/1/1994 Heikki Tuuri
 #ifndef rem0cmp_h
 #define rem0cmp_h
 
-#include "univ.i"
+#include "ha_prototypes.h"
 #include "data0data.h"
 #include "data0type.h"
 #include "dict0dict.h"
 #include "rem0rec.h"
+#include <my_sys.h>
 
 /*************************************************************//**
 Returns TRUE if two columns are equal for comparison purposes.
-@return	TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
+@return TRUE if the columns are considered equal in comparisons */
 ibool
 cmp_cols_are_equal(
 /*===============*/
@@ -43,170 +43,142 @@ cmp_cols_are_equal(
 	const dict_col_t*	col2,	/*!< in: column 2 */
 	ibool			check_charsets);
 					/*!< in: whether to check charsets */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
+/** Compare two data fields.
+@param[in] mtype main type
+@param[in] prtype precise type
+@param[in] data1 data field
+@param[in] len1 length of data1 in bytes, or UNIV_SQL_NULL
+@param[in] data2 data field
+@param[in] len2 length of data2 in bytes, or UNIV_SQL_NULL
+@return the comparison result of data1 and data2
+@retval 0 if data1 is equal to data2
+@retval negative if data1 is less than data2
+@retval positive if data1 is greater than data2 */
 int
 cmp_data_data(
-/*==========*/
-	ulint		mtype,	/*!< in: main type */
-	ulint		prtype,	/*!< in: precise type */
-	const byte*	data1,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
-	ulint		mtype,	/*!< in: main type */
-	ulint		prtype,	/*!< in: precise type */
-	const byte*	data1,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+	ulint		mtype,
+	ulint		prtype,
+	const byte*	data1,
+	ulint		len1,
+	const byte*	data2,
+	ulint		len2)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type to be VARCHAR.
-@return	1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_varchar(
-/*=======================*/
-	const byte*	lhs,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		lhs_len,/* in: data field length or UNIV_SQL_NULL */
-	const byte*	rhs,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		rhs_len);/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return	1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_prefix(
-/*===========================*/
-	const byte*	data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_suffix(
-/*===========================*/
-	const byte*	data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_substr(
-/*===========================*/
-	const byte*	data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/* in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
+/** Compare two data fields.
+@param[in] dfield1 data field; must have type field set
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
 UNIV_INLINE
 int
 cmp_dfield_dfield(
 /*==============*/
 	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
 	const dfield_t*	dfield2);/*!< in: data field */
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
+
+
+/** Compare a GIS data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] mode compare mode
+@retval negative if dtuple is less than rec */
+int
+cmp_dtuple_rec_with_gis(
+/*====================*/
+	const dtuple_t*	dtuple,
+	const rec_t*	rec,
+	const ulint*	offsets,
+	page_cur_mode_t	mode)
+	MY_ATTRIBUTE((nonnull));
+
+/** Compare a GIS data tuple to a physical record in rtree non-leaf node.
+We need to check the page number field, since we don't store pk field in
+rtree non-leaf node.
+@param[in] dtuple data tuple
+@param[in] rec R-tree record
+@param[in] offsets rec_get_offsets(rec)
+@return the comparison result of dtuple and rec
+@retval negative if dtuple is less than rec */
+int
+cmp_dtuple_rec_with_gis_internal(
+	const dtuple_t*	dtuple,
+	const rec_t*	rec,
+	const ulint*	offsets);
+
+/** Compare a data tuple to a physical record.
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@param[in] n_cmp number of fields to compare
+@param[in,out] matched_fields number of completely matched fields
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
 int
 cmp_dtuple_rec_with_match_low(
-/*==========================*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record which differs from
-				dtuple in some of the common fields, or which
-				has an equal number or more fields than
-				dtuple */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n_cmp,	/*!< in: number of fields to compare */
-	ulint*		matched_fields,
-				/*!< in/out: number of already completely
-				matched fields; when function returns,
-				contains the value for current comparison */
-	ulint*		matched_bytes)
-				/*!< in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns, contains the
-				value for current comparison */
+	const dtuple_t*	dtuple,
+	const rec_t*	rec,
+	const ulint*	offsets,
+	ulint		n_cmp,
+	ulint*		matched_fields)
 	MY_ATTRIBUTE((nonnull));
-#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes)	\
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields)		\
 	cmp_dtuple_rec_with_match_low(					\
-		tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
-/**************************************************************//**
-Compares a data tuple to a physical record.
+		tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields)
+/** Compare a data tuple to a physical record.
+@param[in]	dtuple		data tuple
+@param[in]	rec		B-tree or R-tree index record
+@param[in]	index		index tree
+@param[in]	offsets		rec_get_offsets(rec)
+@param[in,out]	matched_fields	number of completely matched fields
+@param[in,out]	matched_bytes	number of matched bytes in the first
+field that is not matched
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
+int
+cmp_dtuple_rec_with_match_bytes(
+	const dtuple_t*		dtuple,
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	const ulint*		offsets,
+	ulint*			matched_fields,
+	ulint*			matched_bytes)
+	MY_ATTRIBUTE((warn_unused_result));
+/** Compare a data tuple to a physical record.
 @see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
+@param[in] dtuple data tuple
+@param[in] rec B-tree record
+@param[in] offsets rec_get_offsets(rec)
+@return the comparison result of dtuple and rec
+@retval 0 if dtuple is equal to rec
+@retval negative if dtuple is less than rec
+@retval positive if dtuple is greater than rec */
 int
 cmp_dtuple_rec(
-/*===========*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+	const dtuple_t*	dtuple,
+	const rec_t*	rec,
+	const ulint*	offsets);
 /**************************************************************//**
 Checks if a dtuple is a prefix of a record. The last field in dtuple
 is allowed to be a prefix of the corresponding field in the record.
-@return	TRUE if prefix */
-UNIV_INTERN
+@return TRUE if prefix */
 ibool
 cmp_dtuple_is_prefix_of_rec(
 /*========================*/
 	const dtuple_t*	dtuple,	/*!< in: data tuple */
 	const rec_t*	rec,	/*!< in: physical record */
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
+/** Compare two physical records that contain the same number of columns,
 none of which are stored externally.
-@retval 1 if rec1 (including non-ordering columns) is greater than rec2
-@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval positive if rec1 (including non-ordering columns) is greater than rec2
+@retval negative if rec1 (including non-ordering columns) is less than rec2
 @retval 0 if rec1 is a duplicate of rec2 */
-UNIV_INTERN
 int
 cmp_rec_rec_simple(
 /*===============*/
@@ -219,80 +191,64 @@ cmp_rec_rec_simple(
 					duplicate key value if applicable,
 					or NULL */
 	MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
+/** Compare two B-tree records.
+@param[in] rec1 B-tree record
+@param[in] rec2 B-tree record
+@param[in] offsets1 rec_get_offsets(rec1, index)
+@param[in] offsets2 rec_get_offsets(rec2, index)
+@param[in] index B-tree index
+@param[in] nulls_unequal true if this is for index cardinality
+statistics estimation, and innodb_stats_method=nulls_unequal
+or innodb_stats_method=nulls_ignored
+@param[out] matched_fields number of fields that were
+completely matched
+@return the comparison result
+@retval 0 if rec1 is equal to rec2
+@retval negative if rec1 is less than rec2
+@retval positive if rec1 is greater than rec2 */
 int
 cmp_rec_rec_with_match(
-/*===================*/
-	const rec_t*	rec1,	/*!< in: physical record */
-	const rec_t*	rec2,	/*!< in: physical record */
-	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index,	/*!< in: data dictionary index */
-	ibool		nulls_unequal,
-				/* in: TRUE if this is for index statistics
-				cardinality estimation, and innodb_stats_method
-				is "nulls_unequal" or "nulls_ignored" */
-	ulint*		matched_fields, /*!< in/out: number of already completely
-				matched fields; when the function returns,
-				contains the value the for current
-				comparison */
-	ulint*		matched_bytes);/*!< in/out: number of already matched
-				bytes within the first field not completely
-				matched; when the function returns, contains
-				the value for the current comparison */
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
+	const rec_t*		rec1,
+	const rec_t*		rec2,
+	const ulint*		offsets1,
+	const ulint*		offsets2,
+	const dict_index_t*	index,
+	bool			nulls_unequal,
+	ulint*			matched_fields);
+
+/** Compare two B-tree records.
+Only the common first fields are compared, and externally stored fields
+are treated as equal.
+@param[in]	rec1		B-tree record
+@param[in]	rec2		B-tree record
+@param[in]	offsets1	rec_get_offsets(rec1, index)
+@param[in]	offsets2	rec_get_offsets(rec2, index)
+@param[in]	index		B-tree index
+@param[out]	matched_fields	number of completely matched fields, or NULL
+@return positive, 0, negative if rec1 is greater than, equal to, less than
+rec2, respectively */
 UNIV_INLINE
 int
 cmp_rec_rec(
-/*========*/
-	const rec_t*	rec1,	/*!< in: physical record */
-	const rec_t*	rec2,	/*!< in: physical record */
-	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index);	/*!< in: data dictionary index */
+	const rec_t*		rec1,
+	const rec_t*		rec2,
+	const ulint*		offsets1,
+	const ulint*		offsets2,
+	const dict_index_t*	index,
+	ulint*			matched_fields = NULL);
 
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INTERN
-int
-cmp_dfield_dfield_like_prefix(
-/*==========================*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*	dfield1,/* in: data field; must have type field set */
-	dfield_t*	dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*	dfield1,/* in: data field; must have type field set */
-	dfield_t*	dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
+/** Compare two data fields.
+@param[in] dfield1 data field
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2, or dfield2 is a prefix of dfield1
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
 UNIV_INLINE
 int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*	dfield1,/* in: data field; must have type field set */
-	dfield_t*	dfield2);/* in: data field */
+cmp_dfield_dfield_like_prefix(
+	const dfield_t*	dfield1,
+	const dfield_t*	dfield2);
 
 #ifndef UNIV_NONINL
 #include "rem0cmp.ic"
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
index 67a2dcacba1..bf913b93bfb 100644
--- a/storage/innobase/include/rem0cmp.ic
+++ b/storage/innobase/include/rem0cmp.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,91 +23,20 @@ Comparison services for records
 Created 7/1/1994 Heikki Tuuri
 ************************************************************************/
 
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
-	ulint		mtype,	/*!< in: main type */
-	ulint		prtype,	/*!< in: precise type */
-	const byte*	data1,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
-	const byte*	data2,	/*!< in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2)	/*!< in: data field length or UNIV_SQL_NULL */
-{
-	return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
+#include <mysql_com.h>
 
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_prefix(
-/*======================*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	byte*           data1,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len1,   /* in: data field length or UNIV_SQL_NULL */
-	byte*           data2,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len2)   /* in: data field length or UNIV_SQL_NULL */
-{
-	return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_suffix(
-/*======================*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	byte*           data1,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len1,   /* in: data field length or UNIV_SQL_NULL */
-	byte*           data2,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len2)   /* in: data field length or UNIV_SQL_NULL */
-{
-	return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_substr(
-/*======================*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	byte*           data1,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len1,   /* in: data field length or UNIV_SQL_NULL */
-	byte*           data2,  /* in: data field (== a pointer to a memory
-				buffer) */
-	ulint           len2)   /* in: data field length or UNIV_SQL_NULL */
-{
-	return(cmp_data_data_slow_like_substr(data1, len1, data2, len2));
-}
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
+/** Compare two data fields.
+@param[in] dfield1 data field; must have type field set
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
 UNIV_INLINE
 int
 cmp_dfield_dfield(
-/*==============*/
-	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
-	const dfield_t*	dfield2)/*!< in: data field */
+	const dfield_t*	dfield1,
+	const dfield_t*	dfield2)
 {
 	const dtype_t*	type;
 
@@ -122,65 +51,90 @@ cmp_dfield_dfield(
 			     dfield_get_len(dfield2)));
 }
 
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
+/** Compare two B-tree records.
+Only the common first fields are compared, and externally stored fields
+are treated as equal.
+@param[in]	rec1		B-tree record
+@param[in]	rec2		B-tree record
+@param[in]	offsets1	rec_get_offsets(rec1, index)
+@param[in]	offsets2	rec_get_offsets(rec2, index)
+@param[in]	index		B-tree index
+@param[out]	matched_fields	number of completely matched fields, or NULL
+@return positive, 0, negative if rec1 is greater than, equal to, less than
+rec2, respectively */
 UNIV_INLINE
 int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*       dfield1,/* in: data field; must have type field set */
-	dfield_t*       dfield2)/* in: data field */
+cmp_rec_rec(
+	const rec_t*		rec1,
+	const rec_t*		rec2,
+	const ulint*		offsets1,
+	const ulint*		offsets2,
+	const dict_index_t*	index,
+	ulint*			matched_fields)
 {
-	ut_ad(dfield_check_typed(dfield1));
+	ulint	match_f;
+	int	ret;
+
+	ret = cmp_rec_rec_with_match(
+		rec1, rec2, offsets1, offsets2, index, false, &match_f);
+
+	if (matched_fields != NULL) {
+		*matched_fields = match_f;
+	}
 
-	return(cmp_data_data_like_suffix(
-		(byte*) dfield_get_data(dfield1),
-		dfield_get_len(dfield1),
-		(byte*) dfield_get_data(dfield2),
-		dfield_get_len(dfield2)));
+	return(ret);
 }
 
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
+/** Compare two data fields.
+@param[in] dfield1 data field
+@param[in] dfield2 data field
+@return the comparison result of dfield1 and dfield2
+@retval 0 if dfield1 is equal to dfield2, or dfield2 is a prefix of dfield1
+@retval negative if dfield1 is less than dfield2
+@retval positive if dfield1 is greater than dfield2 */
 UNIV_INLINE
 int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*       dfield1,/* in: data field; must have type field set */
-	dfield_t*       dfield2)/* in: data field */
+cmp_dfield_dfield_like_prefix(
+	const dfield_t*	dfield1,
+	const dfield_t*	dfield2)
 {
+	const dtype_t*  type;
+
 	ut_ad(dfield_check_typed(dfield1));
+	ut_ad(dfield_check_typed(dfield2));
 
-	return(cmp_data_data_like_substr(
-		(byte*) dfield_get_data(dfield1),
-		dfield_get_len(dfield1),
-		(byte*) dfield_get_data(dfield2),
-		dfield_get_len(dfield2)));
-}
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
-	const rec_t*	rec1,	/*!< in: physical record */
-	const rec_t*	rec2,	/*!< in: physical record */
-	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index)	/*!< in: data dictionary index */
-{
-	ulint	match_f		= 0;
-	ulint	match_b		= 0;
+	type = dfield_get_type(dfield1);
 
-	return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
-				      FALSE, &match_f, &match_b));
+#ifdef UNIV_DEBUG
+	switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+	case MYSQL_TYPE_BIT:
+	case MYSQL_TYPE_STRING:
+	case MYSQL_TYPE_VAR_STRING:
+	case MYSQL_TYPE_TINY_BLOB:
+	case MYSQL_TYPE_MEDIUM_BLOB:
+	case MYSQL_TYPE_BLOB:
+	case MYSQL_TYPE_LONG_BLOB:
+	case MYSQL_TYPE_VARCHAR:
+		break;
+	default:
+		ut_error;
+	}
+#endif /* UNIV_DEBUG */
+
+	uint cs_num = (uint) dtype_get_charset_coll(type->prtype);
+
+	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
+		return(cs->coll->strnncoll(
+			       cs,
+			       static_cast<uchar*>(
+				       dfield_get_data(dfield1)),
+			       dfield_get_len(dfield1),
+			       static_cast<uchar*>(
+				       dfield_get_data(dfield2)),
+			       dfield_get_len(dfield2),
+			       1)); /* t_is_prefix */
+	}
+
+	ib::fatal() << "Unable to find charset-collation " << cs_num;
+	return(0);
 }
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 83286a98f8e..8490e7c9c88 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -26,13 +26,14 @@ Created 5/30/1994 Heikki Tuuri
 #ifndef rem0rec_h
 #define rem0rec_h
 
-#ifndef UNIV_INNOCHECKSUM
 #include "univ.i"
 #include "data0data.h"
 #include "rem0types.h"
 #include "mtr0types.h"
 #include "page0types.h"
-#endif /* !UNIV_INNOCHECKSUM */
+#include "trx0types.h"
+#include <ostream>
+#include <sstream>
 
 /* Info bit denoting the predefined minimum record: this bit is set
 if and only if the record is the first user record on a non-leaf
@@ -87,43 +88,42 @@ significant bit denotes that the tail of a field is stored off-page. */
 
 /* Number of elements that should be initially allocated for the
 offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE	100
+#define REC_OFFS_NORMAL_SIZE	OFFS_IN_REC_NORMAL_SIZE
 #define REC_OFFS_SMALL_SIZE	10
 
-#ifndef UNIV_INNOCHECKSUM
 /******************************************************//**
 The following function is used to get the pointer of the next chained record
 on the same page.
-@return	pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
 UNIV_INLINE
 const rec_t*
 rec_get_next_ptr_const(
 /*===================*/
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to get the pointer of the next chained record
 on the same page.
-@return	pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
 UNIV_INLINE
 rec_t*
 rec_get_next_ptr(
 /*=============*/
 	rec_t*	rec,	/*!< in: physical record */
 	ulint	comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to get the offset of the
 next chained record on the same page.
-@return	the page offset of the next chained record, or 0 if none */
+@return the page offset of the next chained record, or 0 if none */
 UNIV_INLINE
 ulint
 rec_get_next_offs(
 /*==============*/
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the next record offset field
 of an old-style record. */
@@ -147,34 +147,49 @@ rec_set_next_offs_new(
 /******************************************************//**
 The following function is used to get the number of fields
 in an old-style record.
-@return	number of data fields */
+@return number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields_old(
 /*=================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to get the number of fields
 in a record.
-@return	number of data fields */
+@return number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields(
 /*=============*/
 	const rec_t*		rec,	/*!< in: physical record */
 	const dict_index_t*	index)	/*!< in: record descriptor */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Confirms that the n_fields of the entry is sane by comparing it with
+another record in the same page
+@param[in]	index	index
+@param[in]	rec	record of the same page
+@param[in]	entry	index entry
+@return	true if n_fields is sane */
+UNIV_INLINE
+bool
+rec_n_fields_is_sane(
+	dict_index_t*	index,
+	const rec_t*	rec,
+	const dtuple_t*	entry)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /******************************************************//**
 The following function is used to get the number of records owned by the
 previous directory record.
-@return	number of owned records */
+@return number of owned records */
 UNIV_INLINE
 ulint
 rec_get_n_owned_old(
 /*================*/
 	const rec_t*	rec)	/*!< in: old-style physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the number of owned records. */
 UNIV_INLINE
@@ -187,13 +202,13 @@ rec_set_n_owned_old(
 /******************************************************//**
 The following function is used to get the number of records owned by the
 previous directory record.
-@return	number of owned records */
+@return number of owned records */
 UNIV_INLINE
 ulint
 rec_get_n_owned_new(
 /*================*/
 	const rec_t*	rec)	/*!< in: new-style physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the number of owned records. */
 UNIV_INLINE
@@ -207,14 +222,14 @@ rec_set_n_owned_new(
 /******************************************************//**
 The following function is used to retrieve the info bits of
 a record.
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 rec_get_info_bits(
 /*==============*/
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the info bits of a record. */
 UNIV_INLINE
@@ -235,13 +250,13 @@ rec_set_info_bits_new(
 	MY_ATTRIBUTE((nonnull));
 /******************************************************//**
 The following function retrieves the status bits of a new-style record.
-@return	status bits */
+@return status bits */
 UNIV_INLINE
 ulint
 rec_get_status(
 /*===========*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 The following function is used to set the status bits of a new-style record. */
@@ -256,14 +271,14 @@ rec_set_status(
 /******************************************************//**
 The following function is used to retrieve the info and status
 bits of a record.  (Only compact records have status bits.)
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 rec_get_info_and_status_bits(
 /*=========================*/
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the info and status
 bits of a record.  (Only compact records have status bits.) */
@@ -277,14 +292,14 @@ rec_set_info_and_status_bits(
 
 /******************************************************//**
 The following function tells if record is delete marked.
-@return	nonzero if delete marked */
+@return nonzero if delete marked */
 UNIV_INLINE
 ulint
 rec_get_deleted_flag(
 /*=================*/
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the deleted bit. */
 UNIV_INLINE
@@ -306,23 +321,23 @@ rec_set_deleted_flag_new(
 	MY_ATTRIBUTE((nonnull(1)));
 /******************************************************//**
 The following function tells if a new-style record is a node pointer.
-@return	TRUE if node pointer */
+@return TRUE if node pointer */
 UNIV_INLINE
 ibool
 rec_get_node_ptr_flag(
 /*==================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to get the order number
 of an old-style record in the heap of the index page.
-@return	heap order number */
+@return heap order number */
 UNIV_INLINE
 ulint
 rec_get_heap_no_old(
 /*================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the heap number
 field in an old-style record. */
@@ -336,13 +351,13 @@ rec_set_heap_no_old(
 /******************************************************//**
 The following function is used to get the order number
 of a new-style record in the heap of the index page.
-@return	heap order number */
+@return heap order number */
 UNIV_INLINE
 ulint
 rec_get_heap_no_new(
 /*================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 The following function is used to set the heap number
 field in a new-style record. */
@@ -356,13 +371,13 @@ rec_set_heap_no_new(
 /******************************************************//**
 The following function is used to test whether the data offsets
 in the record are stored in one-byte or two-byte format.
-@return	TRUE if 1-byte form */
+@return TRUE if 1-byte form */
 UNIV_INLINE
 ibool
 rec_get_1byte_offs_flag(
 /*====================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 The following function is used to set the 1-byte offsets flag. */
@@ -378,14 +393,14 @@ rec_set_1byte_offs_flag(
 Returns the offset of nth field end if the record is stored in the 1-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
 value.
-@return	offset of the start of the field, SQL null flag ORed */
+@return offset of the start of the field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_1_get_field_end_info(
 /*=====================*/
 	const rec_t*	rec,	/*!< in: record */
 	ulint		n)	/*!< in: field index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Returns the offset of nth field end if the record is stored in the 2-byte
@@ -399,7 +414,7 @@ rec_2_get_field_end_info(
 /*=====================*/
 	const rec_t*	rec,	/*!< in: record */
 	ulint		n)	/*!< in: field index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Returns nonzero if the field is stored off-page.
@@ -411,13 +426,12 @@ rec_2_is_field_extern(
 /*==================*/
 	const rec_t*	rec,	/*!< in: record */
 	ulint		n)	/*!< in: field index */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Determine how many of the first n columns in a compact
 physical record are stored externally.
-@return	number of externally stored columns */
-UNIV_INTERN
+@return number of externally stored columns */
 ulint
 rec_get_n_extern_new(
 /*=================*/
@@ -429,8 +443,7 @@ rec_get_n_extern_new(
 /******************************************************//**
 The following function determines the offsets to each field
 in the record.	It can reuse a previously allocated array.
-@return	the new offsets */
-UNIV_INTERN
+@return the new offsets */
 ulint*
 rec_get_offsets_func(
 /*=================*/
@@ -465,7 +478,6 @@ rec_get_offsets_func(
 /******************************************************//**
 The following function determines the offsets to each field
 in the record.  It can reuse a previously allocated array. */
-UNIV_INTERN
 void
 rec_get_offsets_reverse(
 /*====================*/
@@ -482,7 +494,7 @@ rec_get_offsets_reverse(
 #ifdef UNIV_DEBUG
 /************************************************************//**
 Validates offsets returned by rec_get_offsets().
-@return	TRUE if valid */
+@return TRUE if valid */
 UNIV_INLINE
 ibool
 rec_offs_validate(
@@ -511,8 +523,7 @@ rec_offs_make_valid(
 /************************************************************//**
 The following function is used to get the offset to the nth
 data field in an old-style record.
-@return	offset to the field */
-UNIV_INTERN
+@return offset to the field */
 ulint
 rec_get_nth_field_offs_old(
 /*=======================*/
@@ -527,18 +538,18 @@ rec_get_nth_field_offs_old(
 Gets the physical size of an old-style field.
 Also an SQL null may have a field of size > 0,
 if the data type is of a fixed size.
-@return	field size in bytes */
+@return field size in bytes */
 UNIV_INLINE
 ulint
 rec_get_nth_field_size(
 /*===================*/
 	const rec_t*	rec,	/*!< in: record */
 	ulint		n)	/*!< in: index of the field */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /************************************************************//**
 The following function is used to get an offset to the nth
 data field in a record.
-@return	offset from the origin of rec */
+@return offset from the origin of rec */
 UNIV_INLINE
 ulint
 rec_get_nth_field_offs(
@@ -553,73 +564,81 @@ rec_get_nth_field_offs(
 /******************************************************//**
 Determine if the offsets are for a record in the new
 compact format.
-@return	nonzero if compact format */
+@return nonzero if compact format */
 UNIV_INLINE
 ulint
 rec_offs_comp(
 /*==========*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 Determine if the offsets are for a record containing
 externally stored columns.
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 rec_offs_any_extern(
 /*================*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
-@return	first field containing a null BLOB pointer, or NULL if none found */
+@return first field containing a null BLOB pointer, or NULL if none found */
 UNIV_INLINE
 const byte*
 rec_offs_any_null_extern(
 /*=====================*/
 	const rec_t*	rec,		/*!< in: record */
 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 rec_offs_nth_extern(
 /*================*/
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	ulint		n)	/*!< in: nth field */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Mark the nth field as externally stored.
+@param[in,out]	offsets		array returned by rec_get_offsets()
+@param[in]	n		nth field */
+void
+rec_offs_make_nth_extern(
+        ulint*		offsets,
+        const ulint     n);
 /******************************************************//**
 Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return	nonzero if SQL NULL */
+@return nonzero if SQL NULL */
 UNIV_INLINE
 ulint
 rec_offs_nth_sql_null(
 /*==================*/
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	ulint		n)	/*!< in: nth field */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /******************************************************//**
 Gets the physical size of a field.
-@return	length of field */
+@return length of field */
 UNIV_INLINE
 ulint
 rec_offs_nth_size(
 /*==============*/
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	ulint		n)	/*!< in: nth field */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Returns the number of extern bits set in a record.
-@return	number of externally stored fields */
+@return number of externally stored fields */
 UNIV_INLINE
 ulint
 rec_offs_n_extern(
 /*==============*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /***********************************************************//**
 This is used to modify the value of an already existing field in a record.
 The previous value must have exactly the same size as the new value. If len
@@ -645,23 +664,23 @@ The following function returns the data size of an old-style physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
 is the distance from record origin to record end in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_get_data_size_old(
 /*==================*/
 	const rec_t*	rec)	/*!< in: physical record */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 The following function returns the number of allocated elements
 for an array of offsets.
-@return	number of elements */
+@return number of elements */
 UNIV_INLINE
 ulint
 rec_offs_get_n_alloc(
 /*=================*/
 	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 The following function sets the number of allocated elements
 for an array of offsets. */
@@ -677,99 +696,101 @@ rec_offs_set_n_alloc(
 	rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
 /**********************************************************//**
 The following function returns the number of fields in a record.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 rec_offs_n_fields(
 /*==============*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 The following function returns the data size of a physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
 is the distance from record origin to record end in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_data_size(
 /*===============*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 Returns the total size of record minus data size of record.
 The value returned by the function is the distance from record
 start to record origin in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_extra_size(
 /*================*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 Returns the total size of a physical record.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_size(
 /*==========*/
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #ifdef UNIV_DEBUG
 /**********************************************************//**
 Returns a pointer to the start of the record.
-@return	pointer to start */
+@return pointer to start */
 UNIV_INLINE
 byte*
 rec_get_start(
 /*==========*/
 	const rec_t*	rec,	/*!< in: pointer to record */
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 Returns a pointer to the end of the record.
-@return	pointer to end */
+@return pointer to end */
 UNIV_INLINE
 byte*
 rec_get_end(
 /*========*/
 	const rec_t*	rec,	/*!< in: pointer to record */
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #else /* UNIV_DEBUG */
 # define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
 # define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
 #endif /* UNIV_DEBUG */
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return	pointer to the origin of the copy */
+
+/** Copy a physical record to a buffer.
+@param[out]	buf	buffer
+@param[in]	rec	physical record
+@param[in]	offsets	array returned by rec_get_offsets()
+@return pointer to the origin of the copy */
 UNIV_INLINE
 rec_t*
 rec_copy(
-/*=====*/
-	void*		buf,	/*!< in: buffer */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-	MY_ATTRIBUTE((nonnull));
+	void*		buf,
+	const rec_t*	rec,
+	const ulint*	offsets);
+
 #ifndef UNIV_HOTBACKUP
 /**********************************************************//**
 Determines the size of a data tuple prefix in a temporary file.
-@return	total size */
-UNIV_INTERN
+@return total size */
 ulint
 rec_get_converted_size_temp(
 /*========================*/
 	const dict_index_t*	index,	/*!< in: record descriptor */
 	const dfield_t*		fields,	/*!< in: array of data fields */
 	ulint			n_fields,/*!< in: number of data fields */
+	const dtuple_t*		v_entry,/*!< in: dtuple contains virtual column
+					data */
 	ulint*			extra)	/*!< out: extra size */
-	MY_ATTRIBUTE((warn_unused_result, nonnull));
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Determine the offset to each field in temporary file.
 @see rec_convert_dtuple_to_temp() */
-UNIV_INTERN
 void
 rec_init_offsets_temp(
 /*==================*/
@@ -782,21 +803,21 @@ rec_init_offsets_temp(
 /*********************************************************//**
 Builds a temporary file record out of a data tuple.
 @see rec_init_offsets_temp() */
-UNIV_INTERN
 void
 rec_convert_dtuple_to_temp(
 /*=======================*/
 	rec_t*			rec,		/*!< out: record */
 	const dict_index_t*	index,		/*!< in: record descriptor */
 	const dfield_t*		fields,		/*!< in: array of data fields */
-	ulint			n_fields)	/*!< in: number of fields */
-	MY_ATTRIBUTE((nonnull));
+	ulint			n_fields,	/*!< in: number of fields */
+	const dtuple_t*		v_entry);	/*!< in: dtuple contains
+						virtual column data */
+
 
 /**************************************************************//**
 Copies the first n fields of a physical record to a new physical record in
 a buffer.
-@return	own: copied record */
-UNIV_INTERN
+@return own: copied record */
 rec_t*
 rec_copy_prefix_to_buf(
 /*===================*/
@@ -809,28 +830,27 @@ rec_copy_prefix_to_buf(
 						or NULL */
 	ulint*			buf_size)	/*!< in/out: buffer size */
 	MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Folds a prefix of a physical record to a ulint.
-@return	the folded value */
+/** Fold a prefix of a physical record.
+@param[in]	rec		index record
+@param[in]	offsets		return value of rec_get_offsets()
+@param[in]	n_fields	number of complete fields to fold
+@param[in]	n_bytes		number of bytes to fold in the last field
+@param[in]	tree_id		index tree ID
+@return the folded value */
 UNIV_INLINE
 ulint
 rec_fold(
-/*=====*/
-	const rec_t*	rec,		/*!< in: the physical record */
-	const ulint*	offsets,	/*!< in: array returned by
-					rec_get_offsets() */
-	ulint		n_fields,	/*!< in: number of complete
-					fields to fold */
-	ulint		n_bytes,	/*!< in: number of bytes to fold
-					in an incomplete last field */
-	index_id_t	tree_id)	/*!< in: index tree id */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	const rec_t*	rec,
+	const ulint*	offsets,
+	ulint		n_fields,
+	ulint		n_bytes,
+	index_id_t	tree_id)
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************//**
 Builds a physical record out of a data tuple and
 stores it into the given buffer.
-@return	pointer to the origin of physical record */
-UNIV_INTERN
+@return pointer to the origin of physical record */
 rec_t*
 rec_convert_dtuple_to_rec(
 /*======================*/
@@ -840,11 +860,11 @@ rec_convert_dtuple_to_rec(
 	const dtuple_t*		dtuple,	/*!< in: data tuple */
 	ulint			n_ext)	/*!< in: number of
 					externally stored columns */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************//**
 Returns the extra size of an old-style physical record if we know its
 data size and number of fields.
-@return	extra size */
+@return extra size */
 UNIV_INLINE
 ulint
 rec_get_converted_extra_size(
@@ -855,8 +875,7 @@ rec_get_converted_extra_size(
 	MY_ATTRIBUTE((const));
 /**********************************************************//**
 Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return	total size */
-UNIV_INTERN
+@return total size */
 ulint
 rec_get_converted_size_comp_prefix(
 /*===============================*/
@@ -867,8 +886,7 @@ rec_get_converted_size_comp_prefix(
 	MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
 /**********************************************************//**
 Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return	total size */
-UNIV_INTERN
+@return total size */
 ulint
 rec_get_converted_size_comp(
 /*========================*/
@@ -884,7 +902,7 @@ rec_get_converted_size_comp(
 /**********************************************************//**
 The following function returns the size of a data tuple when converted to
 a physical record.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_get_converted_size(
@@ -897,7 +915,6 @@ rec_get_converted_size(
 /**************************************************************//**
 Copies the first n fields of a physical record to a data tuple.
 The fields are copied to the memory heap. */
-UNIV_INTERN
 void
 rec_copy_prefix_to_dtuple(
 /*======================*/
@@ -911,8 +928,7 @@ rec_copy_prefix_to_dtuple(
 #endif /* !UNIV_HOTBACKUP */
 /***************************************************************//**
 Validates the consistency of a physical record.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 rec_validate(
 /*=========*/
@@ -921,7 +937,6 @@ rec_validate(
 	MY_ATTRIBUTE((nonnull));
 /***************************************************************//**
 Prints an old-style physical record. */
-UNIV_INTERN
 void
 rec_print_old(
 /*==========*/
@@ -932,7 +947,6 @@ rec_print_old(
 /***************************************************************//**
 Prints a physical record in ROW_FORMAT=COMPACT.  Ignores the
 record header. */
-UNIV_INTERN
 void
 rec_print_comp(
 /*===========*/
@@ -941,8 +955,16 @@ rec_print_comp(
 	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 	MY_ATTRIBUTE((nonnull));
 /***************************************************************//**
+Prints a spatial index record. */
+void
+rec_print_mbr_rec(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	MY_ATTRIBUTE((nonnull));
+/***************************************************************//**
 Prints a physical record. */
-UNIV_INTERN
 void
 rec_print_new(
 /*==========*/
@@ -952,7 +974,6 @@ rec_print_new(
 	MY_ATTRIBUTE((nonnull));
 /***************************************************************//**
 Prints a physical record. */
-UNIV_INTERN
 void
 rec_print(
 /*======*/
@@ -961,11 +982,121 @@ rec_print(
 	const dict_index_t*	index)	/*!< in: record descriptor */
 	MY_ATTRIBUTE((nonnull));
 
+/** Pretty-print a record to an output stream.
+@param[in,out]	o	output stream
+@param[in]	rec	physical record
+@param[in]	info	rec_get_info_bits(rec)
+@param[in]	offsets	rec_get_offsets(rec) */
+void
+rec_print(
+	std::ostream&	o,
+	const rec_t*	rec,
+	ulint		info,
+	const ulint*	offsets);
+
+/** Wrapper pairing a record with its index, for pretty-printing */
+struct rec_index_print
+{
+	/** Constructor; stores the two pointers without copying the data */
+	rec_index_print(const rec_t* rec, const dict_index_t* index) :
+		m_rec(rec), m_index(index)
+	{}
+
+	/** Record */
+	const rec_t*		m_rec;
+	/** Index */
+	const dict_index_t*	m_index;
+};
+
+/** Display a record.
+@param[in,out]	o	output stream
+@param[in]	r	record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_index_print& r);
+
+/** Wrapper pairing a record with its offsets, for pretty-printing */
+struct rec_offsets_print
+{
+	/** Constructor; stores the two pointers without copying the data */
+	rec_offsets_print(const rec_t* rec, const ulint* offsets) :
+		m_rec(rec), m_offsets(offsets)
+	{}
+
+	/** Record */
+	const rec_t*		m_rec;
+	/** Offsets to each field */
+	const ulint*		m_offsets;
+};
+
+/** Display a record.
+@param[in,out]	o	output stream
+@param[in]	r	record to display
+@return the output stream */
+std::ostream&
+operator<<(std::ostream& o, const rec_offsets_print& r);
+
+# ifndef DBUG_OFF
+/** Pretty-printer that formats a record or tuple into a string stream */
+class rec_printer : public std::ostringstream {
+public:
+	/** Construct a pretty-printed record, reading the info bits from rec.
+	@param[in]	rec	record with header
+	@param[in]	offsets	rec_get_offsets(rec, ...) */
+	rec_printer(const rec_t* rec, const ulint* offsets)
+		:
+		std::ostringstream ()
+	{
+		rec_print(*this, rec,
+			  rec_get_info_bits(rec, rec_offs_comp(offsets)),
+			  offsets);
+	}
+
+	/** Construct a pretty-printed record with caller-supplied info bits.
+	@param[in]	rec	record, possibly lacking header
+	@param[in]	info	rec_get_info_bits(rec)
+	@param[in]	offsets	rec_get_offsets(rec, ...) */
+	rec_printer(const rec_t* rec, ulint info, const ulint* offsets)
+		:
+		std::ostringstream ()
+	{
+		rec_print(*this, rec, info, offsets);
+	}
+
+	/** Construct a pretty-printed tuple.
+	@param[in]	tuple	data tuple */
+	rec_printer(const dtuple_t* tuple)
+		:
+		std::ostringstream ()
+	{
+		dtuple_print(*this, tuple);
+	}
+
+	/** Construct a pretty-printed tuple.
+	@param[in]	field	array of data tuple fields
+	@param[in]	n	number of fields */
+	rec_printer(const dfield_t* field, ulint n)
+		:
+		std::ostringstream ()
+	{
+		dfield_print(*this, field, n);
+	}
+
+	/** Destructor */
+	virtual ~rec_printer() {}
+
+private:
+	/** Copy constructor (private and only declared here: no copying) */
+	rec_printer(const rec_printer& other);
+	/** Assignment operator (private and only declared here) */
+	rec_printer& operator=(const rec_printer& other);
+};
+# endif /* !DBUG_OFF */
+
 # ifdef UNIV_DEBUG
 /************************************************************//**
 Reads the DB_TRX_ID of a clustered index record.
-@return	the value of DB_TRX_ID */
-UNIV_INTERN
+@return the value of DB_TRX_ID */
 trx_id_t
 rec_get_trx_id(
 /*===========*/
@@ -982,7 +1113,7 @@ are given in one byte (resp. two byte) format. */
 
 /* The data size of record must be smaller than this because we reserve
 two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE	(16384)
+#define REC_MAX_DATA_SIZE	16384
 
 #ifdef WITH_WSREP
 int wsrep_rec_get_foreign_key(
@@ -993,9 +1124,9 @@ int wsrep_rec_get_foreign_key(
 	dict_index_t*	index_ref,  /* in: index for referenced table */
 	ibool		new_protocol); /* in: protocol > 1 */
 #endif /* WITH_WSREP */
+
 #ifndef UNIV_NONINL
 #include "rem0rec.ic"
 #endif
 
-#endif /* !UNIV_INNOCHECKSUM */
-#endif
+#endif /* rem0rec_h */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index 5811a77a48b..b855a39da9e 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,7 @@ Created 5/30/1994 Heikki Tuuri
 #include "mach0data.h"
 #include "ut0byte.h"
 #include "dict0dict.h"
+#include "dict0boot.h"
 #include "btr0types.h"
 
 /* Compact flag ORed to the extra size returned by rec_get_offsets() */
@@ -136,7 +137,6 @@ and the shift needed to obtain each bit-field of the record. */
 
 /***********************************************************//**
 Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
 void
 rec_set_nth_field_null_bit(
 /*=======================*/
@@ -146,7 +146,6 @@ rec_set_nth_field_null_bit(
 /***********************************************************//**
 Sets an old-style record field to SQL null.
 The physical size of the field is not changed. */
-UNIV_INTERN
 void
 rec_set_nth_field_sql_null(
 /*=======================*/
@@ -238,7 +237,7 @@ rec_set_bit_field_2(
 /******************************************************//**
 The following function is used to get the pointer of the next chained record
 on the same page.
-@return	pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
 UNIV_INLINE
 const rec_t*
 rec_get_next_ptr_const(
@@ -294,7 +293,7 @@ rec_get_next_ptr_const(
 /******************************************************//**
 The following function is used to get the pointer of the next chained record
 on the same page.
-@return	pointer to the next chained record, or NULL if none */
+@return pointer to the next chained record, or NULL if none */
 UNIV_INLINE
 rec_t*
 rec_get_next_ptr(
@@ -308,7 +307,7 @@ rec_get_next_ptr(
 /******************************************************//**
 The following function is used to get the offset of the next chained record
 on the same page.
-@return	the page offset of the next chained record, or 0 if none */
+@return the page offset of the next chained record, or 0 if none */
 UNIV_INLINE
 ulint
 rec_get_next_offs(
@@ -418,7 +417,7 @@ rec_set_next_offs_new(
 /******************************************************//**
 The following function is used to get the number of fields
 in an old-style record.
-@return	number of data fields */
+@return number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields_old(
@@ -458,7 +457,7 @@ rec_set_n_fields_old(
 
 /******************************************************//**
 The following function retrieves the status bits of a new-style record.
-@return	status bits */
+@return status bits */
 UNIV_INLINE
 ulint
 rec_get_status(
@@ -479,7 +478,7 @@ rec_get_status(
 /******************************************************//**
 The following function is used to get the number of fields
 in a record.
-@return	number of data fields */
+@return number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields(
@@ -508,10 +507,32 @@ rec_get_n_fields(
 	}
 }
 
+/** Check that the number of fields in an index entry is sane by comparing
+it with the number of fields in a record of the same page.
+@param[in]	index	index
+@param[in]	rec	record of the same page
+@param[in]	entry	index entry
+@return true if n_fields is sane */
+UNIV_INLINE
+bool
+rec_n_fields_is_sane(
+	dict_index_t*	index,
+	const rec_t*	rec,
+	const dtuple_t*	entry)
+{
+	return(rec_get_n_fields(rec, index)
+	       == dtuple_get_n_fields(entry)
+	       /* a record for older SYS_INDEXES table
+	       (missing merge_threshold column) is acceptable. */
+	       || (index->table->id == DICT_INDEXES_ID
+		   && rec_get_n_fields(rec, index)
+		      == dtuple_get_n_fields(entry) - 1));
+}
+
 /******************************************************//**
 The following function is used to get the number of records owned by the
 previous directory record.
-@return	number of owned records */
+@return number of owned records */
 UNIV_INLINE
 ulint
 rec_get_n_owned_old(
@@ -538,7 +559,7 @@ rec_set_n_owned_old(
 /******************************************************//**
 The following function is used to get the number of records owned by the
 previous directory record.
-@return	number of owned records */
+@return number of owned records */
 UNIV_INLINE
 ulint
 rec_get_n_owned_new(
@@ -566,9 +587,22 @@ rec_set_n_owned_new(
 	}
 }
 
+#ifdef UNIV_DEBUG
+/** Check that no info bits outside REC_INFO_{DELETED,MIN_REC}_FLAG are set.
+@param[in]	bits	info bits to check
+@return true if valid */
+inline
+bool
+rec_info_bits_valid(
+	ulint	bits)
+{
+	return(0 == (bits & ~(REC_INFO_DELETED_FLAG | REC_INFO_MIN_REC_FLAG)));
+}
+#endif /* UNIV_DEBUG */
+
 /******************************************************//**
 The following function is used to retrieve the info bits of a record.
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 rec_get_info_bits(
@@ -576,9 +610,11 @@ rec_get_info_bits(
 	const rec_t*	rec,	/*!< in: physical record */
 	ulint		comp)	/*!< in: nonzero=compact page format */
 {
-	return(rec_get_bit_field_1(
-		       rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
-		       REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
+	const ulint	val = rec_get_bit_field_1(
+		rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+		REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+	ut_ad(rec_info_bits_valid(val));
+	return(val);
 }
 
 /******************************************************//**
@@ -590,6 +626,7 @@ rec_set_info_bits_old(
 	rec_t*	rec,	/*!< in: old-style physical record */
 	ulint	bits)	/*!< in: info bits */
 {
+	ut_ad(rec_info_bits_valid(bits));
 	rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
 			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
 }
@@ -602,6 +639,7 @@ rec_set_info_bits_new(
 	rec_t*	rec,	/*!< in/out: new-style physical record */
 	ulint	bits)	/*!< in: info bits */
 {
+	ut_ad(rec_info_bits_valid(bits));
 	rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
 			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
 }
@@ -622,7 +660,7 @@ rec_set_status(
 /******************************************************//**
 The following function is used to retrieve the info and status
 bits of a record.  (Only compact records have status bits.)
-@return	info bits */
+@return info bits */
 UNIV_INLINE
 ulint
 rec_get_info_and_status_bits(
@@ -663,7 +701,7 @@ rec_set_info_and_status_bits(
 
 /******************************************************//**
 The following function tells if record is delete marked.
-@return	nonzero if delete marked */
+@return nonzero if delete marked */
 UNIV_INLINE
 ulint
 rec_get_deleted_flag(
@@ -733,7 +771,7 @@ rec_set_deleted_flag_new(
 
 /******************************************************//**
 The following function tells if a new-style record is a node pointer.
-@return	TRUE if node pointer */
+@return TRUE if node pointer */
 UNIV_INLINE
 ibool
 rec_get_node_ptr_flag(
@@ -746,7 +784,7 @@ rec_get_node_ptr_flag(
 /******************************************************//**
 The following function is used to get the order number
 of an old-style record in the heap of the index page.
-@return	heap order number */
+@return heap order number */
 UNIV_INLINE
 ulint
 rec_get_heap_no_old(
@@ -774,7 +812,7 @@ rec_set_heap_no_old(
 /******************************************************//**
 The following function is used to get the order number
 of a new-style record in the heap of the index page.
-@return	heap order number */
+@return heap order number */
 UNIV_INLINE
 ulint
 rec_get_heap_no_new(
@@ -802,7 +840,7 @@ rec_set_heap_no_new(
 /******************************************************//**
 The following function is used to test whether the data offsets in the record
 are stored in one-byte or two-byte format.
-@return	TRUE if 1-byte form */
+@return TRUE if 1-byte form */
 UNIV_INLINE
 ibool
 rec_get_1byte_offs_flag(
@@ -839,7 +877,7 @@ rec_set_1byte_offs_flag(
 Returns the offset of nth field end if the record is stored in the 1-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
 value.
-@return	offset of the start of the field, SQL null flag ORed */
+@return offset of the start of the field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_1_get_field_end_info(
@@ -894,7 +932,7 @@ the fields. */
 /**********************************************************//**
 The following function returns the number of allocated elements
 for an array of offsets.
-@return	number of elements */
+@return number of elements */
 UNIV_INLINE
 ulint
 rec_offs_get_n_alloc(
@@ -928,7 +966,7 @@ rec_offs_set_n_alloc(
 
 /**********************************************************//**
 The following function returns the number of fields in a record.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 rec_offs_n_fields(
@@ -947,7 +985,7 @@ rec_offs_n_fields(
 
 /************************************************************//**
 Validates offsets returned by rec_get_offsets().
-@return	TRUE if valid */
+@return TRUE if valid */
 UNIV_INLINE
 ibool
 rec_offs_validate(
@@ -1025,7 +1063,7 @@ rec_offs_make_valid(
 /************************************************************//**
 The following function is used to get an offset to the nth
 data field in a record.
-@return	offset from the origin of rec */
+@return offset from the origin of rec */
 UNIV_INLINE
 ulint
 rec_get_nth_field_offs(
@@ -1062,7 +1100,7 @@ rec_get_nth_field_offs(
 /******************************************************//**
 Determine if the offsets are for a record in the new
 compact format.
-@return	nonzero if compact format */
+@return nonzero if compact format */
 UNIV_INLINE
 ulint
 rec_offs_comp(
@@ -1076,7 +1114,7 @@ rec_offs_comp(
 /******************************************************//**
 Determine if the offsets are for a record containing
 externally stored columns.
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 rec_offs_any_extern(
@@ -1089,7 +1127,7 @@ rec_offs_any_extern(
 
 /******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
-@return	first field containing a null BLOB pointer, or NULL if none found */
+@return first field containing a null BLOB pointer, or NULL if none found */
 UNIV_INLINE
 const byte*
 rec_offs_any_null_extern(
@@ -1125,7 +1163,7 @@ rec_offs_any_null_extern(
 
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
-@return	nonzero if externally stored */
+@return nonzero if externally stored */
 UNIV_INLINE
 ulint
 rec_offs_nth_extern(
@@ -1140,7 +1178,7 @@ rec_offs_nth_extern(
 
 /******************************************************//**
 Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return	nonzero if SQL NULL */
+@return nonzero if SQL NULL */
 UNIV_INLINE
 ulint
 rec_offs_nth_sql_null(
@@ -1155,7 +1193,7 @@ rec_offs_nth_sql_null(
 
 /******************************************************//**
 Gets the physical size of a field.
-@return	length of field */
+@return length of field */
 UNIV_INLINE
 ulint
 rec_offs_nth_size(
@@ -1174,7 +1212,7 @@ rec_offs_nth_size(
 
 /******************************************************//**
 Returns the number of extern bits set in a record.
-@return	number of externally stored fields */
+@return number of externally stored fields */
 UNIV_INLINE
 ulint
 rec_offs_n_extern(
@@ -1202,7 +1240,7 @@ offsets form. If the field is SQL null, the flag is ORed in the returned
 value. This function and the 2-byte counterpart are defined here because the
 C-compiler was not able to sum negative and positive constant offsets, and
 warned of constant arithmetic overflow within the compiler.
-@return	offset of the start of the PREVIOUS field, SQL null flag ORed */
+@return offset of the start of the PREVIOUS field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_1_get_prev_field_end_info(
@@ -1220,7 +1258,7 @@ rec_1_get_prev_field_end_info(
 Returns the offset of n - 1th field end if the record is stored in the 2-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
 value.
-@return	offset of the start of the PREVIOUS field, SQL null flag ORed */
+@return offset of the start of the PREVIOUS field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_2_get_prev_field_end_info(
@@ -1271,7 +1309,7 @@ rec_2_set_field_end_info(
 /******************************************************//**
 Returns the offset of nth field start if the record is stored in the 1-byte
 offsets form.
-@return	offset of the start of the field */
+@return offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_1_get_field_start_offs(
@@ -1294,7 +1332,7 @@ rec_1_get_field_start_offs(
 /******************************************************//**
 Returns the offset of nth field start if the record is stored in the 2-byte
 offsets form.
-@return	offset of the start of the field */
+@return offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_2_get_field_start_offs(
@@ -1319,7 +1357,7 @@ The following function is used to read the offset of the start of a data field
 in the record. The start of an SQL null field is the end offset of the
 previous non-null field, or 0, if none exists. If n is the number of the last
 field + 1, then the end offset of the last field is returned.
-@return	offset of the start of the field */
+@return offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_get_field_start_offs(
@@ -1347,7 +1385,7 @@ rec_get_field_start_offs(
 Gets the physical size of an old-style field.
 Also an SQL null may have a field of size > 0,
 if the data type is of a fixed size.
-@return	field size in bytes */
+@return field size in bytes */
 UNIV_INLINE
 ulint
 rec_get_nth_field_size(
@@ -1415,7 +1453,7 @@ The following function returns the data size of an old-style physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
 is the distance from record origin to record end in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_get_data_size_old(
@@ -1450,7 +1488,7 @@ The following function returns the data size of a physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
 is the distance from record origin to record end in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_data_size(
@@ -1470,7 +1508,7 @@ rec_offs_data_size(
 Returns the total size of record minus data size of record. The value
 returned by the function is the distance from record start to record origin
 in bytes.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_extra_size(
@@ -1486,7 +1524,7 @@ rec_offs_extra_size(
 
 /**********************************************************//**
 Returns the total size of a physical record.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_offs_size(
@@ -1499,7 +1537,7 @@ rec_offs_size(
 #ifdef UNIV_DEBUG
 /**********************************************************//**
 Returns a pointer to the end of the record.
-@return	pointer to end */
+@return pointer to end */
 UNIV_INLINE
 byte*
 rec_get_end(
@@ -1513,7 +1551,7 @@ rec_get_end(
 
 /**********************************************************//**
 Returns a pointer to the start of the record.
-@return	pointer to start */
+@return pointer to start */
 UNIV_INLINE
 byte*
 rec_get_start(
@@ -1526,16 +1564,17 @@ rec_get_start(
 }
 #endif /* UNIV_DEBUG */
 
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return	pointer to the origin of the copy */
+/** Copy a physical record to a buffer.
+@param[out]	buf	buffer
+@param[in]	rec	physical record
+@param[in]	offsets	array returned by rec_get_offsets()
+@return pointer to the origin of the copy */
 UNIV_INLINE
 rec_t*
 rec_copy(
-/*=====*/
-	void*		buf,	/*!< in: buffer */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	void*		buf,
+	const rec_t*	rec,
+	const ulint*	offsets)
 {
 	ulint	extra_len;
 	ulint	data_len;
@@ -1556,7 +1595,7 @@ rec_copy(
 /**********************************************************//**
 Returns the extra size of an old-style physical record if we know its
 data size and number of fields.
-@return	extra size */
+@return extra size */
 UNIV_INLINE
 ulint
 rec_get_converted_extra_size(
@@ -1576,7 +1615,7 @@ rec_get_converted_extra_size(
 /**********************************************************//**
 The following function returns the size of a data tuple when converted to
 a physical record.
-@return	size */
+@return size */
 UNIV_INLINE
 ulint
 rec_get_converted_size(
@@ -1592,12 +1631,19 @@ rec_get_converted_size(
 	ut_ad(dtuple);
 	ut_ad(dtuple_check_typed(dtuple));
 
-	ut_ad(dict_index_is_univ(index)
+	ut_ad(dict_index_is_ibuf(index)
+
 	      || dtuple_get_n_fields(dtuple)
-	      == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
-		   == REC_STATUS_NODE_PTR)
-		  ? dict_index_get_n_unique_in_tree(index) + 1
-		  : dict_index_get_n_fields(index)));
+		 == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+		      == REC_STATUS_NODE_PTR)
+		     ? dict_index_get_n_unique_in_tree_nonleaf(index) + 1
+		     : dict_index_get_n_fields(index))
+
+	      /* a record for older SYS_INDEXES table
+	      (missing merge_threshold column) is acceptable. */
+	      || (index->table->id == DICT_INDEXES_ID
+		  && dtuple_get_n_fields(dtuple)
+		     == dict_index_get_n_fields(index) - 1));
 
 	if (dict_table_is_comp(index->table)) {
 		return(rec_get_converted_size_comp(index,
@@ -1622,7 +1668,7 @@ rec_get_converted_size(
 	support multiple page sizes.  At that time, we will need
 	to consider the node pointer on these universal btrees. */
 
-	if (dict_index_is_univ(index)) {
+	if (dict_index_is_ibuf(index)) {
 		/* This is for the insert buffer B-tree.
 		All fields in the leaf tuple ascend to the
 		parent node plus the child page pointer. */
@@ -1651,22 +1697,21 @@ rec_get_converted_size(
 }
 
 #ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record.
-@return	the folded value */
+/** Fold a prefix of a physical record.
+@param[in]	rec		index record
+@param[in]	offsets		return value of rec_get_offsets()
+@param[in]	n_fields	number of complete fields to fold
+@param[in]	n_bytes		number of bytes to fold in the last field
+@param[in]	tree_id		index tree ID
+@return the folded value */
 UNIV_INLINE
 ulint
 rec_fold(
-/*=====*/
-	const rec_t*	rec,		/*!< in: the physical record */
-	const ulint*	offsets,	/*!< in: array returned by
-					rec_get_offsets() */
-	ulint		n_fields,	/*!< in: number of complete
-					fields to fold */
-	ulint		n_bytes,	/*!< in: number of bytes to fold
-					in an incomplete last field */
-	index_id_t	tree_id)	/*!< in: index tree id */
+	const rec_t*	rec,
+	const ulint*	offsets,
+	ulint		n_fields,
+	ulint		n_bytes,
+	index_id_t	tree_id)
 {
 	ulint		i;
 	const byte*	data;
@@ -1676,7 +1721,7 @@ rec_fold(
 
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
 	ut_ad(rec_validate(rec, offsets));
-	ut_ad(n_fields + n_bytes > 0);
+	ut_ad(n_fields > 0 || n_bytes > 0);
 
 	n_fields_rec = rec_offs_n_fields(offsets);
 	ut_ad(n_fields <= n_fields_rec);
diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h
index a098e2f9b29..1d788a4c217 100644
--- a/storage/innobase/include/row0ext.h
+++ b/storage/innobase/include/row0ext.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,11 +31,11 @@ Created September 2006 Marko Makela
 #include "data0types.h"
 #include "mem0mem.h"
 #include "dict0types.h"
+#include "page0size.h"
 
 /********************************************************************//**
 Creates a cache of column prefixes of externally stored columns.
-@return	own: column prefix cache */
-UNIV_INTERN
+@return own: column prefix cache */
 row_ext_t*
 row_ext_create(
 /*===========*/
@@ -92,6 +92,9 @@ struct row_ext_t{
 				REC_ANTELOPE_MAX_INDEX_COL_LEN or
 				REC_VERSION_56_MAX_INDEX_COL_LEN depending
 				on row format */
+	page_size_t	page_size;
+				/*!< page size of the externally stored
+				columns */
 	ulint		len[1];	/*!< prefix lengths; 0 if not cached */
 };
 
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index 00bd3317de3..7e39fe33d9f 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -35,6 +35,8 @@ Created 10/13/2010 Jimmy Yang
 #include "fts0types.h"
 #include "fts0priv.h"
 #include "row0merge.h"
+#include "btr0bulk.h"
+#include "os0thread.h"
 
 /** This structure defineds information the scan thread will fetch
 and put to the linked list for parallel tokenization/sort threads
@@ -53,7 +55,6 @@ struct fts_doc_item {
 tokenization threads and sort threads. */
 typedef UT_LIST_BASE_NODE_T(fts_doc_item_t)     fts_doc_list_t;
 
-#define FTS_NUM_AUX_INDEX	6
 #define FTS_PLL_MERGE		1
 
 /** Sort information passed to each individual parallel sort thread */
@@ -99,6 +100,16 @@ struct fts_psort_t {
 	ib_mutex_t		mutex;		/*!< mutex for fts_doc_list */
 };
 
+/** Row FTS token for the plugin parser; a node of fts_token_list_t */
+struct row_fts_token_t {
+	fts_string_t*	text;		/*!< token text */
+	ulint		position;	/*!< token position in the document */
+	UT_LIST_NODE_T(row_fts_token_t)
+			token_list;	/*!< list node linking the tokens */
+};
+
+typedef UT_LIST_BASE_NODE_T(row_fts_token_t)     fts_token_list_t;
+
 /** Structure stores information from string tokenization operation */
 struct fts_tokenize_ctx {
 	ulint			processed_len;  /*!< processed string length */
@@ -112,6 +123,7 @@ struct fts_tokenize_ctx {
 	ib_rbt_t*		cached_stopword;/*!< in: stopword list */
 	dfield_t		sort_field[FTS_NUM_FIELDS_SORT];
 						/*!< in: sort field */
+	fts_token_list_t	fts_token_list;
 };
 
 typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
@@ -119,13 +131,16 @@ typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
 /** Structure stores information needed for the insertion phase of FTS
 parallel sort. */
 struct fts_psort_insert {
-	trx_t*		trx;		/*!< Transaction used for insertion */
-	que_t**		ins_graph;	/*!< insert graph */
-	fts_table_t	fts_table;	/*!< auxiliary table */
 	CHARSET_INFO*	charset;	/*!< charset info */
 	mem_heap_t*	heap;		/*!< heap */
 	ibool		opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
 					integer for Doc ID */
+	BtrBulk*	btr_bulk;	/*!< Bulk load instance */
+	dtuple_t*	tuple;		/*!< Tuple to insert */
+
+#ifdef UNIV_DEBUG
+	ulint		aux_index_id;	/*!< Auxiliary index id */
+#endif
 };
 
 typedef struct fts_psort_insert	fts_psort_insert_t;
@@ -159,7 +174,6 @@ tokenized doc string. The index has three "fields":
 3) Word's position in original 'doc'.
 
 @return dict_index_t structure for the fts sort index */
-UNIV_INTERN
 dict_index_t*
 row_merge_create_fts_sort_index(
 /*============================*/
@@ -176,7 +190,6 @@ row_merge_create_fts_sort_index(
 /********************************************************************//**
 Initialize FTS parallel sort structures.
 @return TRUE if all successful */
-UNIV_INTERN
 ibool
 row_fts_psort_info_init(
 /*====================*/
@@ -197,7 +210,6 @@ row_fts_psort_info_init(
 /********************************************************************//**
 Clean up and deallocate FTS parallel sort structures, and close
 temparary merge sort files */
-UNIV_INTERN
 void
 row_fts_psort_info_destroy(
 /*=======================*/
@@ -205,7 +217,6 @@ row_fts_psort_info_destroy(
 	fts_psort_t*	merge_info);	/*!< parallel merge info */
 /********************************************************************//**
 Free up merge buffers when merge sort is done */
-UNIV_INTERN
 void
 row_fts_free_pll_merge_buf(
 /*=======================*/
@@ -214,14 +225,12 @@ row_fts_free_pll_merge_buf(
 /*********************************************************************//**
 Function performs parallel tokenization of the incoming doc strings.
 @return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
 os_thread_ret_t
 fts_parallel_tokenization(
 /*======================*/
 	void*		arg);		/*!< in: psort_info for the thread */
 /*********************************************************************//**
 Start the parallel tokenization and parallel merge sort */
-UNIV_INTERN
 void
 row_fts_start_psort(
 /*================*/
@@ -229,14 +238,12 @@ row_fts_start_psort(
 /*********************************************************************//**
 Function performs the merge and insertion of the sorted records.
 @return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
 os_thread_ret_t
 fts_parallel_merge(
 /*===============*/
 	void*		arg);		/*!< in: parallel merge info */
 /*********************************************************************//**
 Kick off the parallel merge and insert thread */
-UNIV_INTERN
 void
 row_fts_start_parallel_merge(
 /*=========================*/
@@ -244,7 +251,6 @@ row_fts_start_parallel_merge(
 /********************************************************************//**
 Read sorted FTS data files and insert data tuples to auxillary tables.
 @return DB_SUCCESS or error number */
-UNIV_INTERN
 void
 row_fts_insert_tuple(
 /*=================*/
@@ -258,7 +264,6 @@ row_fts_insert_tuple(
 /********************************************************************//**
 Propagate a newly added record up one level in the selection tree
 @return parent where this value propagated to */
-UNIV_INTERN
 int
 row_merge_fts_sel_propagate(
 /*========================*/
@@ -272,7 +277,6 @@ row_merge_fts_sel_propagate(
 Read sorted file containing index data tuples and insert these data
 tuples to the index
 @return DB_SUCCESS or error number */
-UNIV_INTERN
 dberr_t
 row_fts_merge_insert(
 /*=================*/
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
index a821c230a3b..137c28ac1fa 100644
--- a/storage/innobase/include/row0import.h
+++ b/storage/innobase/include/row0import.h
@@ -27,7 +27,6 @@ Created 2012-02-08 by Sunny Bains
 #define row0import_h
 
 #include "univ.i"
-#include "db0err.h"
 #include "dict0types.h"
 
 // Forward declarations
@@ -38,8 +37,7 @@ struct row_prebuilt_t;
 /*****************************************************************//**
 Imports a tablespace. The space id in the .ibd file must match the space id
 of the table in the data dictionary.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_import_for_mysql(
 /*=================*/
@@ -51,7 +49,6 @@ row_import_for_mysql(
 /*****************************************************************//**
 Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
 @return DB_SUCCESS or error code. */
-UNIV_INTERN
 dberr_t
 row_import_update_discarded_flag(
 /*=============================*/
@@ -70,7 +67,6 @@ row_import_update_discarded_flag(
 Update the (space, root page) of a table's indexes from the values
 in the data dictionary.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 row_import_update_index_root(
 /*=========================*/
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index 71ee39070ef..4038c32b9c0 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -39,7 +39,6 @@ which lock either the success or the failure of the constraint. NOTE that
 the caller must have a shared latch on dict_foreign_key_check_lock.
 @return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
 DB_ROW_IS_REFERENCED */
-UNIV_INTERN
 dberr_t
 row_ins_check_foreign_constraint(
 /*=============================*/
@@ -56,8 +55,7 @@ row_ins_check_foreign_constraint(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Creates an insert node struct.
-@return	own: insert node struct */
-UNIV_INTERN
+@return own: insert node struct */
 ins_node_t*
 ins_node_create(
 /*============*/
@@ -68,7 +66,6 @@ ins_node_create(
 Sets a new row to insert for an INS_DIRECT node. This function is only used
 if we have constructed the row separately, which is a rare case; this
 function is quite slow. */
-UNIV_INTERN
 void
 ins_node_set_new_row(
 /*=================*/
@@ -85,7 +82,6 @@ the delete marked record.
 @retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
 @retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
 @return error code */
-UNIV_INTERN
 dberr_t
 row_ins_clust_index_entry_low(
 /*==========================*/
@@ -97,8 +93,12 @@ row_ins_clust_index_entry_low(
 	ulint		n_uniq,	/*!< in: 0 or index->n_uniq */
 	dtuple_t*	entry,	/*!< in/out: index entry to insert */
 	ulint		n_ext,	/*!< in: number of externally stored columns */
-	que_thr_t*	thr)	/*!< in: query thread or NULL */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	bool		dup_chk_only)
+				/*!< in: if true, just do duplicate check
+				and return. don't execute actual insert. */
+	MY_ATTRIBUTE((warn_unused_result));
+
 /***************************************************************//**
 Tries to insert an entry into a secondary index. If a record with exactly the
 same fields is found, the other record is necessarily marked deleted.
@@ -107,7 +107,6 @@ It is then unmarked. Otherwise, the entry is just inserted to the index.
 @retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
 @retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
 @return error code */
-UNIV_INTERN
 dberr_t
 row_ins_sec_index_entry_low(
 /*========================*/
@@ -122,13 +121,26 @@ row_ins_sec_index_entry_low(
 	dtuple_t*	entry,	/*!< in/out: index entry to insert */
 	trx_id_t	trx_id,	/*!< in: PAGE_MAX_TRX_ID during
 				row_log_table_apply(), or 0 */
-	que_thr_t*	thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	que_thr_t*	thr,	/*!< in: query thread */
+	bool		dup_chk_only)
+				/*!< in: if true, just do duplicate check
+				and return. don't execute actual insert. */
+	MY_ATTRIBUTE((warn_unused_result));
+/** Set the dtuple fields in entry from the appropriate columns in row.
+@param[in]	index	index handler
+@param[out]	entry	index entry to make
+@param[in]	row	row
+@return status code; presumably DB_SUCCESS on success (TODO confirm) */
+dberr_t
+row_ins_index_entry_set_vals(
+	const dict_index_t*	index,
+	dtuple_t*		entry,
+	const dtuple_t*		row);
+
 /***************************************************************//**
 Tries to insert the externally stored fields (off-page columns)
 of a clustered index entry.
 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
 dberr_t
 row_ins_index_entry_big_rec_func(
 /*=============================*/
@@ -155,35 +167,38 @@ Inserts an entry into a clustered index. Tries first optimistic,
 then pessimistic descent down the tree. If the entry matches enough
 to a delete marked record, performs the insert by updating or delete
 unmarking the delete marked record.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
 dberr_t
 row_ins_clust_index_entry(
 /*======================*/
 	dict_index_t*	index,	/*!< in: clustered index */
 	dtuple_t*	entry,	/*!< in/out: index entry to insert */
 	que_thr_t*	thr,	/*!< in: query thread */
-	ulint		n_ext)	/*!< in: number of externally stored columns */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	bool		dup_chk_only)
+				/*!< in: if true, just do duplicate check
+				and return. don't execute actual insert. */
+	MY_ATTRIBUTE((warn_unused_result));
 /***************************************************************//**
 Inserts an entry into a secondary index. Tries first optimistic,
 then pessimistic descent down the tree. If the entry matches enough
 to a delete marked record, performs the insert by updating or delete
 unmarking the delete marked record.
-@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
 dberr_t
 row_ins_sec_index_entry(
 /*====================*/
 	dict_index_t*	index,	/*!< in: secondary index */
 	dtuple_t*	entry,	/*!< in/out: index entry to insert */
-	que_thr_t*	thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	que_thr_t*	thr,	/*!< in: query thread */
+	bool		dup_chk_only)
+				/*!< in: if true, just do duplicate check
+				and return. don't execute actual insert. */
+	MY_ATTRIBUTE((warn_unused_result));
 /***********************************************************//**
 Inserts a row to a table. This is a high-level function used in
 SQL execution graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_ins_step(
 /*=========*/
@@ -216,6 +231,10 @@ struct ins_node_t{
 				entry_list and sys fields are stored here;
 				if this is NULL, entry list should be created
 				and buffers for sys fields in row allocated */
+	dict_index_t*   duplicate;
+				/*!< This is the first index that reported
+				DB_DUPLICATE_KEY.  Used in the case of REPLACE
+				or INSERT ... ON DUPLICATE UPDATE. */
 	ulint		magic_n;
 };
 
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index 5ff148ff045..c8db44f23b3 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -35,6 +35,8 @@ Created 2011-05-26 Marko Makela
 #include "trx0types.h"
 #include "que0types.h"
 
+class ut_stage_alter_t;
+
 extern ulint onlineddl_rowlog_rows;
 extern ulint onlineddl_rowlog_pct_used;
 extern ulint onlineddl_pct_progress;
@@ -43,7 +45,6 @@ extern ulint onlineddl_pct_progress;
 Allocate the row log for an index and flag the index
 for online creation.
 @retval true if success, false if not */
-UNIV_INTERN
 bool
 row_log_allocate(
 /*=============*/
@@ -62,7 +63,6 @@ row_log_allocate(
 
 /******************************************************//**
 Free the row log for an index that was being created online. */
-UNIV_INTERN
 void
 row_log_free(
 /*=========*/
@@ -81,8 +81,8 @@ row_log_abort_sec(
 /******************************************************//**
 Try to log an operation to a secondary index that is
 (or was) being created.
-@retval	true if the operation was logged or can be ignored
-@retval	false if online index creation is not taking place */
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
 UNIV_INLINE
 bool
 row_log_online_op_try(
@@ -94,7 +94,6 @@ row_log_online_op_try(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /******************************************************//**
 Logs an operation to a secondary index that is (or was) being created. */
-UNIV_INTERN
 void
 row_log_online_op(
 /*==============*/
@@ -107,7 +106,6 @@ row_log_online_op(
 /******************************************************//**
 Gets the error status of the online index rebuild log.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 row_log_table_get_error(
 /*====================*/
@@ -115,15 +113,25 @@ row_log_table_get_error(
 					that is being rebuilt online */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
+/** Check whether a virtual column is indexed in the new table being
+created during ALTER TABLE.
+@param[in]	index	clustered index
+@param[in]	v_no	virtual column number
+@return true if it is indexed, else false */
+bool
+row_log_col_is_indexed(
+	const dict_index_t*	index,
+	ulint			v_no);
+
 /******************************************************//**
 Logs a delete operation to a table that is being rebuilt.
 This will be merged in row_log_table_apply_delete(). */
-UNIV_INTERN
 void
 row_log_table_delete(
 /*=================*/
 	const rec_t*	rec,	/*!< in: clustered index leaf page record,
 				page X-latched */
+	const dtuple_t*	ventry,	/*!< in: dtuple holding virtual column info */
 	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
 				or X-latched */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index) */
@@ -134,7 +142,6 @@ row_log_table_delete(
 /******************************************************//**
 Logs an update operation to a table that is being rebuilt.
 This will be merged in row_log_table_apply_update(). */
-UNIV_INTERN
 void
 row_log_table_update(
 /*=================*/
@@ -143,16 +150,18 @@ row_log_table_update(
 	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
 				or X-latched */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index) */
-	const dtuple_t*	old_pk)	/*!< in: row_log_table_get_pk()
+	const dtuple_t*	old_pk,	/*!< in: row_log_table_get_pk()
 				before the update */
-	UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3)));
+	const dtuple_t*	new_v_row,/*!< in: dtuple contains the new virtual
+				columns */
+	const dtuple_t*	old_v_row);/*!< in: dtuple contains the old virtual
+				columns */
 
 /******************************************************//**
 Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
 of a table that is being rebuilt.
 @return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
 or NULL if the PRIMARY KEY definition does not change */
-UNIV_INTERN
 const dtuple_t*
 row_log_table_get_pk(
 /*=================*/
@@ -170,19 +179,17 @@ row_log_table_get_pk(
 /******************************************************//**
 Logs an insert to a table that is being rebuilt.
 This will be merged in row_log_table_apply_insert(). */
-UNIV_INTERN
 void
 row_log_table_insert(
 /*=================*/
 	const rec_t*	rec,	/*!< in: clustered index leaf page record,
 				page X-latched */
+	const dtuple_t*	ventry,	/*!< in: dtuple holding virtual column info */
 	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
 				or X-latched */
-	const ulint*	offsets)/*!< in: rec_get_offsets(rec,index) */
-	UNIV_COLD MY_ATTRIBUTE((nonnull));
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec,index) */
 /******************************************************//**
 Notes that a BLOB is being freed during online ALTER TABLE. */
-UNIV_INTERN
 void
 row_log_table_blob_free(
 /*====================*/
@@ -191,51 +198,66 @@ row_log_table_blob_free(
 	UNIV_COLD MY_ATTRIBUTE((nonnull));
 /******************************************************//**
 Notes that a BLOB is being allocated during online ALTER TABLE. */
-UNIV_INTERN
 void
 row_log_table_blob_alloc(
 /*=====================*/
 	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
 	ulint		page_no)/*!< in: starting page number of the BLOB */
 	UNIV_COLD MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-Apply the row_log_table log to a table upon completing rebuild.
+
+/** Apply the row_log_table log to a table upon completing rebuild.
+@param[in]	thr		query graph
+@param[in]	old_table	old table
+@param[in,out]	table		MySQL table (for reporting duplicates)
+@param[in,out]	stage		performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
 @return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
 dberr_t
 row_log_table_apply(
-/*================*/
-	que_thr_t*	thr,	/*!< in: query graph */
-	dict_table_t*	old_table,
-				/*!< in: old table */
-	struct TABLE*	table)	/*!< in/out: MySQL table
-				(for reporting duplicates) */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	que_thr_t*		thr,
+	dict_table_t*		old_table,
+	struct TABLE*		table,
+	ut_stage_alter_t*	stage)
+	MY_ATTRIBUTE((warn_unused_result));
 
 /******************************************************//**
 Get the latest transaction ID that has invoked row_log_online_op()
 during online creation.
 @return latest transaction ID, or 0 if nothing was logged */
-UNIV_INTERN
 trx_id_t
 row_log_get_max_trx(
 /*================*/
 	dict_index_t*	index)	/*!< in: index, must be locked */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/******************************************************//**
-Merge the row log to the index upon completing index creation.
+/** Apply the row log to the index upon completing index creation.
+@param[in]	trx	transaction (for checking if the operation was
+interrupted)
+@param[in,out]	index	secondary index
+@param[in,out]	table	MySQL table (for reporting duplicates)
+@param[in,out]	stage	performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_log_index() will be called initially and then
+stage->inc() will be called for each block of log that is applied.
 @return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
 dberr_t
 row_log_apply(
-/*==========*/
-	trx_t*		trx,	/*!< in: transaction (for checking if
-				the operation was interrupted) */
-	dict_index_t*	index,	/*!< in/out: secondary index */
-	struct TABLE*	table)	/*!< in/out: MySQL table
-				(for reporting duplicates) */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const trx_t*		trx,
+	dict_index_t*		index,
+	struct TABLE*		table,
+	ut_stage_alter_t*	stage)
+	MY_ATTRIBUTE((warn_unused_result));
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Estimate how much work is to be done by the log apply phase
+of an ALTER TABLE for this index.
+@param[in]	index	index whose log to assess
+@return work to be done by log-apply in abstract units
+*/
+ulint
+row_log_estimate_work(
+	const dict_index_t*	index);
+#endif /* HAVE_PSI_STAGE_INTERFACE */
 
 #ifndef UNIV_NONINL
 #include "row0log.ic"
diff --git a/storage/innobase/include/row0log.ic b/storage/innobase/include/row0log.ic
index b0f37dbd8e7..3570e5dca1b 100644
--- a/storage/innobase/include/row0log.ic
+++ b/storage/innobase/include/row0log.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,9 +33,7 @@ row_log_abort_sec(
 /*===============*/
 	dict_index_t*	index)	/*!< in/out: index (x-latched) */
 {
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
 
 	ut_ad(!dict_index_is_clust(index));
 	dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
@@ -45,8 +43,8 @@ row_log_abort_sec(
 /******************************************************//**
 Try to log an operation to a secondary index that is
 (or was) being created.
-@retval	true if the operation was logged or can be ignored
-@retval	false if online index creation is not taking place */
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
 UNIV_INLINE
 bool
 row_log_online_op_try(
@@ -56,10 +54,10 @@ row_log_online_op_try(
 	trx_id_t	trx_id)	/*!< in: transaction ID for insert,
 				or 0 for delete */
 {
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
-	      || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(rw_lock_own_flagged(
+			dict_index_get_lock(index),
+			RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
 
 	switch (dict_index_get_online_status(index)) {
 	case ONLINE_INDEX_COMPLETE:
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index 04d4010ad48..8cb9eb7976e 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -35,11 +35,14 @@ Created 13/06/2005 Jan Lindstrom
 #include "mtr0mtr.h"
 #include "rem0types.h"
 #include "rem0rec.h"
-#include "read0types.h"
 #include "btr0types.h"
 #include "row0mysql.h"
 #include "lock0types.h"
 #include "srv0srv.h"
+#include "ut0stage.h"
+
+/* Reserve free space from every block for key_version */
+#define ROW_MERGE_RESERVE_SIZE 4
 
 /* Reserve free space from every block for key_version */
 #define ROW_MERGE_RESERVE_SIZE 4
@@ -111,18 +114,22 @@ struct index_field_t {
 	ulint		col_no;		/*!< column offset */
 	ulint		prefix_len;	/*!< column prefix length, or 0
 					if indexing the whole column */
-	const char*	col_name;	/*!< column name or NULL */
+	bool		is_v_col;	/*!< whether this is a virtual column */
 };
 
 /** Definition of an index being created */
 struct index_def_t {
 	const char*	name;		/*!< index name */
+	bool		rebuild;	/*!< whether the table is rebuilt */
 	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
 					or DICT_CLUSTERED */
 	ulint		key_number;	/*!< MySQL key number,
 					or ULINT_UNDEFINED if none */
 	ulint		n_fields;	/*!< number of fields in index */
 	index_field_t*	fields;		/*!< field definitions */
+	st_mysql_ftparser*
+			parser;		/*!< fulltext parser plugin */
+	bool		is_ngram;	/*!< true if it's ngram parser */
 };
 
 /** Structure for reporting duplicate records. */
@@ -138,40 +145,39 @@ struct row_merge_dup_t {
 
 /*************************************************************//**
 Report a duplicate key. */
-UNIV_INTERN
 void
 row_merge_dup_report(
 /*=================*/
 	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
 	const dfield_t*		entry)	/*!< in: duplicate index entry */
 	MY_ATTRIBUTE((nonnull));
+
 /*********************************************************************//**
 Sets an exclusive lock on a table, for the duration of creating indexes.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_merge_lock_table(
 /*=================*/
 	trx_t*		trx,		/*!< in/out: transaction */
 	dict_table_t*	table,		/*!< in: table to lock */
 	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
+
 /*********************************************************************//**
 Drop indexes that were created before an error occurred.
 The data dictionary must have been locked exclusively by the caller,
 because the transaction will not be committed. */
-UNIV_INTERN
 void
 row_merge_drop_indexes_dict(
 /*========================*/
 	trx_t*		trx,	/*!< in/out: dictionary transaction */
 	table_id_t	table_id)/*!< in: table identifier */
 	MY_ATTRIBUTE((nonnull));
+
 /*********************************************************************//**
 Drop those indexes which were created before an error occurred.
 The data dictionary must have been locked exclusively by the caller,
 because the transaction will not be committed. */
-UNIV_INTERN
 void
 row_merge_drop_indexes(
 /*===================*/
@@ -180,9 +186,9 @@ row_merge_drop_indexes(
 	ibool		locked)	/*!< in: TRUE=table locked,
 				FALSE=may need to do a lazy drop */
 	MY_ATTRIBUTE((nonnull));
+
 /*********************************************************************//**
 Drop all partially created indexes during crash recovery. */
-UNIV_INTERN
 void
 row_merge_drop_temp_indexes(void);
 /*=============================*/
@@ -191,7 +197,6 @@ row_merge_drop_temp_indexes(void);
 UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
 @param[in]	path	location for creating temporary merge files.
 @return File descriptor */
-UNIV_INTERN
 int
 row_merge_file_create_low(
 	const char*	path)
@@ -199,7 +204,6 @@ row_merge_file_create_low(
 /*********************************************************************//**
 Destroy a merge file. And de-register the file from Performance Schema
 if UNIV_PFS_IO is defined. */
-UNIV_INTERN
 void
 row_merge_file_destroy_low(
 /*=======================*/
@@ -209,19 +213,19 @@ row_merge_file_destroy_low(
 Provide a new pathname for a table that is being renamed if it belongs to
 a file-per-table tablespace.  The caller is responsible for freeing the
 memory allocated for the return value.
-@return	new pathname of tablespace file, or NULL if space = 0 */
-UNIV_INTERN
+@return new pathname of tablespace file, or NULL if space = 0 */
 char*
 row_make_new_pathname(
 /*==================*/
 	dict_table_t*	table,		/*!< in: table to be renamed */
-	const char*	new_name);	/*!< in: new name */
+	const char*	new_name)	/*!< in: new name */
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /*********************************************************************//**
 Rename the tables in the data dictionary.  The data dictionary must
 have been locked exclusively by the caller, because the transaction
 will not be committed.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_merge_rename_tables_dict(
 /*=========================*/
@@ -237,100 +241,119 @@ row_merge_rename_tables_dict(
 Rename an index in the dictionary that was created. The data
 dictionary must have been locked exclusively by the caller, because
 the transaction will not be committed.
-@return	DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
 dberr_t
 row_merge_rename_index_to_add(
 /*==========================*/
 	trx_t*		trx,		/*!< in/out: transaction */
 	table_id_t	table_id,	/*!< in: table identifier */
 	index_id_t	index_id)	/*!< in: index identifier */
-	MY_ATTRIBUTE((nonnull));
+	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
 /*********************************************************************//**
 Rename an index in the dictionary that is to be dropped. The data
 dictionary must have been locked exclusively by the caller, because
 the transaction will not be committed.
-@return	DB_SUCCESS if all OK */
-UNIV_INTERN
+@return DB_SUCCESS if all OK */
 dberr_t
 row_merge_rename_index_to_drop(
 /*===========================*/
 	trx_t*		trx,		/*!< in/out: transaction */
 	table_id_t	table_id,	/*!< in: table identifier */
 	index_id_t	index_id)	/*!< in: index identifier */
-	MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Create the index and load in to the dictionary.
-@return	index, or NULL on error */
-UNIV_INTERN
+	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
+
+/** Create the index and load it into the dictionary.
+@param[in,out]	trx		trx (sets error_state)
+@param[in,out]	table		the index is on this table
+@param[in]	index_def	the index definition
+@param[in]	add_v		new virtual columns added along with add
+				index call
+@param[in]	col_names	column names if columns are renamed
+				or NULL
+@return index, or NULL on error */
 dict_index_t*
 row_merge_create_index(
-/*===================*/
-	trx_t*			trx,	/*!< in/out: trx (sets error_state) */
-	dict_table_t*		table,	/*!< in: the index is on this table */
+	trx_t*			trx,
+	dict_table_t*		table,
 	const index_def_t*	index_def,
-					/*!< in: the index definition */
-	const char**		col_names);
-					/*! in: column names if columns are
-					renamed or NULL */
+	const dict_add_v_col_t*	add_v,
+	const char**		col_names)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*********************************************************************//**
 Check if a transaction can use an index.
-@return	TRUE if index can be used by the transaction else FALSE */
-UNIV_INTERN
+@return TRUE if index can be used by the transaction else FALSE */
 ibool
 row_merge_is_index_usable(
 /*======================*/
 	const trx_t*		trx,	/*!< in: transaction */
-	const dict_index_t*	index);	/*!< in: index to check */
+	const dict_index_t*	index)	/*!< in: index to check */
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /*********************************************************************//**
 Drop a table. The caller must have ensured that the background stats
 thread is not processing the table. This can be done by calling
 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
 before calling this function.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 row_merge_drop_table(
 /*=================*/
 	trx_t*		trx,		/*!< in: transaction */
 	dict_table_t*	table)		/*!< in: table instance to drop */
-	MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Build indexes on a table by reading a clustered index,
-creating a temporary file containing index entries, merge sorting
-these index entries and inserting sorted index entries to indexes.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Build indexes on a table by reading a clustered index, creating a temporary
+file containing index entries, merge sorting these index entries and inserting
+sorted index entries to indexes.
+@param[in]	trx		transaction
+@param[in]	old_table	table where rows are read from
+@param[in]	new_table	table where indexes are created; identical to
+old_table unless creating a PRIMARY KEY
+@param[in]	online		true if creating indexes online
+@param[in]	indexes		indexes to be created
+@param[in]	key_numbers	MySQL key numbers
+@param[in]	n_indexes	size of indexes[]
+@param[in,out]	table		MySQL table, for reporting erroneous key value
+if applicable
+@param[in]	add_cols	default values of added columns, or NULL
+@param[in]	col_map		mapping of old column numbers to new ones, or
+NULL if old_table == new_table
+@param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
+ULINT_UNDEFINED if none is added
+@param[in,out]	sequence	autoinc sequence
+@param[in]	skip_pk_sort	whether the new PRIMARY KEY will follow
+existing order
+@param[in,out]	stage		performance schema accounting object, used by
+ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
+this function and it will be passed to other functions for further accounting.
+@param[in]	add_v		new virtual columns added along with indexes
+@param[in]	eval_table	mysql table used to evaluate virtual column
+				value, see innobase_get_computed_value().
+@return DB_SUCCESS or error code */
 dberr_t
 row_merge_build_indexes(
-/*====================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_table_t*	old_table,	/*!< in: table where rows are
-					read from */
-	dict_table_t*	new_table,	/*!< in: table where indexes are
-					created; identical to old_table
-					unless creating a PRIMARY KEY */
-	bool		online,		/*!< in: true if creating indexes
-					online */
-	dict_index_t**	indexes,	/*!< in: indexes to be created */
-	const ulint*	key_numbers,	/*!< in: MySQL key numbers */
-	ulint		n_indexes,	/*!< in: size of indexes[] */
-	struct TABLE*	table,		/*!< in/out: MySQL table, for
-					reporting erroneous key value
-					if applicable */
-	const dtuple_t*	add_cols,	/*!< in: default values of
-					added columns, or NULL */
-	const ulint*	col_map,	/*!< in: mapping of old column
-					numbers to new ones, or NULL
-					if old_table == new_table */
-	ulint		add_autoinc,	/*!< in: number of added
-					AUTO_INCREMENT column, or
-					ULINT_UNDEFINED if none is added */
-	ib_sequence_t&	sequence)	/*!< in/out: autoinc sequence */
-	MY_ATTRIBUTE((nonnull(1,2,3,5,6,8), warn_unused_result));
+	trx_t*			trx,
+	dict_table_t*		old_table,
+	dict_table_t*		new_table,
+	bool			online,
+	dict_index_t**		indexes,
+	const ulint*		key_numbers,
+	ulint			n_indexes,
+	struct TABLE*		table,
+	const dtuple_t*		add_cols,
+	const ulint*		col_map,
+	ulint			add_autoinc,
+	ib_sequence_t&		sequence,
+	bool			skip_pk_sort,
+	ut_stage_alter_t*	stage,
+	const dict_add_v_col_t*	add_v,
+	struct TABLE*		eval_table)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /********************************************************************//**
 Write a buffer to a block. */
-UNIV_INTERN
 void
 row_merge_buf_write(
 /*================*/
@@ -338,9 +361,9 @@ row_merge_buf_write(
 	const merge_file_t*	of,	/*!< in: output file */
 	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
 	MY_ATTRIBUTE((nonnull));
+
 /********************************************************************//**
 Sort a buffer. */
-UNIV_INTERN
 void
 row_merge_buf_sort(
 /*===============*/
@@ -348,10 +371,10 @@ row_merge_buf_sort(
 	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
 					(NULL if non-unique index) */
 	MY_ATTRIBUTE((nonnull(1)));
+
 /********************************************************************//**
 Write a merge block to the file system.
 @return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
 ibool
 row_merge_write(
 /*============*/
@@ -361,12 +384,12 @@ row_merge_write(
 	const void*	buf,	/*!< in: data */
 	fil_space_crypt_t*	crypt_data,	/*!< in: table crypt data */
 	void*		crypt_buf,		/*!< in: crypt buf or NULL */
-	ulint		space);			/*!< in: space id */
+	ulint		space)			/*!< in: space id */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************************//**
 Empty a sort buffer.
 @return sort buffer */
-UNIV_INTERN
 row_merge_buf_t*
 row_merge_buf_empty(
 /*================*/
@@ -377,62 +400,73 @@ row_merge_buf_empty(
 @param[out]	merge_file	merge file structure
 @param[in]	path		location for creating temporary file
 @return file descriptor, or -1 on failure */
-UNIV_INTERN
 int
 row_merge_file_create(
 	merge_file_t*	merge_file,
-	const char*	path);
+	const char*	path)
+	MY_ATTRIBUTE((warn_unused_result, nonnull));
 
-/*********************************************************************//**
-Merge disk files.
+/** Merge disk files.
+@param[in]	trx	transaction
+@param[in]	dup	descriptor of index being created
+@param[in,out]	file	file containing index entries
+@param[in,out]	block	3 buffers
+@param[in,out]	tmpfd	temporary file handle
+@param[in]      update_progress true, if we should update progress status
+@param[in]      pct_progress total progress percent until now
+@param[in]      pct_cost current progress percent
+@param[in]      crypt_data table crypt data
+@param[in]      crypt_block crypt buf or NULL
+@param[in]      space    space_id
+@param[in,out]	stage	performance schema accounting object, used by
+ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
+and then stage->inc() will be called for each record processed.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 dberr_t
 row_merge_sort(
 /*===========*/
-	trx_t*			trx,	/*!< in: transaction */
-	const row_merge_dup_t*	dup,	/*!< in: descriptor of
-					index being created */
-	merge_file_t*		file,	/*!< in/out: file containing
-					index entries */
-	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
-	int*			tmpfd,	/*!< in/out: temporary file handle */
-	const bool		update_progress, /*!< in: update progress status variable or not */
-	const float		pct_progress, /*!< in: total progress percent until now */
-	const float		pct_cost, /*!< in: current progress percent */
-	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
-	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
-	ulint			space)	   /*!< in: space id */
-	__attribute__((nonnull(1,2,3,4,5)));
+	trx_t*			trx,
+	const row_merge_dup_t*	dup,
+	merge_file_t*		file,
+	row_merge_block_t*	block,
+	int*			tmpfd,
+	const bool		update_progress,
+	const float		pct_progress,
+	const float		pct_cost,
+	fil_space_crypt_t*	crypt_data,
+	row_merge_block_t*	crypt_block,
+	ulint			space,
+	ut_stage_alter_t*	stage = NULL)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*********************************************************************//**
 Allocate a sort buffer.
 @return own: sort buffer */
-UNIV_INTERN
 row_merge_buf_t*
 row_merge_buf_create(
 /*=================*/
 	dict_index_t*	index)	/*!< in: secondary index */
 	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
+
 /*********************************************************************//**
 Deallocate a sort buffer. */
-UNIV_INTERN
 void
 row_merge_buf_free(
 /*===============*/
 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
 	MY_ATTRIBUTE((nonnull));
+
 /*********************************************************************//**
 Destroy a merge file. */
-UNIV_INTERN
 void
 row_merge_file_destroy(
 /*===================*/
 	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
 	MY_ATTRIBUTE((nonnull));
+
 /********************************************************************//**
 Read a merge block from the file system.
 @return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
 ibool
 row_merge_read(
 /*===========*/
@@ -443,12 +477,12 @@ row_merge_read(
 	row_merge_block_t*	buf,	/*!< out: data */
 	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
 	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
-	ulint			space);	   /*!< in: space id */
+	ulint			space)	   /*!< in: space id */
+	MY_ATTRIBUTE((warn_unused_result));
 
 /********************************************************************//**
 Read a merge record.
 @return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN
 const byte*
 row_merge_read_rec(
 /*===============*/
@@ -465,5 +499,5 @@ row_merge_read_rec(
 	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
 	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
 	ulint			space)	   /*!< in: space id */
-	__attribute__((nonnull(1,2,3,4,6,7,8), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* row0merge.h */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 71e3b9bb19e..f010e717103 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -27,7 +27,8 @@ Created 9/17/2000 Heikki Tuuri
 #ifndef row0mysql_h
 #define row0mysql_h
 
-#include "univ.i"
+#include "ha_prototypes.h"
+
 #include "data0data.h"
 #include "que0types.h"
 #include "dict0types.h"
@@ -46,7 +47,6 @@ struct row_prebuilt_t;
 
 /*******************************************************************//**
 Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
 void
 row_mysql_prebuilt_free_blob_heap(
 /*==============================*/
@@ -57,7 +57,6 @@ Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
 format.
 @return pointer to the data, we skip the 1 or 2 bytes at the start
 that are used to store the len */
-UNIV_INTERN
 byte*
 row_mysql_store_true_var_len(
 /*=========================*/
@@ -69,7 +68,6 @@ Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
 returns a pointer to the data.
 @return pointer to the data, we skip the 1 or 2 bytes at the start
 that are used to store the len */
-UNIV_INTERN
 const byte*
 row_mysql_read_true_varchar(
 /*========================*/
@@ -79,7 +77,6 @@ row_mysql_read_true_varchar(
 				or 2 bytes */
 /*******************************************************************//**
 Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
 void
 row_mysql_store_blob_ref(
 /*=====================*/
@@ -96,8 +93,7 @@ row_mysql_store_blob_ref(
 				header! */
 /*******************************************************************//**
 Reads a reference to a BLOB in the MySQL format.
-@return	pointer to BLOB data */
-UNIV_INTERN
+@return pointer to BLOB data */
 const byte*
 row_mysql_read_blob_ref(
 /*====================*/
@@ -106,9 +102,36 @@ row_mysql_read_blob_ref(
 					MySQL format */
 	ulint		col_len);	/*!< in: BLOB reference length
 					(not BLOB length) */
+/*******************************************************************//**
+Converts InnoDB geometry data format to MySQL data format. */
+void
+row_mysql_store_geometry(
+/*=====================*/
+	byte*		dest,		/*!< in/out: where to store */
+	ulint		dest_len,	/*!< in: dest buffer size: determines into
+					how many bytes the geometry length is stored,
+					the space for the length may vary from 1
+					to 4 bytes */
+	const byte*	src,		/*!< in: geometry data; if the value to store
+					is SQL NULL this should be NULL pointer */
+	ulint		src_len);	/*!< in: geometry length; if the value to store
+					is SQL NULL this should be 0; remember
+					also to set the NULL bit in the MySQL record
+					header! */
+/*******************************************************************//**
+Reads a reference to geometry data in the MySQL format.
+@return pointer to geometry data */
+const byte*
+row_mysql_read_geometry(
+/*====================*/
+	ulint*		len,		/*!< out: geometry data length */
+	const byte*	ref,		/*!< in: reference in the
+					MySQL format */
+	ulint		col_len)	/*!< in: BLOB reference length
+					(not BLOB length) */
+	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
 /**************************************************************//**
 Pad a column with spaces. */
-UNIV_INTERN
 void
 row_mysql_pad_col(
 /*==============*/
@@ -121,8 +144,7 @@ row_mysql_pad_col(
 Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
 The counterpart of this function is row_sel_field_store_in_mysql_format() in
 row0sel.cc.
-@return	up to which byte we used buf in the conversion */
-UNIV_INTERN
+@return up to which byte we used buf in the conversion */
 byte*
 row_mysql_store_col_in_innobase_format(
 /*===================================*/
@@ -158,7 +180,6 @@ row_mysql_store_col_in_innobase_format(
 Handles user errors and lock waits detected by the database engine.
 @return true if it was a lock wait and we should continue running the
 query thread */
-UNIV_INTERN
 bool
 row_mysql_handle_errors(
 /*====================*/
@@ -171,8 +192,7 @@ row_mysql_handle_errors(
 	MY_ATTRIBUTE((nonnull(1,2)));
 /********************************************************************//**
 Create a prebuilt struct for a MySQL table handle.
-@return	own: a prebuilt struct */
-UNIV_INTERN
+@return own: a prebuilt struct */
 row_prebuilt_t*
 row_create_prebuilt(
 /*================*/
@@ -181,7 +201,6 @@ row_create_prebuilt(
 					the MySQL format */
 /********************************************************************//**
 Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
 void
 row_prebuilt_free(
 /*==============*/
@@ -190,7 +209,6 @@ row_prebuilt_free(
 /*********************************************************************//**
 Updates the transaction pointers in query graphs stored in the prebuilt
 struct. */
-UNIV_INTERN
 void
 row_update_prebuilt_trx(
 /*====================*/
@@ -203,18 +221,17 @@ AUTO_INC lock gives exclusive access to the auto-inc counter of the
 table. The lock is reserved only for the duration of an SQL statement.
 It is not compatible with another AUTO_INC or exclusive lock on the
 table.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_lock_table_autoinc_for_mysql(
 /*=============================*/
 	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in the MySQL
 					table handle */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /*********************************************************************//**
 Sets a table lock on the table mentioned in prebuilt.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_lock_table_for_mysql(
 /*=====================*/
@@ -227,20 +244,19 @@ row_lock_table_for_mysql(
 	ulint		mode)		/*!< in: lock mode of table
 					(ignored if table==NULL) */
 	MY_ATTRIBUTE((nonnull(1)));
-/*********************************************************************//**
-Does an insert for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+
+/** Does an insert for MySQL.
+@param[in]	mysql_rec	row in the MySQL format
+@param[in,out]	prebuilt	prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
 dberr_t
 row_insert_for_mysql(
-/*=================*/
-	byte*		mysql_rec,	/*!< in: row in the MySQL format */
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	const byte*		mysql_rec,
+	row_prebuilt_t*		prebuilt)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*********************************************************************//**
 Builds a dummy query graph used in selects. */
-UNIV_INTERN
 void
 row_prebuild_sel_graph(
 /*===================*/
@@ -250,8 +266,7 @@ row_prebuild_sel_graph(
 Gets pointer to a prebuilt update vector used in updates. If the update
 graph has not yet been built in the prebuilt struct, then this function
 first builds it.
-@return	prebuilt update vector */
-UNIV_INTERN
+@return prebuilt update vector */
 upd_t*
 row_get_prebuilt_update_vector(
 /*===========================*/
@@ -260,50 +275,44 @@ row_get_prebuilt_update_vector(
 /*********************************************************************//**
 Checks if a table is such that we automatically created a clustered
 index on it (on row id).
-@return	TRUE if the clustered index was generated automatically */
-UNIV_INTERN
+@return TRUE if the clustered index was generated automatically */
 ibool
 row_table_got_default_clust_index(
 /*==============================*/
 	const dict_table_t*	table);	/*!< in: table */
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+
+/** Does an update or delete of a row for MySQL.
+@param[in]	mysql_rec	row in the MySQL format
+@param[in,out]	prebuilt	prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
 dberr_t
 row_update_for_mysql(
-/*=================*/
-	byte*		mysql_rec,	/*!< in: the row to be updated, in
-					the MySQL format */
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
+	const byte*		mysql_rec,
+	row_prebuilt_t*		prebuilt)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
 session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
 Before calling this function row_search_for_mysql() must have
 initialized prebuilt->new_rec_locks to store the information which new
 record locks really were set. This function removes a newly set
 clustered index record lock under prebuilt->pcur or
 prebuilt->clust_pcur.  Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set. */
-UNIV_INTERN
+releases the latest clustered index record lock we set.
+@param[in,out]	prebuilt		prebuilt struct in MySQL handle
+@param[in]	has_latches_on_recs	TRUE if called so that we have the
+					latches on the records under pcur
+					and clust_pcur, and we do not need
+					to reposition the cursors. */
 void
 row_unlock_for_mysql(
-/*=================*/
-	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct in MySQL
-					handle */
-	ibool		has_latches_on_recs)/*!< in: TRUE if called
-					so that we have the latches on
-					the records under pcur and
-					clust_pcur, and we do not need
-					to reposition the cursors. */
-	MY_ATTRIBUTE((nonnull));
+	row_prebuilt_t*	prebuilt,
+	ibool		has_latches_on_recs);
+
 /*********************************************************************//**
 Checks if a table name contains the string "/#sql" which denotes temporary
 tables in MySQL.
 @return true if temporary table */
-UNIV_INTERN
 bool
 row_is_mysql_tmp_table_name(
 /*========================*/
@@ -314,29 +323,15 @@ row_is_mysql_tmp_table_name(
 /*********************************************************************//**
 Creates an query graph node of 'update' type to be used in the MySQL
 interface.
-@return	own: update node */
-UNIV_INTERN
+@return own: update node */
 upd_node_t*
 row_create_update_node_for_mysql(
 /*=============================*/
 	dict_table_t*	table,	/*!< in: table to update */
 	mem_heap_t*	heap);	/*!< in: mem heap from which allocated */
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_update_cascade_for_mysql(
-/*=========================*/
-	que_thr_t*	thr,	/*!< in: query thread */
-	upd_node_t*	node,	/*!< in: update node used in the cascade
-				or set null operation */
-	dict_table_t*	table)	/*!< in: table where we do the operation */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Locks the data dictionary exclusively for performing a table create or other
 data dictionary modification operation. */
-UNIV_INTERN
 void
 row_mysql_lock_data_dictionary_func(
 /*================================*/
@@ -347,7 +342,6 @@ row_mysql_lock_data_dictionary_func(
 	row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
 /*********************************************************************//**
 Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
 void
 row_mysql_unlock_data_dictionary(
 /*=============================*/
@@ -355,7 +349,6 @@ row_mysql_unlock_data_dictionary(
 /*********************************************************************//**
 Locks the data dictionary in shared mode from modifications, for performing
 foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
 void
 row_mysql_freeze_data_dictionary_func(
 /*==================================*/
@@ -366,37 +359,34 @@ row_mysql_freeze_data_dictionary_func(
 	row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
 /*********************************************************************//**
 Unlocks the data dictionary shared lock. */
-UNIV_INTERN
 void
 row_mysql_unfreeze_data_dictionary(
 /*===============================*/
 	trx_t*	trx);	/*!< in/out: transaction */
 /*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). On failure the transaction will
-be rolled back.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+Creates a table for MySQL. On failure the transaction will be rolled back
+and the 'table' object will be freed.
+@return error code or DB_SUCCESS */
 dberr_t
 row_create_table_for_mysql(
 /*=======================*/
 	dict_table_t*	table,	/*!< in, own: table definition
 				(will be freed, or on DB_SUCCESS
 				added to the data dictionary cache) */
+	const char*	compression,
+				/*!< in: compression algorithm to use,
+				can be NULL */
 	trx_t*		trx,	/*!< in/out: transaction */
 	bool		commit,	/*!< in: if true, commit the transaction */
 	fil_encryption_t mode,	/*!< in: encryption mode */
 	ulint		key_id)	/*!< in: encryption key_id */
-	__attribute__((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*********************************************************************//**
 Does an index creation operation for MySQL. TODO: currently failure
 to create an index results in dropping the whole table! This is no problem
 currently as all indexes must be created at the same time as the table.
-@return	error number or DB_SUCCESS */
-UNIV_INTERN
+@return error number or DB_SUCCESS */
 dberr_t
 row_create_index_for_mysql(
 /*=======================*/
@@ -409,7 +399,7 @@ row_create_index_for_mysql(
 					index columns, which are
 					then checked for not being too
 					large. */
-	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Scans a table create SQL string and adds to the data dictionary
 the foreign key constraints declared in the string. This function
@@ -417,46 +407,48 @@ should be called after the indexes for a table have been created.
 Each foreign key constraint must be accompanied with indexes in
 bot participating tables. The indexes are allowed to contain more
 fields than mentioned in the constraint.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+
+@param[in]	trx		transaction
+@param[in]	sql_string	table create statement where
+				foreign keys are declared like:
+				FOREIGN KEY (a, b) REFERENCES table2(c, d),
+				table2 can be written also with the database
+				name before it: test.table2; the default
+				database is the database of parameter name
+@param[in]	sql_length	length of sql_string
+@param[in]	name		table full name in normalized form
+@param[in]	reject_fks	if TRUE, fail with error code
+				DB_CANNOT_ADD_CONSTRAINT if any
+				foreign keys are found.
+@return error code or DB_SUCCESS */
 dberr_t
 row_table_add_foreign_constraints(
-/*==============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-				FOREIGN KEY (a, b) REFERENCES table2(c, d),
-					table2 can be written also with the
-					database name before it: test.table2 */
-	size_t		sql_length,	/*!< in: length of sql_string */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	trx_t*			trx,
+	const char*		sql_string,
+	size_t			sql_length,
+	const char*		name,
+	ibool			reject_fks)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*********************************************************************//**
 The master thread in srv0srv.cc calls this regularly to drop tables which
 we must drop in background after queries to them have ended. Such lazy
 dropping of tables is needed in ALTER TABLE on Unix.
-@return	how many tables dropped + remaining tables in list */
-UNIV_INTERN
+@return how many tables dropped + remaining tables in list */
 ulint
 row_drop_tables_for_mysql_in_background(void);
 /*=========================================*/
 /*********************************************************************//**
 Get the background drop list length. NOTE: the caller must own the kernel
 mutex!
-@return	how many tables in list */
-UNIV_INTERN
+@return how many tables in list */
 ulint
 row_get_background_drop_list_len_low(void);
 /*======================================*/
+
 /*********************************************************************//**
 Sets an exclusive lock on a table.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_mysql_lock_table(
 /*=================*/
@@ -468,8 +460,7 @@ row_mysql_lock_table(
 
 /*********************************************************************//**
 Truncates a table for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_truncate_table_for_mysql(
 /*=========================*/
@@ -477,14 +468,10 @@ row_truncate_table_for_mysql(
 	trx_t*		trx)	/*!< in: transaction handle */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
-Drops a table for MySQL.  If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread.  If the data dictionary was not already locked
+Drops a table for MySQL.  If the data dictionary was not already locked
 by the transaction, the transaction will be committed.  Otherwise, the
 data dictionary will remain locked.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_drop_table_for_mysql(
 /*=====================*/
@@ -492,15 +479,13 @@ row_drop_table_for_mysql(
 	trx_t*		trx,	/*!< in: dictionary transaction handle */
 	bool		drop_db,/*!< in: true=dropping whole database */
 	ibool		create_failed,/*!<in: TRUE=create table failed
-				       because e.g. foreign key column
-				       type mismatch. */
-	bool		nonatomic = true)
+					because e.g. foreign key column
+					type mismatch. */
+	bool		nonatomic = true);
 				/*!< in: whether it is permitted
 				to release and reacquire dict_operation_lock */
-	MY_ATTRIBUTE((nonnull));
 /*********************************************************************//**
 Drop all temporary tables during crash recovery. */
-UNIV_INTERN
 void
 row_mysql_drop_temp_tables(void);
 /*============================*/
@@ -509,8 +494,7 @@ row_mysql_drop_temp_tables(void);
 Discards the tablespace of a table which stored in an .ibd file. Discarding
 means that this function deletes the .ibd file and assigns a new table id for
 the table. Also the flag table->ibd_file_missing is set TRUE.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_discard_tablespace_for_mysql(
 /*=============================*/
@@ -520,28 +504,28 @@ row_discard_tablespace_for_mysql(
 /*****************************************************************//**
 Imports a tablespace. The space id in the .ibd file must match the space id
 of the table in the data dictionary.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_import_tablespace_for_mysql(
 /*============================*/
 	dict_table_t*	table,		/*!< in/out: table */
 	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL */
         MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drops a database for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+
+/** Drop a database for MySQL.
+@param[in]	name	database name which ends at '/'
+@param[in]	trx	transaction handle
+@param[out]	found	number of dropped tables/partitions
+@return error code or DB_SUCCESS */
 dberr_t
 row_drop_database_for_mysql(
-/*========================*/
-	const char*	name,	/*!< in: database name which ends to '/' */
-	trx_t*		trx)	/*!< in: transaction handle */
-	MY_ATTRIBUTE((nonnull));
+	const char*	name,
+	trx_t*		trx,
+	ulint*		found);
+
 /*********************************************************************//**
 Renames a table for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_rename_table_for_mysql(
 /*=======================*/
@@ -550,49 +534,52 @@ row_rename_table_for_mysql(
 	trx_t*		trx,		/*!< in/out: transaction */
 	bool		commit)		/*!< in: whether to commit trx */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Renames a partitioned table for MySQL.
+@param[in]	old_name	Old table name.
+@param[in]	new_name	New table name.
+@param[in,out]	trx		Transaction.
+@return error code or DB_SUCCESS */
+dberr_t
+row_rename_partitions_for_mysql(
+	const char*	old_name,
+	const char*	new_name,
+	trx_t*		trx)
+	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
 /*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
+Scans an index for either COUNT(*) or CHECK TABLE.
+For CHECK TABLE, checks that the index contains entries in an ascending order,
+unique constraint is not broken, and calculates the number of index entries
 in the read view of the current transaction.
-@return true if ok */
-UNIV_INTERN
-bool
-row_check_index_for_mysql(
-/*======================*/
+@return DB_SUCCESS or other error */
+dberr_t
+row_scan_index_for_mysql(
+/*=====================*/
 	row_prebuilt_t*		prebuilt,	/*!< in: prebuilt struct
 						in MySQL handle */
 	const dict_index_t*	index,		/*!< in: index */
+	bool			check_keys,	/*!< in: true=check for mis-
+						ordered or duplicate records,
+						false=count the rows only */
 	ulint*			n_rows)		/*!< out: number of entries
 						seen in the consistent read */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return	true if monitor table */
-UNIV_INTERN
-bool
-row_is_magic_monitor_table(
-/*=======================*/
-	const char*	table_name)	/*!< in: name of the table, in the
-					form database/table_name */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Initialize this module */
-UNIV_INTERN
 void
 row_mysql_init(void);
 /*================*/
 
 /*********************************************************************//**
 Close this module */
-UNIV_INTERN
 void
 row_mysql_close(void);
 /*=================*/
 
 /*********************************************************************//**
 Reassigns the table identifier of a table.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 row_mysql_table_id_reassign(
 /*========================*/
@@ -655,6 +642,7 @@ struct mysql_row_templ_t {
 	ulint	is_unsigned;		/*!< if a column type is an integer
 					type and this field is != 0, then
 					it is an unsigned integer type */
+	ulint	is_virtual;		/*!< if a column is a virtual column */
 };
 
 #define MYSQL_FETCH_CACHE_SIZE		8
@@ -680,10 +668,6 @@ struct row_prebuilt_t {
 					an SQL statement: we may have to set
 					an intention lock on the table,
 					create a consistent read view etc. */
-	unsigned	mysql_has_locked:1;/*!< this is set TRUE when MySQL
-					calls external_lock on this handle
-					with a lock flag, and set FALSE when
-					with the F_UNLOCK flag */
 	unsigned	clust_index_was_generated:1;
 					/*!< if the user did not define a
 					primary key in MySQL, then Innobase
@@ -723,11 +707,18 @@ struct row_prebuilt_t {
 					is set but we later optimize out the
 					clustered index lookup */
 	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
-					a column with DATA_BLOB ==
-					get_innobase_type_from_mysql_type();
+					a column for which DATA_LARGE_MTYPE(
+					get_innobase_type_from_mysql_type())
+					is TRUE;
 					not to be confused with InnoDB
 					externally stored columns
 					(VARCHAR can be off-page too) */
+	unsigned	templ_contains_fixed_point:1;/*!< TRUE if the
+					template contains a column with
+					DATA_POINT. Since InnoDB regards
+					DATA_POINT as non-BLOB type, the
+					templ_contains_blob can't tell us
+					if there is DATA_POINT */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template
@@ -761,9 +752,9 @@ struct row_prebuilt_t {
 					trx_id or n_indexes mismatch. */
 	que_fork_t*	upd_graph;	/*!< Innobase SQL query graph used
 					in updates or deletes */
-	btr_pcur_t	pcur;		/*!< persistent cursor used in selects
+	btr_pcur_t*	pcur;		/*!< persistent cursor used in selects
 					and updates */
-	btr_pcur_t	clust_pcur;	/*!< persistent cursor used in
+	btr_pcur_t*	clust_pcur;	/*!< persistent cursor used in
 					some selects and updates */
 	que_fork_t*	sel_graph;	/*!< dummy query graph used in
 					selects */
@@ -856,6 +847,8 @@ struct row_prebuilt_t {
 	mem_heap_t*	old_vers_heap;	/*!< memory heap where a previous
 					version is built in consistent read */
 	bool		in_fts_query;	/*!< Whether we are in a FTS query */
+	bool		fts_doc_id_in_read_set; /*!< true if table has externally
+					defined FTS_DOC_ID column. */
 	/*----------------------*/
 	ulonglong	autoinc_last_value;
 					/*!< last value of AUTO-INC interval */
@@ -879,12 +872,18 @@ struct row_prebuilt_t {
 	ulint		idx_cond_n_cols;/*!< Number of fields in idx_cond_cols.
 					0 if and only if idx_cond == NULL. */
 	/*----------------------*/
-	ulint		magic_n2;	/*!< this should be the same as
-					magic_n */
-	/*----------------------*/
 	unsigned	innodb_api:1;	/*!< whether this is a InnoDB API
 					query */
 	const rec_t*	innodb_api_rec;	/*!< InnoDB API search result */
+	/*----------------------*/
+
+	/*----------------------*/
+	rtr_info_t*	rtr_info;	/*!< R-tree Search Info */
+	/*----------------------*/
+
+	ulint		magic_n2;	/*!< this should be the same as
+					magic_n */
+
 	byte*		srch_key_val1;  /*!< buffer used in converting
 					search key values from MySQL format
 					to InnoDB format.*/
@@ -892,7 +891,17 @@ struct row_prebuilt_t {
 					search key values from MySQL format
 					to InnoDB format.*/
 	uint		srch_key_val_len; /*!< Size of search key */
+	/** Disable prefetch. */
+	bool		m_no_prefetch;
+
+	/** Return materialized key for secondary index scan */
+	bool		m_read_virtual_key;
+
+	/** The MySQL table object */
+	TABLE*		m_mysql_table;
 
+	/** limit value to avoid fts result overflow */
+	ulonglong	m_fts_limit;
 };
 
 /** Callback for row_mysql_sys_index_iterate() */
@@ -900,11 +909,52 @@ struct SysIndexCallback {
 	virtual ~SysIndexCallback() { }
 
 	/** Callback method
-	@param mtr - current mini transaction
-	@param pcur - persistent cursor. */
+	@param mtr current mini transaction
+	@param pcur persistent cursor. */
 	virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
 };
 
+/** Get the computed value by supplying the base column values.
+@param[in,out]	row		the data row
+@param[in]	col		virtual column
+@param[in]	index		index on the virtual column
+@param[in,out]	local_heap	heap memory for processing large data etc.
+@param[in,out]	heap		memory heap that copies the actual index row
+@param[in]	ifield		index field
+@param[in]	thd		MySQL thread handle
+@param[in,out]	mysql_table	mysql table object
+@param[in]	old_table	during ALTER TABLE, this is the old table
+				or NULL.
+@param[in]	parent_update	update vector for the parent row
+@param[in]	foreign		foreign key information
+@return the field filled with computed value */
+dfield_t*
+innobase_get_computed_value(
+	const dtuple_t*		row,
+	const dict_v_col_t*	col,
+	const dict_index_t*	index,
+	mem_heap_t**		local_heap,
+	mem_heap_t*		heap,
+	const dict_field_t*	ifield,
+	THD*			thd,
+	TABLE*			mysql_table,
+	const dict_table_t*	old_table,
+	upd_t*			parent_update,
+	dict_foreign_t*		foreign);
+
+/** Build the virtual column template of a table.
+@param[in,out]	table	table whose virtual column template is to be built */
+void
+innobase_init_vc_templ(
+	dict_table_t*	table);
+
+/** Change dbname and table name in table->vc_templ.
+@param[in,out]	table	the table whose virtual column template
+dbname and tbname are to be renamed. */
+void
+innobase_rename_vc_templ(
+	dict_table_t*	table);
+
 #define ROW_PREBUILT_FETCH_MAGIC_N	465765687
 
 #define ROW_MYSQL_WHOLE_ROW	0
@@ -926,4 +976,10 @@ struct SysIndexCallback {
 #include "row0mysql.ic"
 #endif
 
+#ifdef UNIV_DEBUG
+/** Wait for the background drop list to become empty. */
+void
+row_wait_for_background_drop_list_empty();
+#endif /* UNIV_DEBUG */
+
 #endif /* row0mysql.h */
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 5df899bc399..32a989833bc 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -34,20 +34,18 @@ Created 3/14/1997 Heikki Tuuri
 #include "trx0types.h"
 #include "que0types.h"
 #include "row0types.h"
-#include "row0purge.h"
 #include "ut0vec.h"
 
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return	own: purge node */
-UNIV_INTERN
+/** Create a purge node to a query graph.
+@param[in]	parent	parent node, i.e., a thr node
+@param[in]	heap	memory heap where created
+@return own: purge node */
 purge_node_t*
 row_purge_node_create(
-/*==================*/
-	que_thr_t*	parent,		/*!< in: parent node, i.e., a
-					thr node */
-	mem_heap_t*	heap)		/*!< in: memory heap where created */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	que_thr_t*	parent,
+	mem_heap_t*	heap)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /***********************************************************//**
 Determines if it is possible to remove a secondary index entry.
 Removal is possible if the secondary index entry does not refer to any
@@ -62,8 +60,7 @@ inserts a record that the secondary index entry would refer to.
 However, in that case, the user transaction would also re-insert the
 secondary index entry after purge has removed it and released the leaf
 page latch.
-@return	true if the secondary index record can be purged */
-UNIV_INTERN
+@return true if the secondary index record can be purged */
 bool
 row_purge_poss_sec(
 /*===============*/
@@ -74,8 +71,7 @@ row_purge_poss_sec(
 /***************************************************************
 Does the purge operation for a single undo log record. This is a high-level
 function used in an SQL execution graph.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_purge_step(
 /*===========*/
@@ -91,9 +87,9 @@ struct purge_node_t{
 	roll_ptr_t	roll_ptr;/* roll pointer to undo log record */
 	ib_vector_t*    undo_recs;/*!< Undo recs to purge */
 
-	undo_no_t	undo_no;/* undo number of the record */
+	undo_no_t	undo_no;/*!< undo number of the record */
 
-	ulint		rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+	ulint		rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
 				... */
 	dict_table_t*	table;	/*!< table where purge is done */
 
@@ -118,6 +114,7 @@ struct purge_node_t{
 	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
 				clustered index record */
 	ibool		done;	/* Debug flag */
+	trx_id_t	trx_id;	/*!< trx id for this purging record */
 
 #ifdef UNIV_DEBUG
 	/***********************************************************//**
diff --git a/storage/innobase/include/row0quiesce.h b/storage/innobase/include/row0quiesce.h
index 35d8184d33c..ae14b3c63c1 100644
--- a/storage/innobase/include/row0quiesce.h
+++ b/storage/innobase/include/row0quiesce.h
@@ -37,7 +37,6 @@ struct trx_t;
 
 /*********************************************************************//**
 Quiesce the tablespace that the table resides in. */
-UNIV_INTERN
 void
 row_quiesce_table_start(
 /*====================*/
@@ -48,7 +47,6 @@ row_quiesce_table_start(
 /*********************************************************************//**
 Set a table's quiesce state.
 @return DB_SUCCESS or errro code. */
-UNIV_INTERN
 dberr_t
 row_quiesce_set_state(
 /*==================*/
@@ -59,7 +57,6 @@ row_quiesce_set_state(
 
 /*********************************************************************//**
 Cleanup after table quiesce. */
-UNIV_INTERN
 void
 row_quiesce_table_complete(
 /*=======================*/
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index b04068c5a5d..93ff90d020e 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -33,14 +33,13 @@ Created 4/20/1996 Heikki Tuuri
 #include "que0types.h"
 #include "mtr0mtr.h"
 #include "rem0types.h"
-#include "read0types.h"
 #include "row0types.h"
 #include "btr0types.h"
 
 /*********************************************************************//**
 Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
 a clustered index record.
-@return	offset of DATA_TRX_ID */
+@return offset of DATA_TRX_ID */
 UNIV_INLINE
 ulint
 row_get_trx_id_offset(
@@ -50,7 +49,7 @@ row_get_trx_id_offset(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Reads the trx id field from a clustered index record.
-@return	value of the field */
+@return value of the field */
 UNIV_INLINE
 trx_id_t
 row_get_rec_trx_id(
@@ -61,7 +60,7 @@ row_get_rec_trx_id(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*********************************************************************//**
 Reads the roll pointer field from a clustered index record.
-@return	value of the field */
+@return value of the field */
 UNIV_INLINE
 roll_ptr_t
 row_get_rec_roll_ptr(
@@ -70,13 +69,18 @@ row_get_rec_roll_ptr(
 	const dict_index_t*	index,	/*!< in: clustered index */
 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/* Flags for row build type. */
+#define ROW_BUILD_NORMAL	0	/*!< build index row */
+#define ROW_BUILD_FOR_PURGE	1	/*!< build row for purge. */
+#define ROW_BUILD_FOR_UNDO	2	/*!< build row for undo. */
+#define ROW_BUILD_FOR_INSERT	3	/*!< build row for insert. */
 /*****************************************************************//**
 When an insert or purge to a table is performed, this function builds
 the entry to be inserted into or purged from an index on the table.
 @return index entry which should be inserted or purged
 @retval NULL if the externally stored columns in the clustered index record
 are unavailable and ext != NULL, or row is missing some needed columns. */
-UNIV_INTERN
 dtuple_t*
 row_build_index_entry_low(
 /*======================*/
@@ -85,9 +89,12 @@ row_build_index_entry_low(
 	const row_ext_t*	ext,	/*!< in: externally stored column
 					prefixes, or NULL */
 	dict_index_t*		index,	/*!< in: index on the table */
-	mem_heap_t*		heap)	/*!< in: memory heap from which
+	mem_heap_t*		heap,	/*!< in: memory heap from which
 					the memory for the index entry
 					is allocated */
+	ulint			flag)	/*!< in: ROW_BUILD_NORMAL,
+					ROW_BUILD_FOR_PURGE
+                                        or ROW_BUILD_FOR_UNDO */
 	MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4)));
 /*****************************************************************//**
 When an insert or purge to a table is performed, this function builds
@@ -111,8 +118,7 @@ row_build_index_entry(
 /*******************************************************************//**
 An inverse function to row_build_index_entry. Builds a row from a
 record in a clustered index.
-@return	own: row built; see the NOTE below! */
-UNIV_INTERN
+@return own: row built; see the NOTE below! */
 dtuple_t*
 row_build(
 /*======*/
@@ -153,14 +159,49 @@ row_build(
 	row_ext_t**		ext,	/*!< out, own: cache of
 					externally stored column
 					prefixes, or NULL */
-	mem_heap_t*		heap)	/*!< in: memory heap from which
+	mem_heap_t*		heap);	/*!< in: memory heap from which
 					the memory needed is allocated */
-	MY_ATTRIBUTE((nonnull(2,3,9)));
+
+/** An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index, with possible indexing on ongoing
+addition of new virtual columns.
+@param[in]	type		ROW_COPY_POINTERS or ROW_COPY_DATA;
+@param[in]	index		clustered index
+@param[in]	rec		record in the clustered index
+@param[in]	offsets		rec_get_offsets(rec,index) or NULL
+@param[in]	col_table	table, to check which
+				externally stored columns
+				occur in the ordering columns
+				of an index, or NULL if
+				index->table should be
+				consulted instead
+@param[in]	add_cols	default values of added columns, or NULL
+@param[in]	add_v		new virtual columns added
+				along with new indexes
+@param[in]	col_map		mapping of old column
+				numbers to new ones, or NULL
+@param[out]	ext		cache of externally stored column
+				prefixes, or NULL
+@param[in]	heap		memory heap from which
+				the memory needed is allocated
+@return own: row built */
+dtuple_t*
+row_build_w_add_vcol(
+	ulint			type,
+	const dict_index_t*	index,
+	const rec_t*		rec,
+	const ulint*		offsets,
+	const dict_table_t*	col_table,
+	const dtuple_t*		add_cols,
+	const dict_add_v_col_t*	add_v,
+	const ulint*		col_map,
+	row_ext_t**		ext,
+	mem_heap_t*		heap);
+
 /*******************************************************************//**
 Converts an index record to a typed data tuple.
 @return index entry built; does not set info_bits, and the data fields
 in the entry will point directly to rec */
-UNIV_INTERN
 dtuple_t*
 row_rec_to_index_entry_low(
 /*=======================*/
@@ -171,12 +212,11 @@ row_rec_to_index_entry_low(
 					stored columns */
 	mem_heap_t*		heap)	/*!< in: memory heap from which
 					the memory needed is allocated */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************************//**
 Converts an index record to a typed data tuple. NOTE that externally
 stored (often big) fields are NOT copied to heap.
-@return	own: index entry built */
-UNIV_INTERN
+@return own: index entry built */
 dtuple_t*
 row_rec_to_index_entry(
 /*===================*/
@@ -187,12 +227,11 @@ row_rec_to_index_entry(
 					stored columns */
 	mem_heap_t*		heap)	/*!< in: memory heap from which
 					the memory needed is allocated */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************************//**
 Builds from a secondary index record a row reference with which we can
 search the clustered index record.
-@return	own: row reference built; see the NOTE below! */
-UNIV_INTERN
+@return own: row reference built; see the NOTE below! */
 dtuple_t*
 row_build_row_ref(
 /*==============*/
@@ -210,11 +249,10 @@ row_build_row_ref(
 				as long as the row reference is used! */
 	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
 				needed is allocated */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 /*******************************************************************//**
 Builds from a secondary index record a row reference with which we can
 search the clustered index record. */
-UNIV_INTERN
 void
 row_build_row_ref_in_tuple(
 /*=======================*/
@@ -252,8 +290,7 @@ row_build_row_ref_fast(
 /***************************************************************//**
 Searches the clustered index record for a row, if we have the row
 reference.
-@return	TRUE if found */
-UNIV_INTERN
+@return TRUE if found */
 ibool
 row_search_on_row_ref(
 /*==================*/
@@ -267,8 +304,7 @@ row_search_on_row_ref(
 /*********************************************************************//**
 Fetches the clustered index record for a secondary index record. The latches
 on the secondary index record are preserved.
-@return	record or NULL, if no record found */
-UNIV_INTERN
+@return record or NULL, if no record found */
 rec_t*
 row_get_clust_rec(
 /*==============*/
@@ -294,8 +330,7 @@ enum row_search_result {
 
 /***************************************************************//**
 Searches an index record.
-@return	whether the record was found or buffered */
-UNIV_INTERN
+@return whether the record was found or buffered */
 enum row_search_result
 row_search_index_entry(
 /*===================*/
@@ -322,8 +357,7 @@ Not more than "buf_size" bytes are written to "buf".
 The result is always NUL-terminated (provided buf_size is positive) and the
 number of bytes that were written to "buf" is returned (including the
 terminating NUL).
-@return	number of bytes that were written */
-UNIV_INTERN
+@return number of bytes that were written */
 ulint
 row_raw_format(
 /*===========*/
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index ac62422be1f..08c0f18e95b 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,7 @@ Created 4/20/1996 Heikki Tuuri
 /*********************************************************************//**
 Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
 a clustered index record.
-@return	offset of DATA_TRX_ID */
+@return offset of DATA_TRX_ID */
 UNIV_INLINE
 ulint
 row_get_trx_id_offset(
@@ -56,7 +56,7 @@ row_get_trx_id_offset(
 
 /*********************************************************************//**
 Reads the trx id field from a clustered index record.
-@return	value of the field */
+@return value of the field */
 UNIV_INLINE
 trx_id_t
 row_get_rec_trx_id(
@@ -81,7 +81,7 @@ row_get_rec_trx_id(
 
 /*********************************************************************//**
 Reads the roll pointer field from a clustered index record.
-@return	value of the field */
+@return value of the field */
 UNIV_INLINE
 roll_ptr_t
 row_get_rec_roll_ptr(
@@ -126,7 +126,8 @@ row_build_index_entry(
 	dtuple_t*	entry;
 
 	ut_ad(dtuple_check_typed(row));
-	entry = row_build_index_entry_low(row, ext, index, heap);
+	entry = row_build_index_entry_low(row, ext, index, heap,
+					  ROW_BUILD_NORMAL);
 	ut_ad(!entry || dtuple_check_typed(entry));
 	return(entry);
 }
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index fd5bc755a22..1186aa6f26e 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -31,17 +31,16 @@ Created 12/19/1997 Heikki Tuuri
 #include "que0types.h"
 #include "dict0types.h"
 #include "trx0types.h"
+#include "read0types.h"
 #include "row0types.h"
 #include "que0types.h"
 #include "pars0sym.h"
 #include "btr0pcur.h"
-#include "read0read.h"
 #include "row0mysql.h"
 
 /*********************************************************************//**
 Creates a select node struct.
-@return	own: select node struct */
-UNIV_INTERN
+@return own: select node struct */
 sel_node_t*
 sel_node_create(
 /*============*/
@@ -49,7 +48,6 @@ sel_node_create(
 /*********************************************************************//**
 Frees the memory private to a select node when a query graph is freed,
 does not free the heap where the node was originally created. */
-UNIV_INTERN
 void
 sel_node_free_private(
 /*==================*/
@@ -57,14 +55,13 @@ sel_node_free_private(
 /*********************************************************************//**
 Frees a prefetch buffer for a column, including the dynamically allocated
 memory for data stored there. */
-UNIV_INTERN
 void
 sel_col_prefetch_buf_free(
 /*======================*/
 	sel_buf_t*	prefetch_buf);	/*!< in, own: prefetch buffer */
 /*********************************************************************//**
 Gets the plan node for the nth table in a join.
-@return	plan node */
+@return plan node */
 UNIV_INLINE
 plan_t*
 sel_node_get_nth_plan(
@@ -74,15 +71,14 @@ sel_node_get_nth_plan(
 /**********************************************************************//**
 Performs a select step. This is a high-level function used in SQL execution
 graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_sel_step(
 /*=========*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs an execution step of an open or close cursor statement node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 open_step(
@@ -90,16 +86,14 @@ open_step(
 	que_thr_t*	thr);	/*!< in: query thread */
 /**********************************************************************//**
 Performs a fetch for a cursor.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 fetch_step(
 /*=======*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /****************************************************************//**
 Sample callback function for fetch that prints each row.
-@return	always returns non-NULL */
-UNIV_INTERN
+@return always returns non-NULL */
 void*
 row_fetch_print(
 /*============*/
@@ -107,19 +101,30 @@ row_fetch_print(
 	void*	user_arg);	/*!< in:  not used */
 /***********************************************************//**
 Prints a row in a select result.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_printf_step(
 /*============*/
 	que_thr_t*	thr);	/*!< in: query thread */
+
+/** Copy used fields from cached row.
+Copy cache record field by field, don't touch fields that
+are not covered by current key.
+@param[out]	buf		Where to copy the MySQL row.
+@param[in]	cached_rec	What to copy (in MySQL row format).
+@param[in]	prebuilt	prebuilt struct. */
+void
+row_sel_copy_cached_fields_for_mysql(
+	byte*		buf,
+	const byte*	cached_rec,
+	row_prebuilt_t*	prebuilt);
+
 /****************************************************************//**
 Converts a key value stored in MySQL format to an Innobase dtuple. The last
 field of the key value may be just a prefix of a fixed length field: hence
 the parameter key_len. But currently we do not allow search keys where the
 last field is only a prefix of the full key field len and print a warning if
 such appears. */
-UNIV_INTERN
 void
 row_sel_convert_mysql_key_to_innobase(
 /*==================================*/
@@ -139,21 +144,72 @@ row_sel_convert_mysql_key_to_innobase(
 	const byte*	key_ptr,	/*!< in: MySQL key value */
 	ulint		key_len,	/*!< in: MySQL key value length */
 	trx_t*		trx);		/*!< in: transaction */
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
+
+
+/** Searches for rows in the database. This is used in the interface to
 MySQL. This function opens a cursor, and also implements fetch next
 and fetch prev. NOTE that if we do a search with a full key value
 from a unique index (ROW_SEL_EXACT), then we will not store the cursor
 position and fetch next or fetch prev must not be tried to the cursor!
+
+@param[out]	buf		buffer for the fetched row in MySQL format
+@param[in]	mode		search mode PAGE_CUR_L
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+				this contains the info to search_tuple,
+				index; if search tuple contains 0 field then
+				we position the cursor at start or the end of
+				index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+				Note: if this is != 0, then prebuilt must have a
+				pcur with stored position! In opening of a
+				cursor 'direction' should be 0.
 @return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INLINE
 dberr_t
 row_search_for_mysql(
+	byte*		buf,
+	page_cur_mode_t	mode,
+	row_prebuilt_t*	prebuilt,
+	ulint		match_mode,
+	ulint		direction)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Searches for rows in the database using cursor.
+Function is mainly used for tables that are shared across connections and
+so it employs technique that can help re-construct the rows that
+transaction is supposed to see.
+It also has optimization such as pre-caching the rows, using AHI, etc.
+
+@param[out]	buf		buffer for the fetched row in MySQL format
+@param[in]	mode		search mode PAGE_CUR_L
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+				this contains the info to search_tuple,
+				index; if search tuple contains 0 field then
+				we position the cursor at start or the end of
+				index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+				Note: if this is != 0, then prebuilt must have a
+				pcur with stored position! In opening of a
+				cursor 'direction' should be 0.
+@return DB_SUCCESS or error code */
+dberr_t
+row_search_mvcc(
+	byte*		buf,
+	page_cur_mode_t	mode,
+	row_prebuilt_t*	prebuilt,
+	ulint		match_mode,
+	ulint		direction)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/********************************************************************//**
+Count rows in a R-Tree leaf level.
+@return DB_SUCCESS if successful */
+dberr_t
+row_count_rtree_recs(
 /*=================*/
-	byte*		buf,		/*!< in/out: buffer for the fetched
-					row in the MySQL format */
-	ulint		mode,		/*!< in: search mode PAGE_CUR_L, ... */
 	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct for the
 					table handle; this contains the info
 					of search_tuple, index; if search
@@ -161,19 +217,13 @@ row_search_for_mysql(
 					position the cursor at the start or
 					the end of the index, depending on
 					'mode' */
-	ulint		match_mode,	/*!< in: 0 or ROW_SEL_EXACT or
-					ROW_SEL_EXACT_PREFIX */
-	ulint		direction)	/*!< in: 0 or ROW_SEL_NEXT or
-					ROW_SEL_PREV; NOTE: if this is != 0,
-					then prebuilt must have a pcur
-					with stored position! In opening of a
-					cursor 'direction' should be 0. */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	ulint*		n_rows);	/*!< out: number of entries
+					seen in the consistent read */
+
 /*******************************************************************//**
 Checks if MySQL at the moment is allowed for this table to retrieve a
 consistent read result, or store it to the query cache.
-@return	TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
+@return TRUE if storing or retrieving from the query cache is permitted */
 ibool
 row_search_check_if_query_cache_permitted(
 /*======================================*/
@@ -182,8 +232,7 @@ row_search_check_if_query_cache_permitted(
 					'/' char, table name */
 /*******************************************************************//**
 Read the max AUTOINC value from an index.
-@return	DB_SUCCESS if all OK else error code */
-UNIV_INTERN
+@return DB_SUCCESS if all OK else error code */
 dberr_t
 row_search_max_autoinc(
 /*===================*/
@@ -233,7 +282,7 @@ struct plan_t{
 					for each field in the search
 					tuple */
 	dtuple_t*	tuple;		/*!< search tuple */
-	ulint		mode;		/*!< search mode: PAGE_CUR_G, ... */
+	page_cur_mode_t	mode;		/*!< search mode: PAGE_CUR_G, ... */
 	ulint		n_exact_match;	/*!< number of first fields in
 					the search tuple which must be
 					exactly matched */
@@ -312,7 +361,7 @@ struct sel_node_t{
 					containing the search plan and the
 					search data structures */
 	que_node_t*	search_cond;	/*!< search condition */
-	read_view_t*	read_view;	/*!< if the query is a non-locking
+	ReadView*	read_view;	/*!< if the query is a non-locking
 					consistent read, its read view is
 					placed here, otherwise NULL */
 	ibool		consistent_read;/*!< TRUE if the select is a consistent,
@@ -402,6 +451,45 @@ enum row_sel_match_mode {
 				of a fixed length column) */
 };
 
+#ifdef UNIV_DEBUG
+/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
+# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
+        row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len)
+#else /* UNIV_DEBUG */
+/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
+# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
+        row_sel_field_store_in_mysql_format_func(dest,templ,src,len)
+#endif /* UNIV_DEBUG */
+
+/**************************************************************//**
+Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
+function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. */
+
+void
+row_sel_field_store_in_mysql_format_func(
+/*=====================================*/
+        byte*           dest,   /*!< in/out: buffer where to store; NOTE
+                                that BLOBs are not in themselves
+                                stored here: the caller must allocate
+                                and copy the BLOB into buffer before,
+                                and pass the pointer to the BLOB in
+                                'data' */
+        const mysql_row_templ_t* templ,
+                                /*!< in: MySQL column template.
+                                Its following fields are referenced:
+                                type, is_unsigned, mysql_col_len,
+                                mbminlen, mbmaxlen */
+#ifdef UNIV_DEBUG
+        const dict_index_t* index,
+                                /*!< in: InnoDB index */
+        ulint           field_no,
+                                /*!< in: templ->rec_field_no or
+                                templ->clust_rec_field_no or
+                                templ->icp_rec_field_no */
+#endif /* UNIV_DEBUG */
+        const byte*     data,   /*!< in: data to store */
+        ulint           len);    /*!< in: length of the data */
+
 #ifndef UNIV_NONINL
 #include "row0sel.ic"
 #endif
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
index d83a3448832..d14b41e3f5f 100644
--- a/storage/innobase/include/row0sel.ic
+++ b/storage/innobase/include/row0sel.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 12/19/1997 Heikki Tuuri
 
 /*********************************************************************//**
 Gets the plan node for the nth table in a join.
-@return	plan node */
+@return plan node */
 UNIV_INLINE
 plan_t*
 sel_node_get_nth_plan(
@@ -55,7 +55,7 @@ sel_node_reset_cursor(
 
 /**********************************************************************//**
 Performs an execution step of an open or close cursor statement node.
-@return	query thread to run next or NULL */
+@return query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 open_step(
@@ -103,3 +103,36 @@ open_step(
 
 	return(thr);
 }
+
+
+/** Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor!
+
+@param[out]	buf		buffer for the fetched row in MySQL format
+@param[in]	mode		search mode PAGE_CUR_L
+@param[in,out]	prebuilt	prebuilt struct for the table handler;
+				this contains the info to search_tuple,
+				index; if search tuple contains 0 field then
+				we position the cursor at start or the end of
+				index, depending on 'mode'
+@param[in]	match_mode	0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX
+@param[in]	direction	0 or ROW_SEL_NEXT or ROW_SEL_PREV;
+				Note: if this is != 0, then prebuilt must have a
+				pcur with stored position! In opening of a
+				cursor 'direction' should be 0.
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INLINE
+dberr_t
+row_search_for_mysql(
+	byte*			buf,
+	page_cur_mode_t		mode,
+	row_prebuilt_t*		prebuilt,
+	ulint			match_mode,
+	ulint			direction)
+{
+	return(row_search_mvcc(buf, mode, prebuilt, match_mode, direction));
+}
diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h
new file mode 100644
index 00000000000..b6c7810d522
--- /dev/null
+++ b/storage/innobase/include/row0trunc.h
@@ -0,0 +1,433 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0trunc.h
+TRUNCATE implementation
+
+Created 2013-04-25 Krunal Bauskar
+*******************************************************/
+
+#ifndef row0trunc_h
+#define row0trunc_h
+
+#include "row0mysql.h"
+#include "dict0boot.h"
+#include "fil0fil.h"
+#include "srv0start.h"
+#include "ut0new.h"
+
+#include <vector>
+
+/** The information of TRUNCATE log record.
+This class handles the recovery stage of TRUNCATE table. */
+class truncate_t {
+
+public:
+	/**
+	Constructor
+
+	@param old_table_id	old table id assigned to table before truncate
+	@param new_table_id	new table id that will be assigned to table
+				after truncate
+	@param dir_path		directory path */
+	truncate_t(
+		table_id_t	old_table_id,
+		table_id_t	new_table_id,
+		const char*	dir_path);
+
+	/**
+	Constructor
+
+	@param log_file_name	parse the log file during recovery to populate
+				information related to table to truncate */
+	truncate_t(const char*	log_file_name);
+
+	/**
+	Constructor
+
+	@param space_id		space in which the table resides
+	@param name		table name
+	@param tablespace_flags	tablespace flags used for recreating tablespace
+	@param log_flags	page format flag
+	@param recv_lsn		lsn of redo log record. */
+	truncate_t(
+		ulint		space_id,
+		const char*	name,
+		ulint		tablespace_flags,
+		ulint		log_flags,
+		lsn_t		recv_lsn);
+
+	/** Destructor */
+	~truncate_t();
+
+	/** The index information of MLOG_FILE_TRUNCATE redo record */
+	struct index_t {
+
+		/* Default copy constructor and destructor should be OK. */
+
+		index_t();
+
+		/**
+		Set the truncate log values for a compressed table.
+		@return DB_CORRUPTION or error code */
+		dberr_t set(const dict_index_t* index);
+
+		typedef std::vector<byte, ut_allocator<byte> >	fields_t;
+
+		/** Index id */
+		index_id_t	m_id;
+
+		/** Index type */
+		ulint		m_type;
+
+		/** Root Page Number */
+		ulint		m_root_page_no;
+
+		/** New Root Page Number.
+		Note: This field is not persisted to TRUNCATE log but used
+		during truncate table fix-up for updating SYS_XXXX tables. */
+		ulint		m_new_root_page_no;
+
+		/** Number of index fields */
+		ulint		m_n_fields;
+
+		/** DATA_TRX_ID column position. */
+		ulint		m_trx_id_pos;
+
+		/** Compressed table field meta data, encoded by
+		page_zip_fields_encode. Empty for non-compressed tables.
+		Should be NUL terminated. */
+		fields_t	m_fields;
+	};
+
+	/**
+	@return the directory path, can be NULL */
+	const char* get_dir_path() const
+	{
+		return(m_dir_path);
+	}
+
+	/**
+	Register index information
+
+	@param index	index information logged as part of truncate log. */
+	void add(index_t& index)
+	{
+		m_indexes.push_back(index);
+	}
+
+	/**
+	Add table to truncate post recovery.
+
+	@param ptr	table information needed to complete truncate of table. */
+	static void add(truncate_t* ptr)
+	{
+		s_tables.push_back(ptr);
+	}
+
+	/**
+	Clear registered index vector */
+	void clear()
+	{
+		m_indexes.clear();
+	}
+
+	/**
+	@return old table id of the table to truncate */
+	table_id_t old_table_id() const
+	{
+		return(m_old_table_id);
+	}
+
+	/**
+	@return new table id of the table to truncate */
+	table_id_t new_table_id() const
+	{
+		return(m_new_table_id);
+	}
+
+	/**
+	Update root page number in SYS_XXXX tables.
+
+	@param trx			transaction object
+	@param table_id			table id for which information needs to
+					be updated.
+	@param reserve_dict_mutex       if TRUE, acquire/release
+					dict_sys->mutex around call to pars_sql.
+	@param mark_index_corrupted	if true, then mark index corrupted
+	@return DB_SUCCESS or error code */
+	dberr_t update_root_page_no(
+		trx_t*		trx,
+		table_id_t	table_id,
+		ibool		reserve_dict_mutex,
+		bool		mark_index_corrupted) const;
+
+	/** Create an index for a table.
+	@param[in]	table_name		table name, for which to create
+	the index
+	@param[in]	space_id		space id where we have to
+	create the index
+	@param[in]	page_size		page size of the .ibd file
+	@param[in]	index_type		type of index to truncate
+	@param[in]	index_id		id of index to truncate
+	@param[in]	btr_redo_create_info	control info for ::btr_create()
+	@param[in,out]	mtr			mini-transaction covering the
+	create index
+	@return root page no or FIL_NULL on failure */
+	ulint create_index(
+		const char*		table_name,
+		ulint			space_id,
+		const page_size_t&	page_size,
+		ulint			index_type,
+		index_id_t      	index_id,
+		const btr_create_t&	btr_redo_create_info,
+		mtr_t*			mtr) const;
+
+	/** Create the indexes for a table
+	@param[in]	table_name	table name, for which to create the
+	indexes
+	@param[in]	space_id	space id where we have to create the
+	indexes
+	@param[in]	page_size	page size of the .ibd file
+	@param[in]	flags		tablespace flags
+	@param[in]	format_flags	page format flags
+	@return DB_SUCCESS or error code. */
+	dberr_t create_indexes(
+		const char*		table_name,
+		ulint			space_id,
+		const page_size_t&	page_size,
+		ulint			flags,
+		ulint			format_flags);
+
+	/** Check if index has been modified since TRUNCATE log snapshot
+	was recorded.
+	@param space_id	space_id where table/indexes resides.
+	@return true if modified else false */
+	bool is_index_modified_since_logged(
+		ulint		space_id,
+		ulint		root_page_no) const;
+
+	/** Drop indexes for a table.
+	No value is returned (the function is void).
+	@param space_id		space_id where table/indexes resides. */
+	void drop_indexes(ulint	space_id) const;
+
+	/**
+	Parses log record during recovery
+	@param start_ptr	buffer containing log body to parse
+	@param end_ptr		buffer end
+
+	@return DB_SUCCESS or error code */
+	dberr_t parse(
+		byte*		start_ptr,
+		const byte*	end_ptr);
+
+	/** Parse MLOG_TRUNCATE log record from REDO log file during recovery.
+	@param[in,out]	start_ptr	buffer containing log body to parse
+	@param[in]	end_ptr		buffer end
+	@param[in]	space_id	tablespace identifier
+	@return parsed upto or NULL. */
+	static byte* parse_redo_entry(
+		byte*		start_ptr,
+		const byte*	end_ptr,
+		ulint		space_id);
+
+	/**
+	Write a log record for truncating a single-table tablespace.
+
+	@param start_ptr	buffer to write log record
+	@param end_ptr		buffer end
+	@param space_id		space id
+	@param tablename	the table name in the usual
+				databasename/tablename format of InnoDB
+	@param flags		tablespace flags
+	@param format_flags	page format
+	@param lsn		lsn while logging */
+	dberr_t write(
+		byte*		start_ptr,
+		byte*		end_ptr,
+		ulint		space_id,
+		const char*	tablename,
+		ulint		flags,
+		ulint		format_flags,
+		lsn_t		lsn) const;
+
+	/**
+	@return number of indexes parsed from the truncate log record */
+	size_t indexes() const;
+
+	/**
+	Truncate a single-table tablespace. The tablespace must be cached
+	in the memory cache.
+
+	Note: This is defined in fil0fil.cc because it needs to access some
+	types that are local to that file.
+
+	@param space_id		space id
+	@param dir_path		directory path
+	@param tablename	the table name in the usual
+				databasename/tablename format of InnoDB
+	@param flags		tablespace flags
+	@param default_size	if true, truncate to default size if tablespace
+				is being newly re-initialized.
+	@return DB_SUCCESS or error */
+	static dberr_t truncate(
+		ulint		space_id,
+		const char*	dir_path,
+		const char*	tablename,
+		ulint		flags,
+		bool		default_size);
+
+	/**
+	Fix the table truncate by applying information parsed from TRUNCATE log.
+	Fix-up includes re-creating table (drop and re-create indexes).
+	@return error code or DB_SUCCESS */
+	static dberr_t fixup_tables_in_system_tablespace();
+
+	/**
+	Fix the table truncate by applying information parsed from TRUNCATE log.
+	Fix-up includes re-creating tablespace.
+	@return error code or DB_SUCCESS */
+	static dberr_t fixup_tables_in_non_system_tablespace();
+
+	/**
+	Check whether a tablespace was truncated during recovery
+	@param space_id		tablespace id to check
+	@return true if the tablespace was truncated */
+	static bool is_tablespace_truncated(ulint space_id);
+
+	/** Was tablespace truncated (on crash before checkpoint).
+	If the MLOG_TRUNCATE redo-record is still available then tablespace
+	was truncated and checkpoint is yet to happen.
+	@param[in]	space_id	tablespace id to check.
+	@return true if tablespace was truncated. */
+	static bool was_tablespace_truncated(ulint space_id);
+
+	/** Get the lsn associated with space.
+	@param[in]	space_id	tablespace id to check.
+	@return associated lsn. */
+	static lsn_t get_truncated_tablespace_init_lsn(ulint space_id);
+
+private:
+	typedef std::vector<index_t, ut_allocator<index_t> >	indexes_t;
+
+	/** Space ID of tablespace */
+	ulint			m_space_id;
+
+	/** ID of table that is being truncated. */
+	table_id_t		m_old_table_id;
+
+	/** New ID that will be assigned to table on truncation. */
+	table_id_t		m_new_table_id;
+
+	/** Data dir path of tablespace */
+	char*			m_dir_path;
+
+	/** Table name */
+	char*			m_tablename;
+
+	/** Tablespace Flags */
+	ulint			m_tablespace_flags;
+
+	/** Format flags (log flags; stored in page-no field of header) */
+	ulint			m_format_flags;
+
+	/** Index meta-data */
+	indexes_t		m_indexes;
+
+	/** LSN of TRUNCATE log record. */
+	lsn_t			m_log_lsn;
+
+	/** Log file name. */
+	char*			m_log_file_name;
+
+	/** Encryption information of the table */
+	fil_encryption_t	m_encryption;
+	uint			m_key_id;
+
+	/** Vector of tables to truncate. */
+	typedef	std::vector<truncate_t*, ut_allocator<truncate_t*> >
+		tables_t;
+
+	/** Information about tables to truncate post recovery */
+	static	tables_t	s_tables;
+
+	/** Information about truncated table
+	This is case when truncate is complete but checkpoint hasn't. */
+	typedef std::map<ulint, lsn_t>	truncated_tables_t;
+	static truncated_tables_t	s_truncated_tables;
+
+public:
+	/** If true then fix-up of table is active and so while creating
+	index instead of grabbing information from dict_index_t, grab it
+	from parsed truncate log record. */
+	static	bool		s_fix_up_active;
+};
+
+/**
+Parse truncate log file. */
+class TruncateLogParser {
+
+public:
+
+	/**
+	Scan and Parse truncate log files.
+
+	@param dir_path         look for log directory in following path
+	@return DB_SUCCESS or error code. */
+	static dberr_t scan_and_parse(
+		const char*	dir_path);
+
+private:
+	typedef std::vector<char*, ut_allocator<char*> >
+		trunc_log_files_t;
+
+private:
+	/**
+	Scan to find out truncate log file from the given directory path.
+
+	@param dir_path		look for log directory in following path.
+	@param log_files	cache to hold truncate log file name found.
+	@return DB_SUCCESS or error code. */
+	static dberr_t scan(
+		const char*		dir_path,
+		trunc_log_files_t&	log_files);
+
+	/**
+	Parse the log file and populate table to truncate information.
+	(Add this table to truncate information to central vector that is then
+	used by truncate fix-up routine to fix-up truncate action of the table.)
+
+	@param	log_file_name	log file to parse
+	@return DB_SUCCESS or error code. */
+	static dberr_t parse(
+		const char*		log_file_name);
+};
+
+
+/**
+Truncates a table for MySQL.
+@param table		table being truncated
+@param trx		transaction covering the truncate
+@return error code or DB_SUCCESS */
+dberr_t
+row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx);
+
+#endif /* row0trunc_h */
+
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
index 89e334e5433..95a714d2338 100644
--- a/storage/innobase/include/row0uins.h
+++ b/storage/innobase/include/row0uins.h
@@ -40,12 +40,12 @@ the same clustered index unique key did not have any record, even delete
 marked, at the time of the insert.  InnoDB is eager in a rollback:
 if it figures out that an index record will be removed in the purge
 anyway, it will remove it in the rollback.
-@return	DB_SUCCESS */
-UNIV_INTERN
+@return DB_SUCCESS */
 dberr_t
 row_undo_ins(
 /*=========*/
-	undo_node_t*	node)	/*!< in: row undo node */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 #ifndef UNIV_NONINL
 #include "row0uins.ic"
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
index 4f1d8e1f66c..a1bb42035a9 100644
--- a/storage/innobase/include/row0umod.h
+++ b/storage/innobase/include/row0umod.h
@@ -36,14 +36,13 @@ Created 2/27/1997 Heikki Tuuri
 
 /***********************************************************//**
 Undoes a modify operation on a row of a table.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 row_undo_mod(
 /*=========*/
 	undo_node_t*	node,	/*!< in: row undo node */
 	que_thr_t*	thr)	/*!< in: query thread */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 
 #ifndef UNIV_NONINL
 #include "row0umod.ic"
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 5dddfb4eae1..3d5b3574afa 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,8 +38,7 @@ Created 1/8/1997 Heikki Tuuri
 
 /********************************************************************//**
 Creates a row undo node to a query graph.
-@return	own: undo node */
-UNIV_INTERN
+@return own: undo node */
 undo_node_t*
 row_undo_node_create(
 /*=================*/
@@ -51,18 +50,17 @@ Looks for the clustered index record when node has the row reference.
 The pcur in node is used in the search. If found, stores the row to node,
 and stores the position of pcur, and detaches it. The pcur must be closed
 by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
+@return true if found; NOTE the node->pcur must be closed by the
 caller, regardless of the return value */
-UNIV_INTERN
-ibool
+bool
 row_undo_search_clust_to_pcur(
 /*==========================*/
-	undo_node_t*	node);	/*!< in: row undo node */
+	undo_node_t*	node)	/*!< in/out: row undo node */
+	MY_ATTRIBUTE((warn_unused_result));
 /***********************************************************//**
 Undoes a row operation in a table. This is a high-level function used
 in SQL execution graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_undo_step(
 /*==========*/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index e59ec58b63c..3c1033fe419 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -32,6 +32,7 @@ Created 12/27/1996 Heikki Tuuri
 #include "btr0types.h"
 #include "dict0types.h"
 #include "trx0types.h"
+#include <stack>
 
 #ifndef UNIV_HOTBACKUP
 # include "btr0pcur.h"
@@ -39,9 +40,18 @@ Created 12/27/1996 Heikki Tuuri
 # include "pars0types.h"
 #endif /* !UNIV_HOTBACKUP */
 
+/** std::deque of update node pointers that allocates via mem_heap_allocator.
+NOTE(review): relies on <deque> being pulled in (via <stack>?) - confirm. */
+typedef std::deque<upd_node_t*, mem_heap_allocator<upd_node_t*> >
+	deque_mem_heap_t;
+
+/** Double-ended queue of the update nodes that are to be processed for
+cascade operations */
+typedef deque_mem_heap_t upd_cascade_t;
+
 /*********************************************************************//**
 Creates an update vector object.
-@return	own: update vector object */
+@return own: update vector object */
 UNIV_INLINE
 upd_t*
 upd_create(
@@ -51,7 +61,7 @@ upd_create(
 /*********************************************************************//**
 Returns the number of fields in the update vector == number of columns
 to be updated by an update vector.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 upd_get_n_fields(
@@ -60,7 +70,7 @@ upd_get_n_fields(
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Returns the nth field of an update vector.
-@return	update vector field */
+@return update vector field */
 UNIV_INLINE
 upd_field_t*
 upd_get_nth_field(
@@ -82,21 +92,32 @@ upd_field_set_field_no(
 					index */
 	dict_index_t*	index,		/*!< in: index */
 	trx_t*		trx);		/*!< in: transaction */
+
+/** Set the virtual column number of an update vector field.
+@param[in,out]	upd_field	update vector field
+@param[in]	field_no	virtual column sequence number
+@param[in]	index		index */
+UNIV_INLINE
+void
+upd_field_set_v_field_no(
+	upd_field_t*	upd_field,
+	ulint		field_no,
+	dict_index_t*	index);
 /*********************************************************************//**
 Returns a field of an update vector by field_no.
-@return	update vector field, or NULL */
+@return update vector field, or NULL */
 UNIV_INLINE
 const upd_field_t*
 upd_get_field_by_field_no(
 /*======================*/
 	const upd_t*	update,	/*!< in: update vector */
-	ulint		no)	/*!< in: field_no */
-	MY_ATTRIBUTE((nonnull, pure));
+	ulint		no,	/*!< in: field_no */
+	bool		is_virtual) /*!< in: if it is a virtual column */
+	MY_ATTRIBUTE((warn_unused_result));
 /*********************************************************************//**
 Writes into the redo log the values of trx id and roll ptr and enough info
 to determine their positions within a clustered index record.
-@return	new pointer to mlog */
-UNIV_INTERN
+@return new pointer to mlog */
 byte*
 row_upd_write_sys_vals_to_log(
 /*==========================*/
@@ -123,7 +144,6 @@ row_upd_rec_sys_fields(
 				  can be 0 during IMPORT */
 /*********************************************************************//**
 Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
 void
 row_upd_index_entry_sys_field(
 /*==========================*/
@@ -136,15 +156,13 @@ row_upd_index_entry_sys_field(
 	ib_uint64_t	val);	/*!< in: value to write */
 /*********************************************************************//**
 Creates an update node for a query graph.
-@return	own: update node */
-UNIV_INTERN
+@return own: update node */
 upd_node_t*
 upd_node_create(
 /*============*/
 	mem_heap_t*	heap);	/*!< in: mem heap where created */
 /***********************************************************//**
 Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
 void
 row_upd_index_write_log(
 /*====================*/
@@ -159,7 +177,6 @@ Returns TRUE if row update changes size of some field in index or if some
 field to be updated is stored externally in rec or update.
 @return TRUE if the update changes the size of some field in index or
 the field is external in rec or update */
-UNIV_INTERN
 ibool
 row_upd_changes_field_size_or_external(
 /*===================================*/
@@ -169,7 +186,6 @@ row_upd_changes_field_size_or_external(
 /***********************************************************//**
 Returns true if row update contains disowned external fields.
 @return true if the update contains disowned external fields. */
-UNIV_INTERN
 bool
 row_upd_changes_disowned_external(
 /*==============================*/
@@ -182,7 +198,6 @@ record given. No field size changes are allowed. This function is
 usually invoked on a clustered index. The only use case for a
 secondary index is row_ins_sec_index_entry_by_modify() or its
 counterpart in ibuf_insert_to_index_page(). */
-UNIV_INTERN
 void
 row_upd_rec_in_place(
 /*=================*/
@@ -197,8 +212,7 @@ row_upd_rec_in_place(
 Builds an update vector from those fields which in a secondary index entry
 differ from a record that has the equal ordering fields. NOTE: we compare
 the fields as binary strings!
-@return	own: update vector of differing fields */
-UNIV_INTERN
+@return own: update vector of differing fields */
 upd_t*
 row_upd_build_sec_rec_difference_binary(
 /*====================================*/
@@ -208,30 +222,36 @@ row_upd_build_sec_rec_difference_binary(
 	const dtuple_t*	entry,	/*!< in: entry to insert */
 	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
 	MY_ATTRIBUTE((warn_unused_result, nonnull));
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
+/** Builds an update vector from those fields, excluding the roll ptr and
 trx id fields, which in an index entry differ from a record that has
 the equal ordering fields. NOTE: we compare the fields as binary strings!
+@param[in]	index		clustered index
+@param[in]	entry		clustered index entry to insert
+@param[in]	rec		clustered index record
+@param[in]	offsets		rec_get_offsets(rec,index), or NULL
+@param[in]	no_sys		skip the system columns
+				DB_TRX_ID and DB_ROLL_PTR
+@param[in]	trx		transaction (for diagnostics),
+				or NULL
+@param[in]	heap		memory heap from which allocated
+@param[in,out]	mysql_table	NULL, or mysql table object when
+				user thread invokes dml
 @return own: update vector of differing fields, excluding roll ptr and
 trx id */
-UNIV_INTERN
-const upd_t*
+upd_t*
 row_upd_build_difference_binary(
-/*============================*/
-	dict_index_t*	index,	/*!< in: clustered index */
-	const dtuple_t*	entry,	/*!< in: entry to insert */
-	const rec_t*	rec,	/*!< in: clustered index record */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
-	bool		no_sys,	/*!< in: skip the system columns
-				DB_TRX_ID and DB_ROLL_PTR */
-	trx_t*		trx,	/*!< in: transaction (for diagnostics),
-				or NULL */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	dict_index_t*	index,
+	const dtuple_t*	entry,
+	const rec_t*	rec,
+	const ulint*	offsets,
+	bool		no_sys,
+	trx_t*		trx,
+	mem_heap_t*	heap,
+	TABLE*		mysql_table)
 	MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
 /***********************************************************//**
 Replaces the new column values stored in the update vector to the index entry
 given. */
-UNIV_INTERN
 void
 row_upd_index_replace_new_col_vals_index_pos(
 /*=========================================*/
@@ -254,7 +274,6 @@ row_upd_index_replace_new_col_vals_index_pos(
 /***********************************************************//**
 Replaces the new column values stored in the update vector to the index entry
 given. */
-UNIV_INTERN
 void
 row_upd_index_replace_new_col_vals(
 /*===============================*/
@@ -272,7 +291,6 @@ row_upd_index_replace_new_col_vals(
 	MY_ATTRIBUTE((nonnull));
 /***********************************************************//**
 Replaces the new column values stored in the update vector. */
-UNIV_INTERN
 void
 row_upd_replace(
 /*============*/
@@ -287,6 +305,23 @@ row_upd_replace(
 	const upd_t*		update,	/*!< in: an update vector built for the
 					clustered index */
 	mem_heap_t*		heap);	/*!< in: memory heap */
+/** Replaces the virtual column values stored in a dtuple with those of
+an update vector.
+@param[in,out]	row	dtuple whose columns are to be updated
+@param[in]	table	table
+@param[in]	update	an update vector built for the clustered index
+@param[in]	upd_new	whether to update to the new or the old value
+@param[in,out]	undo_row undo row (if it needs to be updated)
+@param[in]	ptr	remaining part in update undo log */
+void
+row_upd_replace_vcol(
+	dtuple_t*		row,
+	const dict_table_t*	table,
+	const upd_t*		update,
+	bool			upd_new,
+	dtuple_t*		undo_row,
+	const byte*		ptr);
+
 /***********************************************************//**
 Checks if an update vector changes an ordering field of an index record.
 
@@ -294,7 +329,6 @@ This function is fast if the update vector is short or the number of ordering
 fields in the index is small. Otherwise, this can be quadratic.
 NOTE: we compare the fields as binary strings!
 @return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
 ibool
 row_upd_changes_ord_field_binary_func(
 /*==================================*/
@@ -309,21 +343,22 @@ row_upd_changes_ord_field_binary_func(
 				row and the data values in update are not
 				known when this function is called, e.g., at
 				compile time */
-	const row_ext_t*ext)	/*!< NULL, or prefixes of the externally
+	const row_ext_t*ext,	/*!< NULL, or prefixes of the externally
 				stored columns in the old row */
+	ulint		flag)	/*!< in: ROW_BUILD_NORMAL,
+				ROW_BUILD_FOR_PURGE or ROW_BUILD_FOR_UNDO */
 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
 #ifdef UNIV_DEBUG
 # define row_upd_changes_ord_field_binary(index,update,thr,row,ext)	\
-	row_upd_changes_ord_field_binary_func(index,update,thr,row,ext)
+	row_upd_changes_ord_field_binary_func(index,update,thr,row,ext,0)
 #else /* UNIV_DEBUG */
 # define row_upd_changes_ord_field_binary(index,update,thr,row,ext)	\
-	row_upd_changes_ord_field_binary_func(index,update,row,ext)
+	row_upd_changes_ord_field_binary_func(index,update,row,ext,0)
 #endif /* UNIV_DEBUG */
 /***********************************************************//**
 Checks if an FTS indexed column is affected by an UPDATE.
 @return offset within fts_t::indexes if FTS indexed column updated else
 ULINT_UNDEFINED */
-UNIV_INTERN
 ulint
 row_upd_changes_fts_column(
 /*=======================*/
@@ -332,7 +367,6 @@ row_upd_changes_fts_column(
 /***********************************************************//**
 Checks if an FTS Doc ID column is affected by an UPDATE.
 @return whether Doc ID column is affected */
-UNIV_INTERN
 bool
 row_upd_changes_doc_id(
 /*===================*/
@@ -346,17 +380,25 @@ fields in the index is small. Otherwise, this can be quadratic.
 NOTE: we compare the fields as binary strings!
 @return TRUE if update vector may change an ordering field in an index
 record */
-UNIV_INTERN
 ibool
 row_upd_changes_some_index_ord_field_binary(
 /*========================================*/
 	const dict_table_t*	table,	/*!< in: table */
 	const upd_t*		update);/*!< in: update vector for the row */
+/** Stores to the heap the row on which node->pcur is positioned.
+@param[in]	node		row update node
+@param[in]	thd		MySQL thread handle
+@param[in,out]	mysql_table	NULL, or the MySQL table object when a
+				user thread invokes DML */
+void
+row_upd_store_row(
+	upd_node_t*	node,
+	THD*		thd,
+	TABLE*		mysql_table);
 /***********************************************************//**
 Updates a row in a table. This is a high-level function used
 in SQL execution graphs.
-@return	query thread to run next or NULL */
-UNIV_INTERN
+@return query thread to run next or NULL */
 que_thr_t*
 row_upd_step(
 /*=========*/
@@ -364,20 +406,18 @@ row_upd_step(
 #endif /* !UNIV_HOTBACKUP */
 /*********************************************************************//**
 Parses the log data of system field values.
-@return	log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
 byte*
 row_upd_parse_sys_vals(
 /*===================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
 	ulint*		pos,	/*!< out: TRX_ID position in record */
 	trx_id_t*	trx_id,	/*!< out: trx id */
 	roll_ptr_t*	roll_ptr);/*!< out: roll ptr */
 /*********************************************************************//**
 Updates the trx id and roll ptr field in a clustered index record in database
 recovery. */
-UNIV_INTERN
 void
 row_upd_rec_sys_fields_in_recovery(
 /*===============================*/
@@ -389,13 +429,12 @@ row_upd_rec_sys_fields_in_recovery(
 	roll_ptr_t	roll_ptr);/*!< in: roll ptr of the undo log record */
 /*********************************************************************//**
 Parses the log data written by row_upd_index_write_log.
-@return	log data end or NULL */
-UNIV_INTERN
+@return log data end or NULL */
 byte*
 row_upd_index_parse(
 /*================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
 	mem_heap_t*	heap,	/*!< in: memory heap where update vector is
 				built */
 	upd_t**		update_out);/*!< out: update vector */
@@ -407,7 +446,9 @@ struct upd_field_t{
 					the clustered index, but in updating
 					a secondary index record in btr0cur.cc
 					this is the position in the secondary
-					index */
+					index. If this field is a virtual
+					column, then field_no represents
+					the nth virtual column in the table */
 #ifndef UNIV_HOTBACKUP
 	unsigned	orig_len:16;	/*!< original length of the locally
 					stored part of an externally stored
@@ -418,14 +459,61 @@ struct upd_field_t{
 					query graph */
 #endif /* !UNIV_HOTBACKUP */
 	dfield_t	new_val;	/*!< new value for the column */
+	dfield_t*	old_v_val;	/*!< old value for the virtual column */
 };
 
+
+/* Check whether an update field is for a virtual column (DATA_VIRTUAL set) */
+#define upd_fld_is_virtual_col(upd_fld)			\
+	(((upd_fld)->new_val.type.prtype & DATA_VIRTUAL) == DATA_VIRTUAL)
+
+/* Set DATA_VIRTUAL on an update field to mark it as a virtual column */
+#define upd_fld_set_virtual_col(upd_fld)			\
+	((upd_fld)->new_val.type.prtype |= DATA_VIRTUAL)
+
 /* Update vector structure */
 struct upd_t{
+	mem_heap_t*	heap;		/*!< heap the vector was allocated from */
 	ulint		info_bits;	/*!< new value of info bits to record;
 					default is 0 */
+	dtuple_t*	old_vrow;	/*!< pointer to the old row; currently
+					used for virtual column updates */
 	ulint		n_fields;	/*!< number of update fields */
 	upd_field_t*	fields;		/*!< array of update fields */
+
+	/** Copy an update field into fields[n_fields] and increment n_fields.
+	@param[in]	field	update field to append */
+	void append(const upd_field_t& field)
+	{
+		fields[n_fields] = field;
+		++n_fields;
+	}
+
+	/** Check whether some update field carries the given field_no.
+	@return true if such a field exists, false otherwise */
+	bool is_modified(const ulint field_no) const
+	{
+		ulint	i = 0;
+		while (i < n_fields && fields[i].field_no != field_no) {
+			++i;
+		}
+		return(i < n_fields);
+	}
+
+#ifdef UNIV_DEBUG
+	/** Assert each external value has >= BTR_EXTERN_FIELD_REF_SIZE bytes.
+	@return true (a violation is reported through ut_ad()) */
+	bool validate() const
+	{
+		for (ulint i = 0; i < n_fields; ++i) {
+			dfield_t*	field = &fields[i].new_val;
+			ut_ad(!dfield_is_ext(field)
+			      || dfield_get_len(field) >= BTR_EXTERN_FIELD_REF_SIZE);
+		}
+		return(true);
+	}
+#endif // UNIV_DEBUG
+
 };
 
 #ifndef UNIV_HOTBACKUP
@@ -444,11 +532,38 @@ struct upd_node_t{
 	dict_foreign_t*	foreign;/* NULL or pointer to a foreign key
 				constraint if this update node is used in
 				doing an ON DELETE or ON UPDATE operation */
-	upd_node_t*	cascade_node;/* NULL or an update node template which
-				is used to implement ON DELETE/UPDATE CASCADE
-				or ... SET NULL for foreign keys */
-	mem_heap_t*	cascade_heap;/* NULL or a mem heap where the cascade
-				node is created */
+
+	bool		cascade_top;
+				/*!< true if top level in cascade */
+
+	upd_cascade_t*	cascade_upd_nodes;
+				/*!< Queue of update nodes to handle the
+				cascade of update and delete operations in an
+				iterative manner.  Their parent/child
+				relations are properly maintained. All update
+				nodes point to this same queue.  All these
+				nodes are allocated in heap pointed to by
+				upd_node_t::cascade_heap. */
+
+	upd_cascade_t*	new_upd_nodes;
+				/*!< Intermediate list of update nodes in a
+				cascading update/delete operation.  After
+				processing one update node, this will be
+				concatenated to cascade_upd_nodes.  This extra
+				list is needed so that retry because of
+				DB_LOCK_WAIT works correctly. */
+
+	upd_cascade_t*	processed_cascades;
+				/*!< List of processed update nodes in a
+				cascading update/delete operation.  All the
+				cascade nodes are stored here, so that memory
+				can be freed. */
+
+	mem_heap_t*	cascade_heap;
+				/*!< NULL or a mem heap where cascade_upd_nodes
+				are created.  This heap is owned by the node
+				that has cascade_top=true. */
+
 	sel_node_t*	select;	/*!< query graph subtree implementing a base
 				table cursor: the rows returned will be
 				updated */
@@ -495,7 +610,25 @@ struct upd_node_t{
 	sym_node_t*	table_sym;/* table node in symbol table */
 	que_node_t*	col_assign_list;
 				/* column assignment list */
+
+	doc_id_t	fts_doc_id;
+				/* The FTS doc id of the row that is now
+				pointed to by the pcur. */
+
+	doc_id_t	fts_next_doc_id;
+				/* The new fts doc id that will be used
+				in update operation */
+
 	ulint		magic_n;
+
+#ifndef DBUG_OFF
+	/** Print information about this object into the trace log file. */
+	void dbug_trace();
+
+	/** Ensure that the member cascade_upd_nodes has only one update node
+	for each of the tables.  This is useful for testing purposes. */
+	void check_cascade_only_once();
+#endif /* !DBUG_OFF */
 };
 
 #define	UPD_NODE_MAGIC_N	1579975
@@ -511,11 +644,6 @@ struct upd_node_t{
 #define UPD_NODE_INSERT_CLUSTERED  3	/* clustered index record should be
 					inserted, old record is already delete
 					marked */
-#define UPD_NODE_INSERT_BLOB	   4	/* clustered index record should be
-					inserted, old record is already
-					delete-marked; non-updated BLOBs
-					should be inherited by the new record
-					and disowned by the old record */
 #define UPD_NODE_UPDATE_ALL_SEC	   5	/* an ordering field of the clustered
 					index record was changed, or this is
 					a delete operation: should update
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 618a77fa4bf..ab1dc5c7076 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,7 @@ Created 12/27/1996 Heikki Tuuri
 
 /*********************************************************************//**
 Creates an update vector object.
-@return	own: update vector object */
+@return own: update vector object */
 UNIV_INLINE
 upd_t*
 upd_create(
@@ -44,11 +44,12 @@ upd_create(
 {
 	upd_t*	update;
 
-	update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
+	update = static_cast<upd_t*>(mem_heap_zalloc(
+			heap, sizeof(upd_t) + sizeof(upd_field_t) * n));
 
 	update->n_fields = n;
-	update->fields = (upd_field_t*)
-		mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
+	update->fields = reinterpret_cast<upd_field_t*>(&update[1]);
+	update->heap = heap;
 
 	return(update);
 }
@@ -56,7 +57,7 @@ upd_create(
 /*********************************************************************//**
 Returns the number of fields in the update vector == number of columns
 to be updated by an update vector.
-@return	number of fields */
+@return number of fields */
 UNIV_INLINE
 ulint
 upd_get_n_fields(
@@ -71,7 +72,7 @@ upd_get_n_fields(
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Returns the nth field of an update vector.
-@return	update vector field */
+@return update vector field */
 UNIV_INLINE
 upd_field_t*
 upd_get_nth_field(
@@ -103,13 +104,12 @@ upd_field_set_field_no(
 	upd_field->orig_len = 0;
 
 	if (field_no >= dict_index_get_n_fields(index)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to access field %lu in ",
-			(ulong) field_no);
-		dict_index_name_print(stderr, trx, index);
-		fprintf(stderr, "\n"
-			"InnoDB: but index only has %lu fields\n",
-			(ulong) dict_index_get_n_fields(index));
+		ib::error()
+			<< " trying to access field " << field_no
+			<< " in " << index->name
+			<< " of table " << index->table->name
+			<< " which contains only " << index->n_fields
+			<< " fields";
 		ut_ad(0);
 	}
 
@@ -117,20 +117,55 @@ upd_field_set_field_no(
 			   dfield_get_type(&upd_field->new_val));
 }
 
+/** Set the virtual column number of an update field and copy the column type.
+@param[in,out]	upd_field	update vector field
+@param[in]	field_no	virtual column sequence number
+@param[in]	index		index; index->table supplies the virtual columns */
+UNIV_INLINE
+void
+upd_field_set_v_field_no(
+	upd_field_t*	upd_field,
+	ulint		field_no,
+	dict_index_t*	index)
+{
+	upd_field->field_no = field_no;
+	upd_field->orig_len = 0;
+
+	if (field_no >= dict_table_get_n_v_cols(index->table)) {
+		ib::error()
+			<< " trying to access virtual field " << field_no
+			<< " in " << index->name
+			<< " of table " << index->table->name
+			<< " which contains only " << index->table->n_v_cols
+			<< " virtual columns";
+		ut_ad(0);
+	}
+
+	dict_col_copy_type(&dict_table_get_nth_v_col(
+				index->table, field_no)->m_col,
+			   dfield_get_type(&upd_field->new_val));
+}
+
 /*********************************************************************//**
 Returns a field of an update vector by field_no.
-@return	update vector field, or NULL */
+@return update vector field, or NULL */
 UNIV_INLINE
 const upd_field_t*
 upd_get_field_by_field_no(
 /*======================*/
 	const upd_t*	update,	/*!< in: update vector */
-	ulint		no)	/*!< in: field_no */
+	ulint		no,	/*!< in: field_no */
+	bool		is_virtual) /*!< in: if it is virtual column */
 {
 	ulint	i;
 	for (i = 0; i < upd_get_n_fields(update); i++) {
 		const upd_field_t*	uf = upd_get_nth_field(update, i);
 
+		/* matches only if the field matches that of is_virtual */
+		if ((!is_virtual) != (!upd_fld_is_virtual_col(uf))) {
+			continue;
+		}
+
 		if (uf->field_no == no) {
 
 			return(uf);
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index 7b850215701..489db305fac 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,10 @@ Created 2/6/1997 Heikki Tuuri
 #include "que0types.h"
 #include "rem0types.h"
 #include "mtr0mtr.h"
-#include "read0types.h"
+#include "dict0mem.h"
+
+// Forward declaration
+class ReadView;
 
 /*****************************************************************//**
 Finds out if an active transaction has inserted or modified a secondary
@@ -42,33 +45,36 @@ index record.
 NOTE that this function can return false positives but never false
 negatives. The caller must confirm all positive results by calling
 trx_is_active() while holding lock_sys->mutex. */
-UNIV_INTERN
-trx_id_t
+trx_t*
 row_vers_impl_x_locked(
 /*===================*/
 	const rec_t*	rec,	/*!< in: record in a secondary index */
 	dict_index_t*	index,	/*!< in: the secondary index */
 	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+
 /*****************************************************************//**
 Finds out if we must preserve a delete marked earlier version of a clustered
 index record, because it is >= the purge view.
-@return	TRUE if earlier version should be preserved */
-UNIV_INTERN
+@param[in]	trx_id		transaction id in the version
+@param[in]	name		table name
+@param[in,out]	mtr		mini-transaction holding the latch on the
+				clustered index record; it will also hold
+				the latch on purge_view
+@return TRUE if earlier version should be preserved */
 ibool
 row_vers_must_preserve_del_marked(
 /*==============================*/
-	trx_id_t	trx_id,	/*!< in: transaction id in the version */
-	mtr_t*		mtr);	/*!< in: mtr holding the latch on the
-				clustered index record; it will also
-				hold the latch on purge_view */
+	trx_id_t		trx_id,
+	const table_name_t&	name,
+	mtr_t*			mtr);
+
 /*****************************************************************//**
 Finds out if a version of the record, where the version >= the current
 purge view, should have ientry as its secondary index entry. We check
 if there is any not delete marked version of the record where the trx
 id >= purge view, and the secondary index entry == ientry; exactly in
 this case we return TRUE.
-@return	TRUE if earlier version should have */
-UNIV_INTERN
+@return TRUE if earlier version should have */
 ibool
 row_vers_old_has_index_entry(
 /*=========================*/
@@ -80,13 +86,15 @@ row_vers_old_has_index_entry(
 	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
 				also hold the latch on purge_view */
 	dict_index_t*	index,	/*!< in: the secondary index */
-	const dtuple_t*	ientry);/*!< in: the secondary index entry */
+	const dtuple_t*	ientry,	/*!< in: the secondary index entry */
+	roll_ptr_t	roll_ptr,/*!< in: roll_ptr for the purge record */
+	trx_id_t	trx_id);/*!< in: transaction ID on the purging record */
+
 /*****************************************************************//**
 Constructs the version of a clustered index record which a consistent
 read should see. We assume that the trx id stored in rec is such that
 the consistent read should not see rec in its present version.
-@return	DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
+@return DB_SUCCESS or DB_MISSING_HISTORY */
 dberr_t
 row_vers_build_for_consistent_read(
 /*===============================*/
@@ -99,23 +107,22 @@ row_vers_build_for_consistent_read(
 	dict_index_t*	index,	/*!< in: the clustered index */
 	ulint**		offsets,/*!< in/out: offsets returned by
 				rec_get_offsets(rec, index) */
-	read_view_t*	view,	/*!< in: the consistent read view */
+	ReadView*	view,	/*!< in: the consistent read view */
 	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
 				the offsets are allocated */
 	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
 				*old_vers is allocated; memory for possible
 				intermediate versions is allocated and freed
 				locally within the function */
-	rec_t**		old_vers)/*!< out, own: old version, or NULL
+	rec_t**		old_vers,/*!< out, own: old version, or NULL
 				if the history is missing or the record
 				does not exist in the view, that is,
 				it was freshly inserted afterwards */
-	MY_ATTRIBUTE((nonnull(1,2,3,4,5,6,7)));
+	const dtuple_t**vrow);	/*!< out: reports virtual column info if any */
 
 /*****************************************************************//**
 Constructs the last committed version of a clustered index record,
 which should be seen by a semi-consistent read. */
-UNIV_INTERN
 void
 row_vers_build_for_semi_consistent_read(
 /*====================================*/
@@ -133,10 +140,11 @@ row_vers_build_for_semi_consistent_read(
 				*old_vers is allocated; memory for possible
 				intermediate versions is allocated and freed
 				locally within the function */
-	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
+	const rec_t**	old_vers,/*!< out: rec, old version, or NULL if the
 				record does not exist in the view, that is,
 				it was freshly inserted afterwards */
-	MY_ATTRIBUTE((nonnull(1,2,3,4,5)));
+	const dtuple_t**vrow);	/*!< out: holds virtual column info if any
+				is updated in the view */
 
 
 #ifndef UNIV_NONINL
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
index cf61ef5528d..9573c5add84 100644
--- a/storage/innobase/include/srv0conc.h
+++ b/storage/innobase/include/srv0conc.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -52,32 +52,18 @@ we could get a deadlock. Value of 0 will disable the concurrency check. */
 
 extern ulong	srv_thread_concurrency;
 
-/*********************************************************************//**
-Initialise the concurrency management data structures */
-void
-srv_conc_init(void);
-/*===============*/
-
-/*********************************************************************//**
-Free the concurrency management data structures */
-void
-srv_conc_free(void);
-/*===============*/
-
+struct row_prebuilt_t;
 /*********************************************************************//**
 Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-UNIV_INTERN
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue.
+@param[in,out]	prebuilt	row prebuilt handler */
 void
 srv_conc_enter_innodb(
-/*==================*/
-	trx_t*	trx);		/*!< in: transaction object associated
-				with the thread */
+	row_prebuilt_t*	prebuilt);
 
 /*********************************************************************//**
 This lets a thread enter InnoDB regardless of the number of threads inside
 InnoDB. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
 void
 srv_conc_force_enter_innodb(
 /*========================*/
@@ -87,7 +73,6 @@ srv_conc_force_enter_innodb(
 /*********************************************************************//**
 This must be called when a thread exits InnoDB in a lock wait or at the
 end of an SQL statement. */
-UNIV_INTERN
 void
 srv_conc_force_exit_innodb(
 /*=======================*/
@@ -96,14 +81,12 @@ srv_conc_force_exit_innodb(
 
 /*********************************************************************//**
 Get the count of threads waiting inside InnoDB. */
-UNIV_INTERN
 ulint
 srv_conc_get_waiting_threads(void);
 /*==============================*/
 
 /*********************************************************************//**
 Get the count of threads active inside InnoDB. */
-UNIV_INTERN
 ulint
 srv_conc_get_active_threads(void);
 /*==============================*/
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index d15110726b9..d5a305bdf68 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -1,6 +1,6 @@
 /***********************************************************************
 
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 Copyright (c) 2013, 2016, MariaDB Corporation.
 
@@ -30,6 +30,14 @@ Created 12/15/2009	Jimmy Yang
 #define srv0mon_h
 
 #include "univ.i"
+
+#ifndef __STDC_LIMIT_MACROS
+/* Required for FreeBSD so that INT64_MAX is defined. */
+#define __STDC_LIMIT_MACROS
+#endif /* __STDC_LIMIT_MACROS */
+
+#include <stdint.h>
+
 #ifndef UNIV_HOTBACKUP
 
 
@@ -42,7 +50,7 @@ enum monitor_running_status {
 typedef enum monitor_running_status	monitor_running_t;
 
 /** Monitor counter value type */
-typedef	ib_int64_t			mon_type_t;
+typedef	int64_t				mon_type_t;
 
 /** Two monitor structures are defined in this file. One is
 "monitor_value_t" which contains dynamic counter values for each
@@ -98,9 +106,15 @@ enum monitor_type_t {
 };
 
 /** Counter minimum value is initialized to be max value of
- mon_type_t (ib_int64_t) */
-#define	MIN_RESERVED		((mon_type_t) (IB_UINT64_MAX >> 1))
-#define	MAX_RESERVED		(~MIN_RESERVED)
+ mon_type_t (int64_t) */
+#ifndef INT64_MAX
+#define INT64_MAX		(9223372036854775807LL)
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN		(-9223372036854775807LL-1)
+#endif
+#define	MIN_RESERVED		INT64_MAX
+#define	MAX_RESERVED		INT64_MIN
 
 /** This enumeration defines internal monitor identifier used internally
 to identify each particular counter. Its value indexes into two arrays,
@@ -125,7 +139,6 @@ enum monitor_id_t {
 	MONITOR_TABLE_OPEN,
 	MONITOR_TABLE_CLOSE,
 	MONITOR_TABLE_REFERENCE,
-	MONITOR_OVLD_META_MEM_POOL,
 
 	/* Lock manager related counters */
 	MONITOR_MODULE_LOCK,
@@ -175,7 +188,6 @@ enum monitor_id_t {
 	MONITOR_FLUSH_BATCH_SCANNED,
 	MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
 	MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
-	MONITOR_FLUSH_HP_RESCAN,
 	MONITOR_FLUSH_BATCH_TOTAL_PAGE,
 	MONITOR_FLUSH_BATCH_COUNT,
 	MONITOR_FLUSH_BATCH_PAGES,
@@ -183,6 +195,24 @@ enum monitor_id_t {
 	MONITOR_FLUSH_NEIGHBOR_COUNT,
 	MONITOR_FLUSH_NEIGHBOR_PAGES,
 	MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+
+	MONITOR_FLUSH_N_TO_FLUSH_BY_AGE,
+	MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
+	MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT,
+
+	MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
+	MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD,
+	MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
+	MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST,
+	MONITOR_FLUSH_AVG_TIME,
+
+	MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
+	MONITOR_LRU_BATCH_FLUSH_AVG_PASS,
+	MONITOR_FLUSH_AVG_PASS,
+
+	MONITOR_LRU_GET_FREE_LOOPS,
+	MONITOR_LRU_GET_FREE_WAITS,
+
 	MONITOR_FLUSH_AVG_PAGE_RATE,
 	MONITOR_FLUSH_LSN_AVG_RATE,
 	MONITOR_FLUSH_PCT_FOR_DIRTY,
@@ -300,12 +330,13 @@ enum monitor_id_t {
 	MONITOR_OVLD_BUF_OLDEST_LSN,
 	MONITOR_OVLD_MAX_AGE_ASYNC,
 	MONITOR_OVLD_MAX_AGE_SYNC,
-	MONITOR_PENDING_LOG_WRITE,
+	MONITOR_PENDING_LOG_FLUSH,
 	MONITOR_PENDING_CHECKPOINT_WRITE,
 	MONITOR_LOG_IO,
 	MONITOR_OVLD_LOG_WAITS,
 	MONITOR_OVLD_LOG_WRITE_REQUEST,
 	MONITOR_OVLD_LOG_WRITES,
+	MONITOR_OVLD_LOG_PADDED,
 
 	/* Page Manager related counters */
 	MONITOR_MODULE_PAGE,
@@ -387,10 +418,13 @@ enum monitor_id_t {
 	MONITOR_OVLD_SRV_PAGE_SIZE,
 	MONITOR_OVLD_RWLOCK_S_SPIN_WAITS,
 	MONITOR_OVLD_RWLOCK_X_SPIN_WAITS,
+	MONITOR_OVLD_RWLOCK_SX_SPIN_WAITS,
 	MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS,
 	MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS,
+	MONITOR_OVLD_RWLOCK_SX_SPIN_ROUNDS,
 	MONITOR_OVLD_RWLOCK_S_OS_WAITS,
 	MONITOR_OVLD_RWLOCK_X_OS_WAITS,
+	MONITOR_OVLD_RWLOCK_SX_OS_WAITS,
 
 	/* Data DML related counters */
 	MONITOR_MODULE_DML_STATS,
@@ -409,6 +443,8 @@ enum monitor_id_t {
 	MONITOR_BACKGROUND_DROP_TABLE,
 	MONITOR_ONLINE_CREATE_INDEX,
 	MONITOR_PENDING_ALTER_TABLE,
+	MONITOR_ALTER_TABLE_SORT_FILES,
+	MONITOR_ALTER_TABLE_LOG_FILES,
 
 	MONITOR_MODULE_ICP,
 	MONITOR_ICP_ATTEMPTS,
@@ -416,6 +452,10 @@ enum monitor_id_t {
 	MONITOR_ICP_OUT_OF_RANGE,
 	MONITOR_ICP_MATCH,
 
+	/* Mutex/RW-Lock related counters */
+	MONITOR_MODULE_LATCHES,
+	MONITOR_LATCHES,
+
 	/* This is used only for control system to turn
 	on/off and reset all monitor counters */
 	MONITOR_ALL_COUNTER,
@@ -568,42 +608,14 @@ on the counters */
 		}							\
 	}
 
-/** Increment a monitor counter under mutex protection.
-Use MONITOR_INC if appropriate mutex protection already exists.
-@param monitor	monitor to be incremented by 1
-@param mutex	mutex to acquire and relese */
-# define MONITOR_MUTEX_INC(mutex, monitor)				\
-	ut_ad(!mutex_own(mutex));					\
-	if (MONITOR_IS_ON(monitor)) {					\
-		mutex_enter(mutex);					\
-		if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
-			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
-		}							\
-		mutex_exit(mutex);					\
-	}
-/** Decrement a monitor counter under mutex protection.
-Use MONITOR_DEC if appropriate mutex protection already exists.
-@param monitor	monitor to be decremented by 1
-@param mutex	mutex to acquire and relese */
-# define MONITOR_MUTEX_DEC(mutex, monitor)				\
-	ut_ad(!mutex_own(mutex));					\
-	if (MONITOR_IS_ON(monitor)) {					\
-		mutex_enter(mutex);					\
-		if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
-			MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
-		}							\
-		mutex_exit(mutex);					\
-	}
-
-#if defined HAVE_ATOMIC_BUILTINS_64
 /** Atomically increment a monitor counter.
 Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor	monitor to be incremented by 1 */
+@param monitor monitor to be incremented by 1 */
 # define MONITOR_ATOMIC_INC(monitor)					\
 	if (MONITOR_IS_ON(monitor)) {					\
 		ib_uint64_t	value;					\
-		value  = os_atomic_increment_uint64(			\
-			(ib_uint64_t*) &MONITOR_VALUE(monitor),	 1);	\
+		value  = my_atomic_add64(				\
+			(int64*) &MONITOR_VALUE(monitor), 1) + 1;	\
 		/* Note: This is not 100% accurate because of the	\
 		inherent race, we ignore it due to performance. */	\
 		if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) {	\
@@ -613,46 +625,18 @@ Use MONITOR_INC if appropriate mutex protection exists.
 
 /** Atomically decrement a monitor counter.
 Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor	monitor to be decremented by 1 */
+@param monitor monitor to be decremented by 1 */
 # define MONITOR_ATOMIC_DEC(monitor)					\
 	if (MONITOR_IS_ON(monitor)) {					\
 		ib_uint64_t	value;					\
-		value = os_atomic_decrement_uint64(			\
-			(ib_uint64_t*) &MONITOR_VALUE(monitor), 1);	\
+		value = my_atomic_add64(				\
+			(int64*) &MONITOR_VALUE(monitor), -1) - 1;	\
 		/* Note: This is not 100% accurate because of the	\
 		inherent race, we ignore it due to performance. */	\
 		if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) {	\
 			MONITOR_MIN_VALUE(monitor) = value;		\
 		}							\
 	}
-# define srv_mon_create() ((void) 0)
-# define srv_mon_free() ((void) 0)
-#else /* HAVE_ATOMIC_BUILTINS_64 */
-/** Mutex protecting atomic operations on platforms that lack
-built-in operations for atomic memory access */
-extern ib_mutex_t	monitor_mutex;
-/****************************************************************//**
-Initialize the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_create(void);
-/*================*/
-/****************************************************************//**
-Close the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_free(void);
-/*==============*/
-
-/** Atomically increment a monitor counter.
-Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor	monitor to be incremented by 1 */
-# define MONITOR_ATOMIC_INC(monitor) MONITOR_MUTEX_INC(&monitor_mutex, monitor)
-/** Atomically decrement a monitor counter.
-Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor	monitor to be decremented by 1 */
-# define MONITOR_ATOMIC_DEC(monitor) MONITOR_MUTEX_DEC(&monitor_mutex, monitor)
-#endif /* HAVE_ATOMIC_BUILTINS_64 */
 
 #define	MONITOR_DEC(monitor)						\
 	if (MONITOR_IS_ON(monitor)) {					\
@@ -723,12 +707,12 @@ could already be checked as a module group */
 
 /** Add time difference between now and input "value" (in seconds) to the
 monitor counter
-@param monitor	monitor to update for the time difference
-@param value	the start time value */
+@param monitor monitor to update for the time difference
+@param value the start time value */
 #define	MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value)			\
 	MONITOR_CHECK_DEFINED(value);					\
 	if (MONITOR_IS_ON(monitor)) {					\
-		ullint	old_time = (value);				\
+		uintmax_t	old_time = (value);				\
 		value = ut_time_us(NULL);				\
 		MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\
 	}
@@ -736,13 +720,13 @@ monitor counter
 /** This macro updates 3 counters in one call. However, it only checks the
 main/first monitor counter 'monitor', to see it is on or off to decide
 whether to do the update.
-@param monitor		the main monitor counter to update. It accounts for
+@param monitor the main monitor counter to update. It accounts for
 			the accumulative value for the counter.
-@param monitor_n_calls	counter that counts number of times this macro is
+@param monitor_n_calls counter that counts number of times this macro is
 			called
-@param monitor_per_call	counter that records the current and max value of
+@param monitor_per_call counter that records the current and max value of
 			each incremental value
-@param value		incremental value to record this time */
+@param value incremental value to record this time */
 #define MONITOR_INC_VALUE_CUMULATIVE(					\
 		monitor, monitor_n_calls, monitor_per_call, value)	\
 	MONITOR_CHECK_DEFINED(value);					\
@@ -828,9 +812,8 @@ compensated by mon_last_value if accumulated value is required. */
 /****************************************************************//**
 Get monitor's monitor_info_t by its monitor id (index into the
 innodb_counter_info array
-@return	Point to corresponding monitor_info_t, or NULL if no such
+@return Point to corresponding monitor_info_t, or NULL if no such
 monitor */
-UNIV_INTERN
 monitor_info_t*
 srv_mon_get_info(
 /*=============*/
@@ -839,9 +822,8 @@ srv_mon_get_info(
 /****************************************************************//**
 Get monitor's name by its monitor id (index into the
 innodb_counter_info array
-@return	corresponding monitor name, or NULL if no such
+@return corresponding monitor name, or NULL if no such
 monitor */
-UNIV_INTERN
 const char*
 srv_mon_get_name(
 /*=============*/
@@ -851,9 +833,8 @@ srv_mon_get_name(
 /****************************************************************//**
 Turn on/off/reset monitor counters in a module. If module_value
 is NUM_MONITOR then turn on all monitor counters.
-@return	0 if successful, or the first monitor that cannot be
+@return 0 if successful, or the first monitor that cannot be
 turned on because it is already turned on. */
-UNIV_INTERN
 void
 srv_mon_set_module_control(
 /*=======================*/
@@ -870,7 +851,6 @@ mechanism to start/stop and reset the counters, so we simulate these
 controls by remembering the corresponding counter values when the
 corresponding monitors are turned on/off/reset, and do appropriate
 mathematics to deduct the actual value. */
-UNIV_INTERN
 void
 srv_mon_process_existing_counter(
 /*=============================*/
@@ -881,7 +861,7 @@ srv_mon_process_existing_counter(
 /*************************************************************//**
 This function is used to calculate the maximum counter value
 since the start of monitor counter
-@return	max counter value since start. */
+@return max counter value since start. */
 UNIV_INLINE
 mon_type_t
 srv_mon_calc_max_since_start(
@@ -890,7 +870,7 @@ srv_mon_calc_max_since_start(
 /*************************************************************//**
 This function is used to calculate the minimum counter value
 since the start of monitor counter
-@return	min counter value since start. */
+@return min counter value since start. */
 UNIV_INLINE
 mon_type_t
 srv_mon_calc_min_since_start(
@@ -899,7 +879,6 @@ srv_mon_calc_min_since_start(
 /*************************************************************//**
 Reset a monitor, create a new base line with the current monitor
 value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
-UNIV_INTERN
 void
 srv_mon_reset(
 /*==========*/
@@ -913,7 +892,6 @@ srv_mon_reset_all(
 	monitor_id_t	monitor);	/*!< in: monitor id*/
 /*************************************************************//**
 Turn on monitor counters that are marked as default ON. */
-UNIV_INTERN
 void
 srv_mon_default_on(void);
 /*====================*/
diff --git a/storage/innobase/include/srv0mon.ic b/storage/innobase/include/srv0mon.ic
index 225390c6b6f..0cf76b2ea01 100644
--- a/storage/innobase/include/srv0mon.ic
+++ b/storage/innobase/include/srv0mon.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,7 @@ Created 1/20/2010	Jimmy Yang
 /*************************************************************//**
 This function is used to calculate the maximum counter value
 since the start of monitor counter
-@return	max counter value since start. */
+@return max counter value since start. */
 UNIV_INLINE
 mon_type_t
 srv_mon_calc_max_since_start(
@@ -61,7 +61,7 @@ srv_mon_calc_max_since_start(
 /*************************************************************//**
 This function is used to calculate the minimum counter value
 since the start of monitor counter
-@return	min counter value since start. */
+@return min counter value since start. */
 UNIV_INLINE
 mon_type_t
 srv_mon_calc_min_since_start(
@@ -103,9 +103,9 @@ srv_mon_reset_all(
 {
 	/* Do not reset all counter values if monitor is still on. */
 	if (MONITOR_IS_ON(monitor)) {
-		fprintf(stderr, "InnoDB: Cannot reset all values for "
-			"monitor counter %s while it is on. Please "
-			"turn it off and retry. \n",
+		fprintf(stderr, "InnoDB: Cannot reset all values for"
+			" monitor counter %s while it is on. Please"
+			" turn it off and retry.\n",
 			srv_mon_get_name(monitor));
 	} else {
 		MONITOR_RESET_ALL(monitor);
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index a5713afbd49..056a6267347 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -42,24 +42,31 @@ Created 10/10/1995 Heikki Tuuri
 #ifndef srv0srv_h
 #define srv0srv_h
 
+#include "my_global.h"
+
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
+
 #include "univ.i"
 #ifndef UNIV_HOTBACKUP
 #include "log0log.h"
-#include "sync0sync.h"
-#include "os0sync.h"
+#include "os0event.h"
 #include "que0types.h"
 #include "trx0types.h"
 #include "srv0conc.h"
 #include "buf0checksum.h"
 #include "ut0counter.h"
+#include "fil0fil.h"
+
+struct fil_space_t;
 
 /* Global counters used inside InnoDB. */
 struct srv_stats_t {
+	typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
 	typedef ib_counter_t<lsn_t, 1, single_indexer_t> lsn_ctr_1_t;
 	typedef ib_counter_t<ulint, 1, single_indexer_t> ulint_ctr_1_t;
 	typedef ib_counter_t<lint, 1, single_indexer_t> lint_ctr_1_t;
-	typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
-	typedef ib_counter_t<ib_int64_t, 1, single_indexer_t> ib_int64_ctr_1_t;
+	typedef ib_counter_t<int64_t, 1, single_indexer_t> int64_ctr_1_t;
 
 	/** Count the amount of data written in total (in bytes) */
 	ulint_ctr_1_t		data_written;
@@ -70,6 +77,9 @@ struct srv_stats_t {
 	/** Number of physical writes to the log performed */
 	ulint_ctr_1_t		log_writes;
 
+	/** Amount of data padded for log write ahead */
+	ulint_ctr_1_t		log_padded;
+
 	/** Amount of data written to the log files in bytes */
 	lsn_ctr_1_t		os_log_written;
 
@@ -142,7 +152,7 @@ struct srv_stats_t {
 	ulint_ctr_1_t		data_read;
 
 	/** Wait time of database locks */
-	ib_int64_ctr_1_t	n_lock_wait_time;
+	int64_ctr_1_t		n_lock_wait_time;
 
 	/** Number of database lock waits */
 	ulint_ctr_1_t		n_lock_wait_count;
@@ -201,6 +211,9 @@ extern os_event_t	srv_error_event;
 /** The buffer pool dump/load thread waits on this event. */
 extern os_event_t	srv_buf_dump_event;
 
+/** The buffer pool resize thread waits on this event. */
+extern os_event_t	srv_buf_resize_event;
+
 /** The buffer pool dump/load file name */
 #define SRV_BUF_DUMP_FILENAME_DEFAULT	"ib_buffer_pool"
 extern char*		srv_buf_dump_filename;
@@ -215,9 +228,10 @@ extern char		srv_disable_sort_file_cache;
 
 /* If the last data file is auto-extended, we add this many pages to it
 at a time */
-#define SRV_AUTO_EXTEND_INCREMENT	\
-	(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+#define SRV_AUTO_EXTEND_INCREMENT (srv_sys_space.get_autoextend_increment())
 
+/** Mutex protecting page_zip_stat_per_index */
+extern ib_mutex_t	page_zip_stat_per_index_mutex;
 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
 extern ib_mutex_t	srv_monitor_file_mutex;
 /* Temporary file for innodb monitor output */
@@ -239,10 +253,6 @@ extern FILE*	srv_misc_tmpfile;
 
 extern char*	srv_data_home;
 
-#ifdef UNIV_LOG_ARCHIVE
-extern char*	srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
 /** Set if InnoDB must operate in read-only mode. We don't do any
 recovery and open all tables in RO mode instead of RW mode. We don't
 sync the max trx id to disk either. */
@@ -255,10 +265,8 @@ dictionary tables are in the system tablespace 0 */
 extern my_bool	srv_file_per_table;
 /** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
 extern	ulong	srv_thread_sleep_delay;
-#if defined(HAVE_ATOMIC_BUILTINS)
 /** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
 extern	ulong	srv_adaptive_max_sleep_delay;
-#endif /* HAVE_ATOMIC_BUILTINS */
 
 /** The file format to use on new *.ibd files. */
 extern ulint	srv_file_format;
@@ -297,8 +305,8 @@ extern my_bool srv_use_atomic_writes;
 extern ulong innodb_compression_algorithm;
 
 /* Number of flush threads */
-#define MTFLUSH_MAX_WORKER       64
-#define MTFLUSH_DEFAULT_WORKER   8
+#define MTFLUSH_MAX_WORKER		64
+#define MTFLUSH_DEFAULT_WORKER		8
 
 /* Number of threads used for multi-threaded flush */
 extern long    srv_mtflush_threads;
@@ -306,9 +314,6 @@ extern long    srv_mtflush_threads;
 /* If this flag is TRUE, then we will use multi threaded flush. */
 extern my_bool	srv_use_mtflush;
 
-#ifdef __WIN__
-extern ibool	srv_use_native_conditions;
-#endif /* __WIN__ */
 #endif /* !UNIV_HOTBACKUP */
 
 /** Server undo tablespaces directory, can be absolute path. */
@@ -320,66 +325,97 @@ extern ulong	srv_undo_tablespaces;
 /** The number of UNDO tablespaces that are open and ready to use. */
 extern ulint	srv_undo_tablespaces_open;
 
-/* The number of undo segments to use */
+/** The number of UNDO tablespaces that are active (hosting some rollback
+segment). It is quite possible that some of the tablespaces do not host
+any rollback segment, depending on the configuration used. */
+extern ulint	srv_undo_tablespaces_active;
+
+/** The number of undo segments to use */
 extern ulong	srv_undo_logs;
 
-extern ulint	srv_n_data_files;
-extern char**	srv_data_file_names;
-extern ulint*	srv_data_file_sizes;
-extern ulint*	srv_data_file_is_raw_partition;
+/** Maximum size of undo tablespace. */
+extern unsigned long long	srv_max_undo_log_size;
 
-extern ibool	srv_auto_extend_last_data_file;
-extern ulint	srv_last_file_size_max;
-extern char*	srv_log_group_home_dir;
-#ifndef UNIV_HOTBACKUP
-extern ulong	srv_auto_extend_increment;
+/** Rate at which UNDO records should be purged. */
+extern ulong	srv_purge_rseg_truncate_frequency;
 
-extern ibool	srv_created_new_raw;
+/** Enable or Disable Truncate of UNDO tablespace. */
+extern my_bool	srv_undo_log_truncate;
 
 /* Optimize prefix index queries to skip cluster index lookup when possible */
 /* Enables or disables this prefix optimization.  Disabled by default. */
 extern my_bool	srv_prefix_index_cluster_optimization;
 
+/** UNDO logs not redo logged, these logs reside in the temp tablespace.*/
+extern const ulong	srv_tmp_undo_logs;
+
+/** Default size of UNDO tablespace while it is created new. */
+extern const ulint	SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+
+extern char*	srv_log_group_home_dir;
+
+#ifndef UNIV_HOTBACKUP
 /** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
 #define SRV_N_LOG_FILES_MAX 100
 extern ulong	srv_n_log_files;
+/** At startup, this is the current redo log file size.
+During startup, if this is different from srv_log_file_size_requested
+(innodb_log_file_size), the redo log will be rebuilt and this size
+will be initialized to srv_log_file_size_requested.
+When upgrading from a previous redo log format, this will be set to 0,
+and writing to the redo log is not allowed.
+
+During startup, this is in bytes, and later converted to pages. */
 extern ib_uint64_t	srv_log_file_size;
+/** The value of the startup parameter innodb_log_file_size */
 extern ib_uint64_t	srv_log_file_size_requested;
 extern ulint	srv_log_buffer_size;
 extern ulong	srv_flush_log_at_trx_commit;
 extern uint	srv_flush_log_at_timeout;
+extern ulong	srv_log_write_ahead_size;
 extern char	srv_adaptive_flushing;
+extern my_bool	srv_flush_sync;
 
 #ifdef WITH_INNODB_DISALLOW_WRITES
 /* When this event is reset we do not allow any file writes to take place. */
 extern os_event_t	srv_allow_writes_event;
 #endif /* WITH_INNODB_DISALLOW_WRITES */
+
 /* If this flag is TRUE, then we will load the indexes' (and tables') metadata
 even if they are marked as "corrupted". Mostly it is for DBA to process
 corrupted index and table */
 extern my_bool	srv_load_corrupted;
 
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-extern const byte*	srv_latin1_ordering;
-#ifndef UNIV_HOTBACKUP
-extern my_bool	srv_use_sys_malloc;
-#else
-extern ibool	srv_use_sys_malloc;
-#endif /* UNIV_HOTBACKUP */
-extern ulint	srv_buf_pool_size;	/*!< requested size in bytes */
-extern ulint    srv_buf_pool_instances; /*!< requested number of buffer pool instances */
-extern ulong	srv_n_page_hash_locks;	/*!< number of locks to
-					protect buf_pool->page_hash */
-extern ulong	srv_LRU_scan_depth;	/*!< Scan depth for LRU
-					flush batch */
-extern ulong	srv_flush_neighbors;	/*!< whether or not to flush
-					neighbors of a block */
-extern ulint	srv_buf_pool_old_size;	/*!< previously requested size */
-extern ulint	srv_buf_pool_curr_size;	/*!< current size in bytes */
+/** Requested size in bytes */
+extern ulint		srv_buf_pool_size;
+/** Minimum pool size in bytes */
+extern const ulint	srv_buf_pool_min_size;
+/** Default pool size in bytes */
+extern const ulint	srv_buf_pool_def_size;
+/** Requested buffer pool chunk size. Each buffer pool instance consists
+of one or more chunks. */
+extern ulong		srv_buf_pool_chunk_unit;
+/** Requested number of buffer pool instances */
+extern ulong		srv_buf_pool_instances;
+/** Default number of buffer pool instances */
+extern const ulong	srv_buf_pool_instances_default;
+/** Number of locks to protect buf_pool->page_hash */
+extern ulong	srv_n_page_hash_locks;
+/** Scan depth for LRU flush batch, i.e. the number of blocks scanned */
+extern ulong	srv_LRU_scan_depth;
+/** Whether or not to flush neighbors of a block */
 extern ulong	srv_buf_pool_dump_pct;	/*!< dump that may % of each buffer
 					pool during BP dump */
-extern ulint	srv_mem_pool_size;
+extern ulong	srv_flush_neighbors;
+/** Previously requested size */
+extern ulint	srv_buf_pool_old_size;
+/** Current size as scaling factor for the other components */
+extern ulint	srv_buf_pool_base_size;
+/** Current size in bytes */
+extern ulint	srv_buf_pool_curr_size;
+/** Dump this % of each buffer pool during BP dump */
+extern ulong	srv_buf_pool_dump_pct;
+/** Lock table size in bytes */
 extern ulint	srv_lock_table_size;
 
 extern ulint	srv_n_file_io_threads;
@@ -387,6 +423,7 @@ extern my_bool	srv_random_read_ahead;
 extern ulong	srv_read_ahead_threshold;
 extern ulint	srv_n_read_io_threads;
 extern ulint	srv_n_write_io_threads;
+
 /* Defragmentation, Origianlly facebook default value is 100, but it's too high */
 #define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
 extern my_bool	srv_defragment;
@@ -399,6 +436,8 @@ extern ulonglong	srv_defragment_interval;
 
 extern ulong	srv_idle_flush_pct;
 
+extern uint	srv_change_buffer_max_size;
+
 /* Number of IO operations per second the server can do */
 extern ulong    srv_io_capacity;
 
@@ -417,18 +456,12 @@ to treat NULL value when collecting statistics. It is not defined
 as enum type because the configure option takes unsigned integer type. */
 extern ulong	srv_innodb_stats_method;
 
-#ifdef UNIV_LOG_ARCHIVE
-extern ibool		srv_log_archive_on;
-extern ibool		srv_archive_recovery;
-extern ib_uint64_t	srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
 extern char*	srv_file_flush_method_str;
-extern ulint	srv_unix_file_flush_method;
-extern ulint	srv_win_file_flush_method;
 
 extern ulint	srv_max_n_open_files;
 
+extern ulong	srv_n_page_cleaners;
+
 extern double	srv_max_dirty_pages_pct;
 extern double	srv_max_dirty_pages_pct_lwm;
 
@@ -458,7 +491,9 @@ extern my_bool			srv_stats_sample_traditional;
 
 extern ibool	srv_use_doublewrite_buf;
 extern ulong	srv_doublewrite_batch_size;
+extern ulong	srv_checksum_algorithm;
 
+extern double	srv_max_buf_pool_modified_pct;
 extern my_bool	srv_force_primary_key;
 
 extern double	srv_max_buf_pool_modified_pct;
@@ -470,13 +505,7 @@ extern ulong	srv_replication_delay;
 
 extern my_bool	srv_print_innodb_monitor;
 extern my_bool	srv_print_innodb_lock_monitor;
-extern ibool	srv_print_innodb_tablespace_monitor;
 extern ibool	srv_print_verbose_log;
-#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
-	"Using innodb_table_monitor is deprecated and it may be removed " \
-	"in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
-	"tables instead, see " REFMAN "innodb-i_s-tables.html"
-extern ibool	srv_print_innodb_table_monitor;
 
 extern ibool	srv_monitor_active;
 extern ibool	srv_error_monitor_active;
@@ -484,6 +513,9 @@ extern ibool	srv_error_monitor_active;
 /* TRUE during the lifetime of the buffer pool dump/load thread */
 extern ibool	srv_buf_dump_thread_active;
 
+/* true during the lifetime of the buffer pool resize thread */
+extern bool	srv_buf_resize_thread_active;
+
 /* TRUE during the lifetime of the stats thread */
 extern ibool	srv_dict_stats_thread_active;
 
@@ -501,39 +533,21 @@ extern ibool	srv_priority_boost;
 extern ulint	srv_truncated_status_writes;
 extern ulint	srv_available_undo_logs;
 
-extern	ulint	srv_mem_pool_size;
-extern	ulint	srv_lock_table_size;
-
-#ifdef UNIV_DEBUG
-extern	ibool	srv_print_thread_releases;
-extern	ibool	srv_print_lock_waits;
-extern	ibool	srv_print_buf_io;
-extern	ibool	srv_print_log_io;
-extern	ibool	srv_print_latch_waits;
-#else /* UNIV_DEBUG */
-# define srv_print_thread_releases	FALSE
-# define srv_print_lock_waits		FALSE
-# define srv_print_buf_io		FALSE
-# define srv_print_log_io		FALSE
-# define srv_print_latch_waits		FALSE
-#endif /* UNIV_DEBUG */
-
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 extern my_bool	srv_ibuf_disable_background_merge;
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
 #ifdef UNIV_DEBUG
+extern my_bool	srv_sync_debug;
 extern my_bool	srv_purge_view_update_only_debug;
+
+/** Value of MySQL global used to disable master thread. */
+extern my_bool	srv_master_thread_disabled_debug;
 #endif /* UNIV_DEBUG */
 
 #define SRV_SEMAPHORE_WAIT_EXTENSION	7200
 extern ulint	srv_dml_needed_delay;
 
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-extern ib_mutex_t	server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
 #define SRV_MAX_N_IO_THREADS	130
 
 /* Array of English strings describing the current state of an
@@ -580,21 +594,30 @@ extern ulong srv_buf_dump_status_frequency;
 
 # ifdef UNIV_PFS_THREAD
 /* Keys to register InnoDB threads with performance schema */
-extern mysql_pfs_key_t	buf_page_cleaner_thread_key;
-extern mysql_pfs_key_t	trx_rollback_clean_thread_key;
+extern mysql_pfs_key_t	buf_dump_thread_key;
+extern mysql_pfs_key_t	dict_stats_thread_key;
 extern mysql_pfs_key_t	io_handler_thread_key;
-extern mysql_pfs_key_t	srv_lock_timeout_thread_key;
+extern mysql_pfs_key_t	io_ibuf_thread_key;
+extern mysql_pfs_key_t	io_log_thread_key;
+extern mysql_pfs_key_t	io_read_thread_key;
+extern mysql_pfs_key_t	io_write_thread_key;
+extern mysql_pfs_key_t	page_cleaner_thread_key;
+extern mysql_pfs_key_t	recv_writer_thread_key;
 extern mysql_pfs_key_t	srv_error_monitor_thread_key;
-extern mysql_pfs_key_t	srv_monitor_thread_key;
+extern mysql_pfs_key_t	srv_lock_timeout_thread_key;
 extern mysql_pfs_key_t	srv_master_thread_key;
+extern mysql_pfs_key_t	srv_monitor_thread_key;
 extern mysql_pfs_key_t	srv_purge_thread_key;
-extern mysql_pfs_key_t	recv_writer_thread_key;
+extern mysql_pfs_key_t	srv_worker_thread_key;
+extern mysql_pfs_key_t	trx_rollback_clean_thread_key;
 
 /* This macro register the current thread and its key with performance
 schema */
 #  define pfs_register_thread(key)			\
 do {								\
 	struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+	/* JAN: TODO: MYSQL 5.7 PSI                             \
+	PSI_THREAD_CALL(set_thread_os_id)(psi);	*/		\
 	PSI_THREAD_CALL(set_thread)(psi);			\
 } while (0)
 
@@ -603,21 +626,50 @@ do {								\
 do {								\
 	PSI_THREAD_CALL(delete_current_thread)();		\
 } while (0)
+# else
+#  define pfs_register_thread(key)
+#  define pfs_delete_thread()
 # endif /* UNIV_PFS_THREAD */
 
-#endif /* !UNIV_HOTBACKUP */
+#ifdef HAVE_PSI_STAGE_INTERFACE
+/** Performance schema stage event for monitoring ALTER TABLE progress:
+everything after the flush stage, log_make_checkpoint_at(). */
+extern PSI_stage_info	srv_stage_alter_table_end;
 
-/** Types of raw partitions in innodb_data_file_path */
-enum {
-	SRV_NOT_RAW = 0,	/*!< Not a raw partition */
-	SRV_NEW_RAW,		/*!< A 'newraw' partition, only to be
-				initialized */
-	SRV_OLD_RAW		/*!< An initialized raw partition */
-};
+/** Performance schema stage event for monitoring ALTER TABLE progress
+log_make_checkpoint_at(). */
+extern PSI_stage_info	srv_stage_alter_table_flush;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_insert_index_tuples(). */
+extern PSI_stage_info	srv_stage_alter_table_insert;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_apply(). */
+extern PSI_stage_info	srv_stage_alter_table_log_index;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_log_table_apply(). */
+extern PSI_stage_info	srv_stage_alter_table_log_table;
 
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_sort(). */
+extern PSI_stage_info	srv_stage_alter_table_merge_sort;
+
+/** Performance schema stage event for monitoring ALTER TABLE progress
+row_merge_read_clustered_index(). */
+extern PSI_stage_info	srv_stage_alter_table_read_pk_internal_sort;
+
+/** Performance schema stage event for monitoring buffer pool load progress. */
+extern PSI_stage_info	srv_stage_buffer_pool_load;
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef _WIN32
 /** Alternatives for the file flush option in Unix; see the InnoDB manual
 about what these mean */
-enum {
+enum srv_unix_flush_t {
 	SRV_UNIX_FSYNC = 1,	/*!< fsync, the default */
 	SRV_UNIX_O_DSYNC,	/*!< open log files in O_SYNC mode */
 	SRV_UNIX_LITTLESYNC,	/*!< do not call os_file_flush()
@@ -638,12 +690,15 @@ enum {
 				this case user/DBA should be sure about
 				the integrity of the meta-data */
 };
-
+extern enum srv_unix_flush_t	srv_unix_file_flush_method;
+#else
 /** Alternatives for file i/o in Windows */
-enum {
+enum srv_win_flush_t {
 	SRV_WIN_IO_NORMAL = 1,	/*!< buffered I/O */
 	SRV_WIN_IO_UNBUFFERED	/*!< unbuffered I/O; this is the default */
 };
+extern enum srv_win_flush_t	srv_win_file_flush_method;
+#endif /* _WIN32 */
 
 /** Alternatives for srv_force_recovery. Non-zero values are intended
 to help the user get a damaged database up so that he can dump intact
@@ -682,6 +737,11 @@ enum srv_stats_method_name_enum {
 
 typedef enum srv_stats_method_name_enum		srv_stats_method_name_t;
 
+#ifdef UNIV_DEBUG
+/** Force all user tables to use page compression. */
+extern ulong	srv_debug_compress;
+#endif /* UNIV_DEBUG */
+
 #ifndef UNIV_HOTBACKUP
 /** Types of threads existing in the system. */
 enum srv_thread_type {
@@ -696,32 +756,27 @@ enum srv_thread_type {
 
 /*********************************************************************//**
 Boots Innobase server. */
-UNIV_INTERN
 void
 srv_boot(void);
 /*==========*/
 /*********************************************************************//**
 Initializes the server. */
-UNIV_INTERN
 void
 srv_init(void);
 /*==========*/
 /*********************************************************************//**
 Frees the data structures created in srv_init(). */
-UNIV_INTERN
 void
 srv_free(void);
 /*==========*/
 /*********************************************************************//**
 Initializes the synchronization primitives, memory system, and the thread
 local storage. */
-UNIV_INTERN
 void
 srv_general_init(void);
 /*==================*/
 /*********************************************************************//**
 Sets the info describing an i/o thread current state. */
-UNIV_INTERN
 void
 srv_set_io_thread_op_info(
 /*======================*/
@@ -730,7 +785,6 @@ srv_set_io_thread_op_info(
 				state */
 /*********************************************************************//**
 Resets the info describing an i/o thread current state. */
-UNIV_INTERN
 void
 srv_reset_io_thread_op_info();
 /*=========================*/
@@ -740,7 +794,6 @@ and wakes up the purge thread if it is suspended (not sleeping).  Note
 that there is a small chance that the purge thread stays suspended
 (we do not protect our operation with the srv_sys_t:mutex, for
 performance reasons). */
-UNIV_INTERN
 void
 srv_wake_purge_thread_if_not_active(void);
 /*=====================================*/
@@ -750,13 +803,17 @@ and wakes up the master thread if it is suspended (not sleeping). Used
 in the MySQL interface. Note that there is a small chance that the master
 thread stays suspended (we do not protect our operation with the kernel
 mutex, for performace reasons). */
-UNIV_INTERN
 void
-srv_active_wake_master_thread(void);
-/*===============================*/
+srv_active_wake_master_thread_low(void);
+/*===================================*/
+#define srv_active_wake_master_thread()					\
+	do {								\
+		if (!srv_read_only_mode) {				\
+			srv_active_wake_master_thread_low();		\
+		}							\
+	} while (0)
 /*******************************************************************//**
 Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
 void
 srv_wake_master_thread(void);
 /*========================*/
@@ -764,7 +821,6 @@ srv_wake_master_thread(void);
 Outputs to a file the output of the InnoDB Monitor.
 @return FALSE if not all information printed
 due to failure to obtain necessary mutex */
-UNIV_INTERN
 ibool
 srv_printf_innodb_monitor(
 /*======================*/
@@ -778,7 +834,6 @@ srv_printf_innodb_monitor(
 
 /******************************************************************//**
 Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
 void
 srv_export_innodb_status(void);
 /*==========================*/
@@ -786,21 +841,18 @@ srv_export_innodb_status(void);
 Get current server activity count. We don't hold srv_sys::mutex while
 reading this value as it is only used in heuristics.
 @return activity count. */
-UNIV_INTERN
 ulint
 srv_get_activity_count(void);
 /*========================*/
 /*******************************************************************//**
 Check if there has been any activity.
 @return FALSE if no change in activity counter. */
-UNIV_INTERN
 ibool
 srv_check_activity(
 /*===============*/
 	ulint		old_activity_count);	/*!< old activity count */
 /******************************************************************//**
 Increment the server activity counter. */
-UNIV_INTERN
 void
 srv_inc_activity_count(void);
 /*=========================*/
@@ -808,7 +860,6 @@ srv_inc_activity_count(void);
 /**********************************************************************//**
 Enqueues a task to server task queue and releases a worker thread, if there
 is a suspended one. */
-UNIV_INTERN
 void
 srv_que_task_enqueue_low(
 /*=====================*/
@@ -819,7 +870,6 @@ Check whether any background thread is active. If so, return the thread
 type.
 @return SRV_NONE if all are are suspended or have exited, thread
 type if any are still active. */
-UNIV_INTERN
 enum srv_thread_type
 srv_get_active_thread_type(void);
 /*============================*/
@@ -828,8 +878,7 @@ extern "C" {
 
 /*********************************************************************//**
 A thread which prints the info output by various InnoDB monitors.
-@return	a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
 os_thread_ret_t
 DECLARE_THREAD(srv_monitor_thread)(
 /*===============================*/
@@ -838,8 +887,7 @@ DECLARE_THREAD(srv_monitor_thread)(
 
 /*********************************************************************//**
 The master thread controlling the server.
-@return	a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
 os_thread_ret_t
 DECLARE_THREAD(srv_master_thread)(
 /*==============================*/
@@ -849,8 +897,7 @@ DECLARE_THREAD(srv_master_thread)(
 /*************************************************************************
 A thread which prints warnings about semaphore waits which have lasted
 too long. These can be used to track bugs which cause hangs.
-@return	a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
 os_thread_ret_t
 DECLARE_THREAD(srv_error_monitor_thread)(
 /*=====================================*/
@@ -859,8 +906,7 @@ DECLARE_THREAD(srv_error_monitor_thread)(
 
 /*********************************************************************//**
 Purge coordinator thread that schedules the purge tasks.
-@return	a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
 os_thread_ret_t
 DECLARE_THREAD(srv_purge_coordinator_thread)(
 /*=========================================*/
@@ -869,8 +915,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
 
 /*********************************************************************//**
 Worker thread that reads tasks from the work queue and executes them.
-@return	a dummy parameter */
-UNIV_INTERN
+@return a dummy parameter */
 os_thread_ret_t
 DECLARE_THREAD(srv_worker_thread)(
 /*==============================*/
@@ -880,8 +925,7 @@ DECLARE_THREAD(srv_worker_thread)(
 
 /**********************************************************************//**
 Get count of tasks in the queue.
-@return number of tasks in queue  */
-UNIV_INTERN
+@return number of tasks in queue */
 ulint
 srv_get_task_queue_length(void);
 /*===========================*/
@@ -891,7 +935,6 @@ Releases threads of the type given from suspension in the thread table.
 NOTE! The server mutex has to be reserved by the caller!
 @return number of threads released: this may be less than n if not
 enough threads were suspended at the moment */
-UNIV_INTERN
 ulint
 srv_release_threads(
 /*================*/
@@ -902,18 +945,52 @@ srv_release_threads(
 Check whether any background thread are active. If so print which thread
 is active. Send the threads wakeup signal.
 @return name of thread that is active or NULL */
-UNIV_INTERN
 const char*
 srv_any_background_threads_are_active(void);
 /*=======================================*/
 
 /**********************************************************************//**
 Wakeup the purge threads. */
-UNIV_INTERN
 void
 srv_purge_wakeup(void);
 /*==================*/
 
+/** Call exit(3) */
+void
+srv_fatal_error();
+
+/** Check if a tablespace is being truncated.
+(The system tablespace is ignored: we do not re-create it, so some
+of the actions that this function suppresses for independent
+tablespaces do not apply to the system tablespace.)
+@param[in]	space_id	space_id to check for truncate action
+@return true if being truncated, false if not being truncated
+or if the tablespace is the system tablespace. */
+bool
+srv_is_tablespace_truncated(ulint space_id);
+
+/** Check if tablespace was truncated.
+@param[in]	space	space object to check for truncate action
+@return true if tablespace was truncated and we still have an active
+MLOG_TRUNCATE REDO log record. */
+bool
+srv_was_tablespace_truncated(const fil_space_t* space);
+
+#ifdef UNIV_DEBUG
+/** Disables master thread. It's used by:
+	SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
+@param[in]	thd		thread handle
+@param[in]	var		pointer to system variable
+@param[out]	var_ptr		where the formal string goes
+@param[in]	save		immediate result from check function */
+void
+srv_master_thread_disabled_debug_update(
+	THD*				thd,
+	struct st_mysql_sys_var*	var,
+	void*				var_ptr,
+	const void*			save);
+#endif /* UNIV_DEBUG */
+
 /** Status variables to be passed to MySQL */
 struct export_var_t{
 	ulint innodb_data_pending_reads;	/*!< Pending reads */
@@ -924,8 +1001,9 @@ struct export_var_t{
 	ulint innodb_data_writes;		/*!< I/O write requests */
 	ulint innodb_data_written;		/*!< Data bytes written */
 	ulint innodb_data_reads;		/*!< I/O read requests */
-	char  innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */
-	char  innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
+	char  innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */
+	char  innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */
+	char  innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
 	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
 	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
 	ulint innodb_buffer_pool_bytes_data;	/*!< File bytes used */
@@ -961,7 +1039,7 @@ struct export_var_t{
 	ulint innodb_pages_written;		/*!< buf_pool->stat.n_pages_written */
 	ulint innodb_row_lock_waits;		/*!< srv_n_lock_wait_count */
 	ulint innodb_row_lock_current_waits;	/*!< srv_n_lock_wait_current_count */
-	ib_int64_t innodb_row_lock_time;	/*!< srv_n_lock_wait_time
+	int64_t innodb_row_lock_time;		/*!< srv_n_lock_wait_time
 						/ 1000 */
 	ulint innodb_row_lock_time_avg;		/*!< srv_n_lock_wait_time
 						/ 1000
@@ -998,42 +1076,45 @@ struct export_var_t{
 	ulint innodb_purge_trx_id_age;		/*!< rw_max_trx_id - purged trx_id */
 	ulint innodb_purge_view_trx_id_age;	/*!< rw_max_trx_id
 						- purged view's min trx_id */
+	ulint innodb_ahi_drop_lookups;		/*!< number of adaptive hash
+						index lookups when freeing
+						file pages */
 #endif /* UNIV_DEBUG */
 
-	ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved
+	int64_t innodb_page_compression_saved;/*!< Number of bytes saved
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
+	int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
+	int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
+	int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
+	int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
+	int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
+	int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
 						by page compression */
-	ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
+	int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
 						by page compression */
-	ib_int64_t innodb_index_pages_written;  /*!< Number of index pages
+	int64_t innodb_index_pages_written;  /*!< Number of index pages
 						written */
-	ib_int64_t innodb_non_index_pages_written;  /*!< Number of non index pages
+	int64_t innodb_non_index_pages_written;  /*!< Number of non index pages
 						written */
-	ib_int64_t innodb_pages_page_compressed;/*!< Number of pages
+	int64_t innodb_pages_page_compressed;/*!< Number of pages
 						compressed by page compression */
-	ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
+	int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
 						induced by page compression */
-	ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
+	int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
 						saved by page compression */
-	ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages
+	int64_t innodb_pages_page_decompressed;/*!< Number of pages
 						decompressed by page
 						compression */
-	ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
+	int64_t innodb_pages_page_compression_error;/*!< Number of page
 						compression errors */
-	ib_int64_t innodb_pages_encrypted;      /*!< Number of pages
+	int64_t innodb_pages_encrypted;      /*!< Number of pages
 						encrypted */
-	ib_int64_t innodb_pages_decrypted;      /*!< Number of pages
+	int64_t innodb_pages_decrypted;      /*!< Number of pages
 						decrypted */
 
 	ulint innodb_sec_rec_cluster_reads;	/*!< srv_sec_rec_cluster_reads */
@@ -1044,7 +1125,7 @@ struct export_var_t{
 	ulint innodb_encryption_rotation_pages_modified;
 	ulint innodb_encryption_rotation_pages_flushed;
 	ulint innodb_encryption_rotation_estimated_iops;
-	ib_int64_t innodb_encryption_key_requests;
+	int64_t innodb_encryption_key_requests;
 
 	ulint innodb_scrub_page_reorganizations;
 	ulint innodb_scrub_page_splits;
@@ -1092,6 +1173,7 @@ struct srv_slot_t{
 # define srv_start_raw_disk_in_use		0
 # define srv_file_per_table			1
 #endif /* !UNIV_HOTBACKUP */
+
 #ifdef WITH_WSREP
 UNIV_INTERN
 void
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index d2e70f969b7..708a90247e3 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -30,41 +30,43 @@ Created 10/10/1995 Heikki Tuuri
 #include "log0log.h"
 #include "ut0byte.h"
 
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR	'\\'
+// Forward declaration
+struct dict_table_t;
+
+#ifdef DBUG_OFF
+# define RECOVERY_CRASH(x) do {} while(0)
 #else
-#define SRV_PATH_SEPARATOR	'/'
-#endif
+# define RECOVERY_CRASH(x) do {						\
+	if (srv_force_recovery_crash == x) {				\
+		fprintf(stderr, "innodb_force_recovery_crash=%lu\n",	\
+			srv_force_recovery_crash);			\
+		fflush(stderr);						\
+		_exit(3);						\
+	}								\
+} while (0)
+#endif /* DBUG_OFF */
+
+/** If the buffer pool is smaller than this size,
+only one buffer pool instance is used. */
+#define BUF_POOL_SIZE_THRESHOLD		(1024 * 1024 * 1024)
 
 /*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes. 
-*/
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
-	char*	str);	/*!< in/out: null-terminated character string */
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return	TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
+Parse temporary tablespace configuration.
+@return true if ok, false on parse error */
+bool
+srv_parse_temp_data_file_paths_and_sizes(
+/*=====================================*/
 	char*	str);	/*!< in/out: the data file path string */
 /*********************************************************************//**
 Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
 and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
 void
 srv_free_paths_and_sizes(void);
 /*==========================*/
 /*********************************************************************//**
 Adds a slash or a backslash to the end of a string if it is missing
 and the string is not empty.
-@return	string which has the separator if the string is not empty */
-UNIV_INTERN
+@return string which has the separator if the string is not empty */
 char*
 srv_add_path_separator_if_needed(
 /*=============================*/
@@ -73,22 +75,24 @@ srv_add_path_separator_if_needed(
 /****************************************************************//**
 Starts Innobase and creates a new database if database files
 are not found and the user wants.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 innobase_start_or_create_for_mysql(void);
 /*====================================*/
 /****************************************************************//**
 Shuts down the Innobase database.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 innobase_shutdown_for_mysql(void);
 
+/****************************************************************//**
+Shuts down background threads that can generate undo pages. */
+void
+srv_shutdown_bg_undo_sources(void);
+
 /********************************************************************
 Signal all per-table background threads to shutdown, and wait for them to do
 so. */
-UNIV_INTERN
 void
 srv_shutdown_table_bg_threads(void);
 /*=============================*/
@@ -98,7 +102,6 @@ Copy the file path component of the physical file to parameter. It will
 copy up to and including the terminating path separator.
 @return number of bytes copied or ULINT_UNDEFINED if destination buffer
 	is smaller than the path to be copied. */
-UNIV_INTERN
 ulint
 srv_path_copy(
 /*==========*/
@@ -108,41 +111,54 @@ srv_path_copy(
 	const char*	table_name)	/*!< in: source table name */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 
-/*****************************************************************//**
-Get the meta-data filename from the table name. */
-UNIV_INTERN
+/**
+Shut down all background threads created by InnoDB. */
+void
+srv_shutdown_all_bg_threads();
+
+/** Get the meta-data filename from the table name for a
+single-table tablespace.
+@param[in]	table		table object
+@param[out]	filename	filename
+@param[in]	max_len		filename max length */
 void
 srv_get_meta_data_filename(
-/*======================*/
-	dict_table_t*	table,		/*!< in: table */
-	char*			filename,	/*!< out: filename */
-	ulint			max_len)	/*!< in: filename max length */
-	MY_ATTRIBUTE((nonnull));
+	dict_table_t*	table,
+	char*		filename,
+	ulint		max_len);
+
+/** Get the encryption-data filename from the table name for a
+single-table tablespace.
+@param[in]	table		table object
+@param[out]	filename	filename
+@param[in]	max_len		filename max length */
+void
+srv_get_encryption_data_filename(
+	dict_table_t*	table,
+	char*		filename,
+	ulint		max_len);
 
 /** Log sequence number at shutdown */
 extern	lsn_t	srv_shutdown_lsn;
 /** Log sequence number immediately after startup */
 extern	lsn_t	srv_start_lsn;
 
-#ifdef HAVE_DARWIN_THREADS
-/** TRUE if the F_FULLFSYNC option is available */
-extern	ibool	srv_have_fullfsync;
-#endif
-
 /** TRUE if the server is being started */
-extern	ibool	srv_is_being_started;
+extern	bool	srv_is_being_started;
+/** TRUE if SYS_TABLESPACES is available for lookups */
+extern	bool	srv_sys_tablespaces_open;
 /** TRUE if the server was successfully started */
 extern	ibool	srv_was_started;
 /** TRUE if the server is being started, before rolling back any
 incomplete transactions */
-extern	ibool	srv_startup_is_before_trx_rollback_phase;
+extern	bool	srv_startup_is_before_trx_rollback_phase;
 
 /** TRUE if a raw partition is in use */
 extern	ibool	srv_start_raw_disk_in_use;
 
 
 /** Shutdown state */
-enum srv_shutdown_state {
+enum srv_shutdown_t {
 	SRV_SHUTDOWN_NONE = 0,	/*!< Database running normally */
 	SRV_SHUTDOWN_CLEANUP,	/*!< Cleaning up in
 				logs_empty_and_mark_files_at_shutdown() */
@@ -159,10 +175,7 @@ enum srv_shutdown_state {
 
 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-extern	enum srv_shutdown_state	srv_shutdown_state;
+extern	enum srv_shutdown_t	srv_shutdown_state;
 #endif /* !UNIV_HOTBACKUP */
 
-/** Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0UL
-
 #endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 880d7d2a473..bc419a9be8f 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,12 +28,11 @@ Created 9/5/1995 Heikki Tuuri
 #define sync0arr_h
 
 #include "univ.i"
-#include "ut0lst.h"
-#include "ut0mem.h"
 #include "os0thread.h"
 
-/** Synchonization cell */
+/** Synchronization wait array cell */
 struct sync_cell_t;
+
 /** Synchronization wait array */
 struct sync_array_t;
 
@@ -42,112 +42,86 @@ in the instance for waiting for an object. The event of the cell is
 reset to nonsignalled state.
 If reserving cell of the instance fails, try to get another new
 instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
+@return the sync array found, never NULL. */
 UNIV_INLINE
 sync_array_t*
 sync_array_get_and_reserve_cell(
-/*============================*/
 	void*		object,	/*!< in: pointer to the object to wait for */
 	ulint		type,	/*!< in: lock request type */
 	const char*	file,	/*!< in: file where requested */
 	ulint		line,	/*!< in: line where requested */
-	ulint*		index);	/*!< out: index of the reserved cell */
+	sync_cell_t**	cell);	/*!< out: the cell reserved, never NULL */
 /******************************************************************//**
 Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state.
-@return true if free cell is found, otherwise false */
-UNIV_INTERN
-bool
+The event of the cell is reset to nonsignalled state. */
+sync_cell_t*
 sync_array_reserve_cell(
-/*====================*/
 	sync_array_t*	arr,	/*!< in: wait array */
 	void*		object, /*!< in: pointer to the object to wait for */
 	ulint		type,	/*!< in: lock request type */
 	const char*	file,	/*!< in: file where requested */
-	ulint		line,	/*!< in: line where requested */
-	ulint*		index); /*!< out: index of the reserved cell */
+	ulint		line);	/*!< in: line where requested */
+
 /******************************************************************//**
 This function should be called when a thread starts to wait on
 a wait array cell. In the debug version this function checks
 if the wait for a semaphore will result in a deadlock, in which
 case prints info and asserts. */
-UNIV_INTERN
 void
 sync_array_wait_event(
-/*==================*/
 	sync_array_t*	arr,	/*!< in: wait array */
-	ulint		index);	 /*!< in: index of the reserved cell */
+	sync_cell_t*&	cell);	/*!< in: the reserved cell */
+
 /******************************************************************//**
 Frees the cell. NOTE! sync_array_wait_event frees the cell
 automatically! */
-UNIV_INTERN
 void
 sync_array_free_cell(
-/*=================*/
 	sync_array_t*	arr,	/*!< in: wait array */
-	ulint		index);	/*!< in: index of the cell in array */
+	sync_cell_t*&	cell);	/*!< in: the reserved cell */
+
 /**********************************************************************//**
 Note that one of the wait objects was signalled. */
-UNIV_INTERN
 void
-sync_array_object_signalled(void);
-/*=============================*/
+sync_array_object_signalled();
 
 /**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void);
-/*====================================*/
-/**********************************************************************//**
 Prints warnings of long semaphore waits to stderr.
-@return	TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
+@return TRUE if fatal semaphore wait threshold was exceeded */
 ibool
 sync_array_print_long_waits(
-/*========================*/
 	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
-	const void**	sema)	/*!< out: longest-waited-for semaphore */
-	MY_ATTRIBUTE((nonnull));
+	const void**	sema);	/*!< out: longest-waited-for semaphore */
+
 /********************************************************************//**
 Validates the integrity of the wait array. Checks
 that the number of reserved cells equals the count variable. */
-UNIV_INTERN
 void
 sync_array_validate(
-/*================*/
 	sync_array_t*	arr);	/*!< in: sync wait array */
+
 /**********************************************************************//**
 Prints info of the wait array. */
-UNIV_INTERN
 void
 sync_array_print(
-/*=============*/
 	FILE*		file);	/*!< in: file where to print */
 
 /**********************************************************************//**
 Create the primary system wait array(s), they are protected by an OS mutex */
-UNIV_INTERN
 void
 sync_array_init(
-/*============*/
 	ulint		n_threads);	/*!< in: Number of slots to create */
+
 /**********************************************************************//**
 Close sync array wait sub-system. */
-UNIV_INTERN
 void
-sync_array_close(void);
-/*==================*/
+sync_array_close();
 
 /**********************************************************************//**
 Get an instance of the sync wait array. */
-UNIV_INTERN
+UNIV_INLINE
 sync_array_t*
-sync_array_get(void);
-/*================*/
-
+sync_array_get();
 /**********************************************************************//**
 Prints info of the wait array without using any mutexes/semaphores. */
 UNIV_INTERN
@@ -166,6 +140,6 @@ sync_array_get_nth_cell(
 
 #ifndef UNIV_NONINL
 #include "sync0arr.ic"
-#endif
+#endif /* !UNIV_NONINL */
 
-#endif
+#endif /* sync0arr_h */
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
index 18a46dd0a41..a15e2176278 100644
--- a/storage/innobase/include/sync0arr.ic
+++ b/storage/innobase/include/sync0arr.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,8 +25,27 @@ Inline code
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
 
-/** User configured sync array size */
-extern ulong srv_sync_array_size;
+extern ulint		sync_array_size;
+extern sync_array_t**	sync_wait_array;
+
+#include "ut0counter.h"
+
+/**********************************************************************//**
+Get an instance of the sync wait array.
+@return an instance of the sync wait array. */
+
+UNIV_INLINE
+sync_array_t*
+sync_array_get()
+/*============*/
+{
+	if (sync_array_size <= 1) {
+		return(sync_wait_array[0]);
+	}
+
+	return(sync_wait_array[default_indexer_t<>::get_rnd_index()
+			       % sync_array_size]);
+}
 
 /******************************************************************//**
 Get an instance of the sync wait array and reserve a wait array cell
@@ -34,31 +53,33 @@ in the instance for waiting for an object. The event of the cell is
 reset to nonsignalled state.
 If reserving cell of the instance fails, try to get another new
 instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
+@return the sync array reserved, never NULL. */
 UNIV_INLINE
 sync_array_t*
 sync_array_get_and_reserve_cell(
 /*============================*/
-	void*		object,	/*!< in: pointer to the object to wait for */
+	void*		object, /*!< in: pointer to the object to wait for */
 	ulint		type,	/*!< in: lock request type */
 	const char*	file,	/*!< in: file where requested */
 	ulint		line,	/*!< in: line where requested */
-	ulint*		index)	/*!< out: index of the reserved cell */
+	sync_cell_t**	cell)	/*!< out: the cell reserved, never NULL */
 {
-	sync_array_t*	sync_arr;
-	bool		reserved = false;
+	sync_array_t*	sync_arr = NULL;
 
-	for (ulint i = 0; i < srv_sync_array_size && !reserved; ++i) {
+	*cell = NULL;
+	for (ulint i = 0; i < sync_array_size && *cell == NULL; ++i) {
+		/* Although the sync_array is currently picked at random,
+		we still try at most sync_array_size times, in case any
+		of the sync_arrays we get is full */
 		sync_arr = sync_array_get();
-		reserved = sync_array_reserve_cell(sync_arr, object, type,
-						   file, line, index);
-	}
+		*cell = sync_array_reserve_cell(sync_arr, object, type,
+					       file, line);
+        }
 
 	/* This won't be true every time, for the loop above may execute
 	more than srv_sync_array_size times to reserve a cell.
 	But an assertion here makes the code more solid. */
-	ut_a(reserved);
+	ut_a(*cell != NULL);
 
-	return sync_arr;
+	return(sync_arr);
 }
-
diff --git a/storage/innobase/include/sync0debug.h b/storage/innobase/include/sync0debug.h
new file mode 100644
index 00000000000..6b80c0b25a1
--- /dev/null
+++ b/storage/innobase/include/sync0debug.h
@@ -0,0 +1,105 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0debug.h
+Debug checks for latches, header file
+
+Created 2012-08-21 Sunny Bains
+*******************************************************/
+
+#ifndef sync0debug_h
+#define sync0debug_h
+
+#include "univ.i"
+#include "sync0types.h"
+
+/** Initializes the synchronization data structures. */
+void
+sync_check_init();
+
+/** Frees the resources in synchronization data structures. */
+void
+sync_check_close();
+
+#ifdef UNIV_DEBUG
+/** Enable sync order checking. */
+void
+sync_check_enable();
+
+/** Check if it is OK to acquire the latch.
+@param[in]	latch	latch type */
+void
+sync_check_lock_validate(const latch_t* latch);
+
+/** Note that the lock has been granted
+@param[in]	latch	latch type */
+void
+sync_check_lock_granted(const latch_t* latch);
+
+/** Check if it is OK to acquire the latch.
+@param[in]	latch	latch type
+@param[in]	level	the level of the mutex */
+void
+sync_check_lock(const latch_t* latch, latch_level_t level);
+
+/**
+Check if it is OK to re-acquire the lock. */
+void
+sync_check_relock(const latch_t* latch);
+
+/** Removes a latch from the thread level array if it is found there.
+@param[in]	latch	to unlock */
+void
+sync_check_unlock(const latch_t* latch);
+
+/** Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@param[in]	level	to find
+@return	a matching latch, or NULL if not found */
+const latch_t*
+sync_check_find(latch_level_t level);
+
+/** Checks that the level array for the current thread is empty.
+Terminate iteration if the functor returns true.
+@param[in,out]	 functor	called for each element.
+@return true if the functor returns true */
+bool
+sync_check_iterate(sync_check_functor_t& functor);
+
+/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+void
+rw_lock_debug_mutex_enter();
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit();
+
+#endif /* UNIV_DEBUG */
+
+#endif /* !sync0debug_h */
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
new file mode 100644
index 00000000000..0eaefc7167a
--- /dev/null
+++ b/storage/innobase/include/sync0policy.h
@@ -0,0 +1,550 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/sync0policy.h
+Policies for mutexes.
+
+Created 2012-08-21 Sunny Bains.
+***********************************************************************/
+
+#ifndef sync0policy_h
+#define sync0policy_h
+
+#include "univ.i"
+#include "ut0rnd.h"
+#include "os0thread.h"
+#include "sync0types.h"
+#include "srv0mon.h"
+
+#ifdef UNIV_DEBUG
+
+# define MUTEX_MAGIC_N 979585UL
+
+template <typename Mutex>
+class MutexDebug {
+public:
+
+	/** For passing context to SyncDebug */
+	struct Context : public latch_t {
+
+		/** Constructor */
+		Context()
+			:
+			m_mutex(),
+			m_filename(),
+			m_line(),
+			m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+		{
+			/* No op */
+		}
+
+		/** Create the context for SyncDebug, tracking fields cleared
+		@param[in]	id	ID of the latch to track */
+		Context(latch_id_t id)
+			:
+			latch_t(id), m_mutex(), m_filename(), m_line(),
+			m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+		{
+		}
+
+		/** Set to locked state
+		@param[in]	mutex		The mutex to acquire
+		@param[in]	filename	File name from where to acquire
+		@param[in]	line		Line number in filename */
+		void locked(
+			const Mutex*		mutex,
+			const char*		filename,
+			ulint			line)
+			UNIV_NOTHROW
+		{
+			m_mutex = mutex;
+
+			m_thread_id = os_thread_get_curr_id();
+
+			m_filename = filename;
+
+			m_line = line;
+		}
+
+		/** Reset to unlock state */
+		void release()
+			UNIV_NOTHROW
+		{
+			m_mutex = NULL;
+
+			m_thread_id = os_thread_id_t(ULINT_UNDEFINED);
+
+			m_filename = NULL;
+
+			m_line = ULINT_UNDEFINED;
+		}
+
+		/** Print information about the latch
+		@return the string representation */
+		virtual std::string to_string() const
+			UNIV_NOTHROW
+		{
+			std::ostringstream msg;
+
+			msg << m_mutex->policy().to_string();
+
+			if (os_thread_pf(m_thread_id) != ULINT_UNDEFINED) {
+
+				msg << " addr: " << m_mutex
+				    << " acquired: " << locked_from().c_str();
+
+			} else {
+				msg << "Not locked";
+			}
+
+			return(msg.str());
+		}
+
+		/** @return the name of the file and line number in the file
+		from where the mutex was acquired "filename:line" */
+		virtual std::string locked_from() const
+		{
+			std::ostringstream msg;
+
+			msg << sync_basename(m_filename) << ":" << m_line;
+
+			return(std::string(msg.str()));
+		}
+
+		/** Mutex to check for lock order violation */
+		const Mutex*	m_mutex;
+
+		/** Filename from where enter was called */
+		const char*	m_filename;
+
+		/** Line number in filename */
+		ulint		m_line;
+
+		/** Thread ID of the thread that own(ed) the mutex */
+		os_thread_id_t	m_thread_id;
+	};
+
+	/** Constructor. Value-initialises m_magic_n and m_context. */
+	MutexDebug()
+		UNIV_NOTHROW
+		:
+		m_magic_n(),
+		m_context()
+	{
+		/* No op; init() sets the magic number */
+	}
+
+	/* Destructor */
+	virtual ~MutexDebug() { }
+
+	/** Mutex is being destroyed. */
+	void destroy() UNIV_NOTHROW
+	{
+		ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+
+		m_magic_n = 0;
+
+		m_context.m_thread_id = 0;
+	}
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	id              Mutex ID */
+	void init(latch_id_t id)
+		UNIV_NOTHROW;
+
+	/** Called when an attempt is made to lock the mutex
+	@param[in]	mutex		Mutex instance to be locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void enter(
+		const Mutex*	mutex,
+		const char*	filename,
+		ulint		line)
+		UNIV_NOTHROW;
+
+	/** Called when the mutex is locked
+	@param[in]	mutex		Mutex instance that was locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void locked(
+		const Mutex*	mutex,
+		const char*	filename,
+		ulint		line)
+		UNIV_NOTHROW;
+
+	/** Called when the mutex is released
+	@param[in]	mutex		Mutex that was released */
+	void release(const Mutex* mutex)
+		UNIV_NOTHROW;
+
+	/** @return true if thread owns the mutex */
+	bool is_owned() const UNIV_NOTHROW
+	{
+		return(os_thread_eq(
+				m_context.m_thread_id,
+				os_thread_get_curr_id()));
+	}
+
+	/** @return the name of the file from which the mutex was acquired */
+	const char* get_enter_filename() const
+		UNIV_NOTHROW
+	{
+		return(m_context.m_filename);
+	}
+
+	/** @return the line number from which the mutex was acquired */
+	ulint get_enter_line() const
+		UNIV_NOTHROW
+	{
+		return(m_context.m_line);
+	}
+
+	/** @return id of the thread recorded as owning the mutex */
+	os_thread_id_t get_thread_id() const
+		UNIV_NOTHROW
+	{
+		return(m_context.m_thread_id);
+	}
+
+	/** Magic number to check for memory corruption. */
+	ulint			m_magic_n;
+
+	/** Latch state of the mutex owner */
+	Context			m_context;
+};
+#endif /* UNIV_DEBUG */
+
+/** Policy whose hooks are all no-ops: nothing is tracked or counted. */
+template <typename Mutex>
+struct NoPolicy {
+	/** Default constructor. */
+	NoPolicy() { }
+
+	void init(const Mutex&, latch_id_t, const char*, uint32_t)
+		UNIV_NOTHROW { }
+	void destroy() UNIV_NOTHROW { }
+	void enter(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
+	void add(uint32_t, uint32_t) UNIV_NOTHROW { }
+	void locked(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
+	void release(const Mutex&) UNIV_NOTHROW { }
+	std::string to_string() const { return(""); }
+	latch_id_t get_id() const;
+};
+
+/** Collect the metrics per mutex instance, no aggregation. */
+template <typename Mutex>
+struct GenericPolicy
+#ifdef UNIV_DEBUG
+: public MutexDebug<Mutex>
+#endif /* UNIV_DEBUG */
+{
+public:
+	typedef Mutex MutexType;
+
+	/** Constructor. */
+	GenericPolicy()
+		UNIV_NOTHROW
+		:
+#ifdef UNIV_DEBUG
+		MutexDebug<MutexType>(),
+#endif /* UNIV_DEBUG */
+		m_count(),
+		m_id()
+		{ }
+
+	/** Destructor */
+	~GenericPolicy() { }
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	mutex		Mutex instance to track
+	@param[in]	id              Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		const MutexType&	mutex,
+		latch_id_t		id,
+		const char*		filename,
+		uint32_t		line)
+		UNIV_NOTHROW
+	{
+		m_id = id;
+
+		latch_meta_t&	meta = sync_latch_get_meta(id);
+
+		ut_ad(meta.get_id() == id);
+
+		meta.get_counter()->single_register(&m_count);
+
+		sync_file_created_register(this, filename, line);
+
+		ut_d(MutexDebug<MutexType>::init(m_id));
+	}
+
+	/** Called when the mutex is destroyed. */
+	void destroy()
+		UNIV_NOTHROW
+	{
+		latch_meta_t&	meta = sync_latch_get_meta(m_id);
+
+		meta.get_counter()->single_deregister(&m_count);
+
+		sync_file_created_deregister(this);
+
+		ut_d(MutexDebug<MutexType>::destroy());
+	}
+
+	/** Called after a successful mutex acquire.
+	@param[in]	n_spins		Number of times the thread did
+					spins while trying to acquire the mutex
+	@param[in]	n_waits		Number of times the thread waited
+					in some type of OS queue */
+	void add(
+		uint32_t	n_spins,
+		uint32_t	n_waits)
+		UNIV_NOTHROW
+	{
+		/* Currently global on/off. Keeps things simple and fast */
+
+		if (!m_count.m_enabled) {
+
+			return;
+		}
+
+		m_count.m_spins += n_spins;
+		m_count.m_waits += n_waits;
+
+		++m_count.m_calls;
+	}
+
+	/** Called when an attempt is made to lock the mutex
+	@param[in]	mutex		Mutex instance to be locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void enter(
+		const MutexType&	mutex,
+		const char*		filename,
+		ulint			line)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::enter(&mutex, filename, line));
+	}
+
+	/** Called when the mutex is locked
+	@param[in]	mutex		Mutex instance that is locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void locked(
+		const MutexType&	mutex,
+		const char*		filename,
+		ulint			line)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::locked(&mutex, filename, line));
+	}
+
+	/** Called when the mutex is released
+	@param[in]	mutex		Mutex instance that is released */
+	void release(const MutexType& mutex)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::release(&mutex));
+	}
+
+	/** Print the information about the latch
+	@return the string representation */
+	std::string print() const
+		UNIV_NOTHROW;
+
+	/** @return the latch ID */
+	latch_id_t get_id() const
+		UNIV_NOTHROW
+	{
+		return(m_id);
+	}
+
+	/** @return the string representation */
+	std::string to_string() const;
+
+private:
+	typedef latch_meta_t::CounterType Counter;
+
+	/** The user visible counters, registered with the meta-data.  */
+	Counter::Count		m_count;
+
+	/** Latch meta data ID */
+	latch_id_t		m_id;
+};
+
+/** Track aggregate metrics policy, used by the page mutex. There are just
+too many of them to count individually. */
+template <typename Mutex>
+class BlockMutexPolicy
+#ifdef UNIV_DEBUG
+: public MutexDebug<Mutex>
+#endif /* UNIV_DEBUG */
+{
+public:
+	typedef Mutex MutexType;
+	typedef typename latch_meta_t::CounterType::Count Count;
+
+	/** Default constructor. */
+	BlockMutexPolicy()
+		:
+#ifdef UNIV_DEBUG
+		MutexDebug<MutexType>(),
+#endif /* UNIV_DEBUG */
+		m_count(),
+		m_id()
+	{
+		/* Do nothing */
+	}
+
+	/** Destructor */
+	~BlockMutexPolicy() { }
+
+	/** Called when the mutex is "created". Note: Not from the constructor
+	but when the mutex is initialised.
+	@param[in]	mutex		Mutex instance to track
+	@param[in]	id              Mutex ID
+	@param[in]	filename	File where mutex was created
+	@param[in]	line		Line in filename */
+	void init(
+		const MutexType&	mutex,
+		latch_id_t		id,
+		const char*		filename,
+		uint32_t		line)
+		UNIV_NOTHROW
+	{
+		/* It can be LATCH_ID_BUF_BLOCK_MUTEX or
+		LATCH_ID_BUF_POOL_ZIP. Unfortunately, they
+		are mapped to the same mutex type in the
+		buffer pool code. */
+
+		m_id = id;
+
+		latch_meta_t&	meta = sync_latch_get_meta(m_id);
+
+		ut_ad(meta.get_id() == id);
+
+		m_count = meta.get_counter()->sum_register();
+
+		ut_d(MutexDebug<MutexType>::init(m_id));
+	}
+
+	/** Called when the mutex is destroyed. */
+	void destroy()
+		UNIV_NOTHROW
+	{
+		latch_meta_t&	meta = sync_latch_get_meta(m_id);
+
+		ut_ad(meta.get_id() == m_id);
+
+		meta.get_counter()->sum_deregister(m_count);
+
+		m_count = NULL;
+
+		ut_d(MutexDebug<MutexType>::destroy());
+	}
+
+	/** Called after a successful mutex acquire.
+	@param[in]	n_spins		Number of times the thread did
+					spins while trying to acquire the mutex
+	@param[in]	n_waits		Number of times the thread waited
+					in some type of OS queue */
+	void add(
+		uint32_t	n_spins,
+		uint32_t	n_waits)
+		UNIV_NOTHROW
+	{
+		if (!m_count->m_enabled) {
+
+			return;
+		}
+
+		m_count->m_spins += n_spins;
+		m_count->m_waits += n_waits;
+
+		++m_count->m_calls;
+	}
+
+	/** Called when the mutex is locked
+	@param[in]	mutex		Mutex instance that is locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void locked(
+		const MutexType&	mutex,
+		const char*		filename,
+		ulint			line)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::locked(&mutex, filename, line));
+	}
+
+	/** Called when the mutex is released
+	@param[in]	mutex		Mutex instance that is released */
+	void release(const MutexType& mutex)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::release(&mutex));
+	}
+
+	/** Called when an attempt is made to lock the mutex
+	@param[in]	mutex		Mutex instance to be locked
+	@param[in]	filename	Filename from where it was called
+	@param[in]	line		Line number from where it was called */
+	void enter(
+		const MutexType&	mutex,
+		const char*		filename,
+		ulint			line)
+		UNIV_NOTHROW
+	{
+		ut_d(MutexDebug<MutexType>::enter(&mutex, filename, line));
+	}
+
+	/** Print the information about the latch
+	@return the string representation */
+	std::string print() const
+		UNIV_NOTHROW;
+
+	/** @return the latch ID */
+	latch_id_t get_id() const
+	{
+		return(m_id);
+	}
+
+	/** @return the string representation */
+	std::string to_string() const;
+
+private:
+	typedef latch_meta_t::CounterType Counter;
+
+	/** The user visible counters, registered with the meta-data.  */
+	Counter::Count*		m_count;
+
+	/** Latch meta data ID */
+	latch_id_t		m_id;
+};
+
+#ifndef UNIV_NONINL
+#include "sync0policy.ic"
+#endif /* UNIV_NONINL */
+
+#endif /* sync0policy_h */
diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic
new file mode 100644
index 00000000000..f7598fe7854
--- /dev/null
+++ b/storage/innobase/include/sync0policy.ic
@@ -0,0 +1,100 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/sync0policy.ic
+Policy for mutexes.
+
+Created 2012-08-21 Sunny Bains.
+***********************************************************************/
+
+#include "sync0debug.h"
+
+template <typename Mutex>
+std::string GenericPolicy<Mutex>::to_string() const
+{
+	return(sync_mutex_to_string(get_id(), sync_file_created_get(this)));
+}
+
+template <typename Mutex>
+std::string BlockMutexPolicy<Mutex>::to_string() const
+{
+	/* I don't think it makes sense to keep track of the file name
+	and line number for each block mutex. Too much overhead. Use the
+	latch id to figure out the location from the source. */
+	return(sync_mutex_to_string(get_id(), "buf0buf.cc:0"));
+}
+
+#ifdef UNIV_DEBUG
+
+template <typename Mutex>
+void MutexDebug<Mutex>::init(latch_id_t id)
+	UNIV_NOTHROW
+{
+	m_context.m_id = id;
+
+	m_context.release();
+
+	m_magic_n = MUTEX_MAGIC_N;
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::enter(
+	const Mutex*	mutex,
+	const char*	name,
+	ulint		line)
+	UNIV_NOTHROW
+{
+	ut_ad(!is_owned());
+
+	Context	context(m_context.get_id());
+
+	context.locked(mutex, name, line);
+
+	/* Check for latch order violation. */
+
+	sync_check_lock_validate(&context);
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::locked(
+	const Mutex*	mutex,
+	const char*	name,
+	ulint		line)
+	UNIV_NOTHROW
+{
+	ut_ad(!is_owned());
+	ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+
+	m_context.locked(mutex, name, line);
+
+	sync_check_lock_granted(&m_context);
+}
+
+template <typename Mutex>
+void MutexDebug<Mutex>::release(const Mutex* mutex)
+	UNIV_NOTHROW
+{
+	ut_ad(is_owned());
+
+	m_context.release();
+
+	sync_check_unlock(&m_context);
+}
+
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index b0fa214be81..396156e9518 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -35,115 +35,92 @@ Created 9/11/1995 Heikki Tuuri
 
 #include "univ.i"
 #ifndef UNIV_HOTBACKUP
-#include "ut0lst.h"
 #include "ut0counter.h"
-#include "sync0sync.h"
-#include "os0sync.h"
+#include "os0event.h"
+#include "ut0mutex.h"
 
 /** Enable semaphore request instrumentation */
 extern my_bool srv_instrument_semaphores;
 
-/* The following undef is to prevent a name conflict with a macro
-in MySQL: */
-#undef rw_lock_t
 #endif /* !UNIV_HOTBACKUP */
 
 /** Counters for RW locks. */
 struct rw_lock_stats_t {
-	typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+	typedef ib_counter_t<int64_t, IB_N_SLOTS> int64_counter_t;
 
 	/** number of spin waits on rw-latches,
 	resulted during shared (read) locks */
-	ib_int64_counter_t	rw_s_spin_wait_count;
+	int64_counter_t		rw_s_spin_wait_count;
 
 	/** number of spin loop rounds on rw-latches,
 	resulted during shared (read) locks */
-	ib_int64_counter_t	rw_s_spin_round_count;
+	int64_counter_t		rw_s_spin_round_count;
 
 	/** number of OS waits on rw-latches,
 	resulted during shared (read) locks */
-	ib_int64_counter_t	rw_s_os_wait_count;
-
-	/** number of unlocks (that unlock shared locks),
-	set only when UNIV_SYNC_PERF_STAT is defined */
-	ib_int64_counter_t	rw_s_exit_count;
+	int64_counter_t		rw_s_os_wait_count;
 
 	/** number of spin waits on rw-latches,
 	resulted during exclusive (write) locks */
-	ib_int64_counter_t	rw_x_spin_wait_count;
+	int64_counter_t		rw_x_spin_wait_count;
 
 	/** number of spin loop rounds on rw-latches,
 	resulted during exclusive (write) locks */
-	ib_int64_counter_t	rw_x_spin_round_count;
+	int64_counter_t		rw_x_spin_round_count;
 
 	/** number of OS waits on rw-latches,
 	resulted during exclusive (write) locks */
-	ib_int64_counter_t	rw_x_os_wait_count;
+	int64_counter_t		rw_x_os_wait_count;
+
+	/** number of spin waits on rw-latches,
+	resulted during sx locks */
+	int64_counter_t		rw_sx_spin_wait_count;
 
-	/** number of unlocks (that unlock exclusive locks),
-	set only when UNIV_SYNC_PERF_STAT is defined */
-	ib_int64_counter_t	rw_x_exit_count;
+	/** number of spin loop rounds on rw-latches,
+	resulted during sx locks */
+	int64_counter_t		rw_sx_spin_round_count;
+
+	/** number of OS waits on rw-latches,
+	resulted during sx locks */
+	int64_counter_t		rw_sx_os_wait_count;
 };
 
-/* Latch types; these are used also in btr0btr.h: keep the numerical values
-smaller than 30 and the order of the numerical values like below! */
-#define RW_S_LATCH	1
-#define	RW_X_LATCH	2
-#define	RW_NO_LATCH	3
+/* Latch types; these are also used in btr0btr.h and mtr0mtr.h: keep the
+numerical values smaller than 30 (smaller than BTR_MODIFY_TREE and
+MTR_MEMO_MODIFY) and in the order shown below! They must also be powers
+of 2 so that they can be ORed together as a combination of flags. */
+enum rw_lock_type_t {
+	RW_S_LATCH = 1,
+	RW_X_LATCH = 2,
+	RW_SX_LATCH = 4,
+	RW_NO_LATCH = 8
+};
 
 #ifndef UNIV_HOTBACKUP
-/* We decrement lock_word by this amount for each x_lock. It is also the
+/* We decrement lock_word by X_LOCK_DECR for each x_lock. It is also the
 start value for the lock_word, meaning that it limits the maximum number
-of concurrent read locks before the rw_lock breaks. The current value of
-0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
-#define X_LOCK_DECR		0x00100000
+of concurrent read locks before the rw_lock breaks. */
+/* We decrement lock_word by X_LOCK_HALF_DECR for sx_lock. */
+#define X_LOCK_DECR		0x20000000
+#define X_LOCK_HALF_DECR	0x10000000
 
+#ifdef rw_lock_t
+#undef rw_lock_t
+#endif
 struct rw_lock_t;
-#ifdef UNIV_SYNC_DEBUG
+
+#ifdef UNIV_DEBUG
 struct rw_lock_debug_t;
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 
 typedef UT_LIST_BASE_NODE_T(rw_lock_t)	rw_lock_list_t;
 
-extern rw_lock_list_t	rw_lock_list;
-extern ib_mutex_t		rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-extern os_fast_mutex_t		rw_lock_debug_mutex;
-#endif /* UNIV_SYNC_DEBUG */
+extern rw_lock_list_t			rw_lock_list;
+extern ib_mutex_t			rw_lock_list_mutex;
 
 /** Counters for RW locks. */
 extern rw_lock_stats_t	rw_lock_stats;
 
-#ifdef UNIV_PFS_RWLOCK
-/* Following are rwlock keys used to register with MySQL
-performance schema */
-# ifdef UNIV_LOG_ARCHIVE
-extern	mysql_pfs_key_t	archive_lock_key;
-# endif /* UNIV_LOG_ARCHIVE */
-extern	mysql_pfs_key_t btr_search_latch_key;
-extern	mysql_pfs_key_t	buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-extern	mysql_pfs_key_t	buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-extern	mysql_pfs_key_t	dict_operation_lock_key;
-extern	mysql_pfs_key_t	checkpoint_lock_key;
-extern	mysql_pfs_key_t	fil_space_latch_key;
-extern	mysql_pfs_key_t	fts_cache_rw_lock_key;
-extern	mysql_pfs_key_t	fts_cache_init_rw_lock_key;
-extern	mysql_pfs_key_t	trx_i_s_cache_lock_key;
-extern	mysql_pfs_key_t	trx_purge_latch_key;
-extern	mysql_pfs_key_t	index_tree_rw_lock_key;
-extern	mysql_pfs_key_t	index_online_log_key;
-extern	mysql_pfs_key_t	dict_table_stats_key;
-extern  mysql_pfs_key_t trx_sys_rw_lock_key;
-extern  mysql_pfs_key_t hash_table_rw_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-
 #ifndef UNIV_PFS_RWLOCK
 /******************************************************************//**
 Creates, or rather, initializes an rw-lock object in a specified memory
@@ -153,13 +130,8 @@ is necessary only if the memory block containing it is freed.
 if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
 defined, the rwlock are instrumented with performance schema probes. */
 # ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-#   define rw_lock_create(K, L, level)				\
+#  define rw_lock_create(K, L, level)				\
 	rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-#  else	/* UNIV_SYNC_DEBUG */
-#   define rw_lock_create(K, L, level)				\
-	rw_lock_create_func((L), #L, __FILE__, __LINE__)
-#  endif/* UNIV_SYNC_DEBUG */
 # else /* UNIV_DEBUG */
 #  define rw_lock_create(K, L, level)				\
 	rw_lock_create_func((L), #L, __FILE__, __LINE__)
@@ -178,18 +150,46 @@ unlocking, not the corresponding function. */
 # define rw_lock_s_lock_gen(M, P)				\
 	rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
 
-# define rw_lock_s_lock_gen_nowait(M, P)			\
-	rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
 # define rw_lock_s_lock_nowait(M, F, L)				\
 	rw_lock_s_lock_low((M), 0, (F), (L))
 
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 #  define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(P, L)
 # else
 #  define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(L)
-# endif
+# endif /* UNIV_DEBUG */
+
+#define rw_lock_sx_lock(L)					\
+	rw_lock_sx_lock_func((L), 0, __FILE__, __LINE__)
 
+#define rw_lock_sx_lock_inline(M, P, F, L)			\
+	rw_lock_sx_lock_func((M), (P), (F), (L))
+
+#define rw_lock_sx_lock_gen(M, P)				\
+	rw_lock_sx_lock_func((M), (P), __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_nowait(M, P)				\
+	rw_lock_sx_lock_low((M), (P), __FILE__, __LINE__)
+
+#define rw_lock_sx_lock(L)					\
+	rw_lock_sx_lock_func((L), 0, __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_inline(M, P, F, L)			\
+	rw_lock_sx_lock_func((M), (P), (F), (L))
+
+#define rw_lock_sx_lock_gen(M, P)				\
+	rw_lock_sx_lock_func((M), (P), __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_nowait(M, P)				\
+	rw_lock_sx_lock_low((M), (P), __FILE__, __LINE__)
+
+# ifdef UNIV_DEBUG
+#  define rw_lock_sx_unlock(L)		rw_lock_sx_unlock_func(0, L)
+#  define rw_lock_sx_unlock_gen(L, P)	rw_lock_sx_unlock_func(P, L)
+# else /* UNIV_DEBUG */
+#  define rw_lock_sx_unlock(L)		rw_lock_sx_unlock_func(L)
+#  define rw_lock_sx_unlock_gen(L, P)	rw_lock_sx_unlock_func(L)
+# endif /* UNIV_DEBUG */
 
 # define rw_lock_x_lock(M)					\
 	rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
@@ -206,7 +206,7 @@ unlocking, not the corresponding function. */
 # define rw_lock_x_lock_func_nowait_inline(M, F, L)		\
 	rw_lock_x_lock_func_nowait((M), (F), (L))
 
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 #  define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(P, L)
 # else
 #  define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(L)
@@ -218,13 +218,8 @@ unlocking, not the corresponding function. */
 
 /* Following macros point to Performance Schema instrumented functions. */
 # ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
 #   define rw_lock_create(K, L, level)				\
 	pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__)
-#  else	/* UNIV_SYNC_DEBUG */
-#   define rw_lock_create(K, L, level)				\
-	pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
-#  endif/* UNIV_SYNC_DEBUG */
 # else	/* UNIV_DEBUG */
 #  define rw_lock_create(K, L, level)				\
 	pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
@@ -243,18 +238,35 @@ unlocking, not the corresponding function. */
 # define rw_lock_s_lock_gen(M, P)				\
 	pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
 
-# define rw_lock_s_lock_gen_nowait(M, P)			\
-	pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
 # define rw_lock_s_lock_nowait(M, F, L)				\
 	pfs_rw_lock_s_lock_low((M), 0, (F), (L))
 
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 #  define rw_lock_s_unlock_gen(L, P)	pfs_rw_lock_s_unlock_func(P, L)
 # else
 #  define rw_lock_s_unlock_gen(L, P)	pfs_rw_lock_s_unlock_func(L)
 # endif
 
+# define rw_lock_sx_lock(M)					\
+	pfs_rw_lock_sx_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_sx_lock_inline(M, P, F, L)			\
+	pfs_rw_lock_sx_lock_func((M), (P), (F), (L))
+
+# define rw_lock_sx_lock_gen(M, P)				\
+	pfs_rw_lock_sx_lock_func((M), (P), __FILE__, __LINE__)
+
+#define rw_lock_sx_lock_nowait(M, P)				\
+	pfs_rw_lock_sx_lock_low((M), (P), __FILE__, __LINE__)
+
+# ifdef UNIV_DEBUG
+#  define rw_lock_sx_unlock(L)		pfs_rw_lock_sx_unlock_func(0, L)
+#  define rw_lock_sx_unlock_gen(L, P)	pfs_rw_lock_sx_unlock_func(P, L)
+# else
+#  define rw_lock_sx_unlock(L)		pfs_rw_lock_sx_unlock_func(L)
+#  define rw_lock_sx_unlock_gen(L, P)	pfs_rw_lock_sx_unlock_func(L)
+# endif
+
 # define rw_lock_x_lock(M)					\
 	pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
 
@@ -270,7 +282,7 @@ unlocking, not the corresponding function. */
 # define rw_lock_x_lock_func_nowait_inline(M, F, L)		\
 	pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
 
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 #  define rw_lock_x_unlock_gen(L, P)	pfs_rw_lock_x_unlock_func(P, L)
 # else
 #  define rw_lock_x_unlock_gen(L, P)	pfs_rw_lock_x_unlock_func(L)
@@ -278,7 +290,7 @@ unlocking, not the corresponding function. */
 
 # define rw_lock_free(M)		pfs_rw_lock_free_func(M)
 
-#endif /* UNIV_PFS_RWLOCK */
+#endif /* !UNIV_PFS_RWLOCK */
 
 #define rw_lock_s_unlock(L)		rw_lock_s_unlock_gen(L, 0)
 #define rw_lock_x_unlock(L)		rw_lock_x_unlock_gen(L, 0)
@@ -288,15 +300,12 @@ Creates, or rather, initializes an rw-lock object in a specified memory
 location (which must be appropriately aligned). The rw-lock is initialized
 to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
 is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
 void
 rw_lock_create_func(
 /*================*/
 	rw_lock_t*	lock,		/*!< in: pointer to memory */
 #ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+	latch_level_t	level,		/*!< in: level */
 #endif /* UNIV_DEBUG */
 	const char*	cmutex_name,	/*!< in: mutex name */
 	const char*	cfile_name,	/*!< in: file name where created */
@@ -305,26 +314,24 @@ rw_lock_create_func(
 Calling this function is obligatory only if the memory buffer containing
 the rw-lock is freed. Removes an rw-lock object from the global list. The
 rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
 void
 rw_lock_free_func(
 /*==============*/
-	rw_lock_t*	lock);	/*!< in: rw-lock */
+	rw_lock_t*	lock);		/*!< in/out: rw-lock */
 #ifdef UNIV_DEBUG
 /******************************************************************//**
 Checks that the rw-lock has been initialized and that there are no
 simultaneous shared and exclusive locks.
-@return	TRUE */
-UNIV_INTERN
-ibool
+@return true */
+bool
 rw_lock_validate(
 /*=============*/
-	rw_lock_t*	lock);	/*!< in: rw-lock */
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
 #endif /* UNIV_DEBUG */
 /******************************************************************//**
 Low-level function which tries to lock an rw-lock in s-mode. Performs no
 spinning.
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_s_lock_low(
@@ -340,7 +347,7 @@ NOTE! Use the corresponding macro, not directly this function, except if
 you supply the file name and line number. Lock an rw-lock in shared mode
 for the current thread. If the rw-lock is locked in exclusive mode, or
 there is an exclusive lock request waiting, the function spins a preset
-time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
+time (controlled by srv_n_spin_wait_rounds), waiting for the lock, before
 suspending the thread. */
 UNIV_INLINE
 void
@@ -355,7 +362,7 @@ rw_lock_s_lock_func(
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread if the lock can be
 obtained immediately.
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_func_nowait(
@@ -369,22 +376,21 @@ UNIV_INLINE
 void
 rw_lock_s_unlock_func(
 /*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock);	/*!< in/out: rw-lock */
 
 /******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread. If the rw-lock is locked
 in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
 for the lock, before suspending the thread. If the same thread has an x-lock
 on the rw-lock, locking succeed, with the following exception: if pass != 0,
 only a single x-lock may be taken on the lock. NOTE: If the same thread has
 an s-lock, locking does not succeed! */
-UNIV_INTERN
 void
 rw_lock_x_lock_func(
 /*================*/
@@ -394,16 +400,57 @@ rw_lock_x_lock_func(
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line);	/*!< in: line where requested */
 /******************************************************************//**
+Low-level function for acquiring an sx lock.
+@return FALSE if did not succeed, TRUE if success. */
+ibool
+rw_lock_sx_lock_low(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in SX mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single sx-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+void
+rw_lock_sx_lock_func(
+/*=================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
 Releases an exclusive mode lock. */
 UNIV_INLINE
 void
 rw_lock_x_unlock_func(
 /*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+
+/******************************************************************//**
+Releases an sx mode lock. */
+UNIV_INLINE
+void
+rw_lock_sx_unlock_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif /* UNIV_DEBUG */
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+
 /******************************************************************//**
 This function is used in the insert buffer to move the ownership of an
 x-latch on a buffer frame to the current thread. The x-latch was set by
@@ -412,7 +459,6 @@ read was done. The ownership is moved because we want that the current
 thread is able to acquire a second x-latch which is stored in an mtr.
 This, in turn, is needed to pass the debug checks of index page
 operations. */
-UNIV_INTERN
 void
 rw_lock_x_lock_move_ownership(
 /*==========================*/
@@ -421,32 +467,33 @@ rw_lock_x_lock_move_ownership(
 /******************************************************************//**
 Returns the value of writer_count for the lock. Does not reserve the lock
 mutex, so the caller must be sure it is not changed during the call.
-@return	value of writer_count */
+@return value of writer_count */
 UNIV_INLINE
 ulint
 rw_lock_get_x_lock_count(
 /*=====================*/
 	const rw_lock_t*	lock);	/*!< in: rw-lock */
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return	1 if waiters, 0 otherwise */
+/******************************************************************//**
+Returns the number of sx-locks for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call.
+@return value of sx-lock count */
 UNIV_INLINE
 ulint
-rw_lock_get_waiters(
-/*================*/
+rw_lock_get_sx_lock_count(
+/*======================*/
 	const rw_lock_t*	lock);	/*!< in: rw-lock */
 /******************************************************************//**
 Returns the write-status of the lock - this function made more sense
 with the old rw_lock implementation.
-@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_X, RW_LOCK_X_WAIT, RW_LOCK_SX */
 UNIV_INLINE
 ulint
 rw_lock_get_writer(
 /*===============*/
 	const rw_lock_t*	lock);	/*!< in: rw-lock */
 /******************************************************************//**
-Returns the number of readers.
-@return	number of readers */
+Returns the number of readers (s-locks).
+@return number of readers */
 UNIV_INLINE
 ulint
 rw_lock_get_reader_count(
@@ -455,111 +502,77 @@ rw_lock_get_reader_count(
 /******************************************************************//**
 Decrements lock_word the specified amount if it is greater than 0.
 This is used by both s_lock and x_lock operations.
-@return	TRUE if decr occurs */
+@return true if decr occurs */
 UNIV_INLINE
-ibool
+bool
 rw_lock_lock_word_decr(
 /*===================*/
 	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount);	/*!< in: amount to decrement */
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return	lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
-	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount);	/*!< in: amount to increment */
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
-	rw_lock_t*	lock,		/*!< in/out: lock to work on */
-	ibool		recursive);	/*!< in: TRUE if recursion
-					allowed */
-#ifdef UNIV_SYNC_DEBUG
+	ulint		amount,		/*!< in: amount to decrement */
+	lint		threshold);	/*!< in: threshold of judgement */
+#ifdef UNIV_DEBUG
 /******************************************************************//**
 Checks if the thread has locked the rw-lock in the specified mode, with
 the pass value == 0. */
-UNIV_INTERN
 ibool
 rw_lock_own(
 /*========*/
 	rw_lock_t*	lock,		/*!< in: rw-lock */
-	ulint		lock_type)	/*!< in: lock type: RW_LOCK_SHARED,
-					RW_LOCK_EX */
+	ulint		lock_type)	/*!< in: lock type: RW_LOCK_S,
+					RW_LOCK_X */
 	MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_SYNC_DEBUG */
+
 /******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode. */
-UNIV_INTERN
-ibool
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+bool
+rw_lock_own_flagged(
+/*================*/
+	const rw_lock_t*	lock,	/*!< in: rw-lock */
+	rw_lock_flags_t		flags)	/*!< in: specify lock types with
+					OR of the rw_lock_flag_t values */
+	MY_ATTRIBUTE((warn_unused_result));
+#endif /* UNIV_DEBUG */
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode.
+@return true if locked */
+bool
 rw_lock_is_locked(
 /*==============*/
 	rw_lock_t*	lock,		/*!< in: rw-lock */
-	ulint		lock_type);	/*!< in: lock type: RW_LOCK_SHARED,
-					RW_LOCK_EX */
-#ifdef UNIV_SYNC_DEBUG
+	ulint		lock_type);	/*!< in: lock type: RW_LOCK_S,
+					RW_LOCK_X or RW_LOCK_SX */
+#ifdef UNIV_DEBUG
 /***************************************************************//**
 Prints debug info of an rw-lock. */
-UNIV_INTERN
 void
 rw_lock_print(
 /*==========*/
-	rw_lock_t*	lock);	/*!< in: rw-lock */
+	rw_lock_t*	lock);		/*!< in: rw-lock */
 /***************************************************************//**
 Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
 void
 rw_lock_list_print_info(
 /*====================*/
-	FILE*	file);		/*!< in: file where to print */
+	FILE*		file);		/*!< in: file where to print */
 /***************************************************************//**
 Returns the number of currently locked rw-locks.
 Works only in the debug version.
-@return	number of locked rw-locks */
-UNIV_INTERN
+@return number of locked rw-locks */
 ulint
 rw_lock_n_locked(void);
 /*==================*/
 
 /*#####################################################################*/
 
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void);
-/*===========================*/
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void);
-/*==========================*/
 /*********************************************************************//**
 Prints info of a debug struct. */
-UNIV_INTERN
 void
 rw_lock_debug_print(
 /*================*/
 	FILE*			f,	/*!< in: output stream */
-	rw_lock_debug_t*	info);	/*!< in: debug struct */
-#endif /* UNIV_SYNC_DEBUG */
+	const rw_lock_debug_t*	info);	/*!< in: debug struct */
+#endif /* UNIV_DEBUG */
 
 /* NOTE! The structure appears here only for the compiler to know its size.
 Do not use its fields directly! */
@@ -571,73 +584,112 @@ shared locks are allowed. To prevent starving of a writer blocked by
 readers, a writer may queue for x-lock by decrementing lock_word: no
 new readers will be let in while the thread waits for readers to
 exit. */
-struct rw_lock_t {
+
+struct rw_lock_t
+#ifdef UNIV_DEBUG
+	: public latch_t
+#endif /* UNIV_DEBUG */
+{
+	/** Holds the state of the lock. */
 	volatile lint	lock_word;
-				/*!< Holds the state of the lock. */
-	volatile ulint	waiters;/*!< 1: there are waiters */
-	volatile ibool	recursive;/*!< Default value FALSE which means the lock
-				is non-recursive. The value is typically set
-				to TRUE making normal rw_locks recursive. In
-				case of asynchronous IO, when a non-zero
-				value of 'pass' is passed then we keep the
-				lock non-recursive.
-				This flag also tells us about the state of
-				writer_thread field. If this flag is set
-				then writer_thread MUST contain the thread
-				id of the current x-holder or wait-x thread.
-				This flag must be reset in x_unlock
-				functions before incrementing the lock_word */
+
+	/** 1: there are waiters */
+	volatile uint32_t	waiters;
+
+	/** number of granted SX locks. */
+	volatile ulint	sx_recursive;
+
+	/** This is true if the writer field is RW_LOCK_X_WAIT; this field
+	is located far from the memory update hotspot fields which are at
+	the start of this struct, thus we can peek at this field without
+	causing much memory bus traffic */
+	bool		writer_is_wait_ex;
+
+	/** The value is typically set to thread id of a writer thread making
+	normal rw_locks recursive. In case of asynchronous IO, when a non-zero
+	value of 'pass' is passed then we keep the lock non-recursive.
+
+	writer_thread must be reset in x_unlock functions before incrementing
+	the lock_word. */
 	volatile os_thread_id_t	writer_thread;
-				/*!< Thread id of writer thread. Is only
-				guaranteed to have sane and non-stale
-				value iff recursive flag is set. */
-	os_event_t	event;	/*!< Used by sync0arr.cc for thread queueing */
+
+	/** Used by sync0arr.cc for thread queueing */
+	os_event_t	event;
+
+	/** Event for next-writer to wait on. A thread must decrement
+	lock_word before waiting. */
 	os_event_t	wait_ex_event;
-				/*!< Event for next-writer to wait on. A thread
-				must decrement lock_word before waiting. */
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
-	ib_mutex_t	mutex;		/*!< The mutex protecting rw_lock_t */
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 
+	/** File name where lock created */
+	const char*	cfile_name;
+
+	/** File name where last s-locked; s-lock file/line may be incorrect */
+	const char*	last_s_file_name;
+
+	/** File name where last x-locked */
+	const char*	last_x_file_name;
+
+	/** Line where created */
+	unsigned	cline:13;
+
+	/** If 1 then the rw-lock is a block lock */
+	unsigned	is_block_lock:1;
+
+	/** Line number where last time s-locked */
+	unsigned	last_s_line:14;
+
+	/** Line number where last time x-locked */
+	unsigned	last_x_line:14;
+
+	const char*	lock_name;
+	const char*	file_name;/*!< File name where the lock was obtained */
+	ulint	line;		/*!< Line where the rw-lock was locked */
+	os_thread_id_t	thread_id;
+
+	/** Count of os_waits. May not be accurate */
+	uint32_t	count_os_wait;
+
+	/** All allocated rw locks are put into a list */
 	UT_LIST_NODE_T(rw_lock_t) list;
-				/*!< All allocated rw locks are put into a
-				list */
-#ifdef UNIV_SYNC_DEBUG
-	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
-				/*!< In the debug version: pointer to the debug
-				info list of the lock */
-	ulint	level;		/*!< Level in the global latching order. */
-#endif /* UNIV_SYNC_DEBUG */
+
 #ifdef UNIV_PFS_RWLOCK
-	struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
-#endif
-	ulint count_os_wait;	/*!< Count of os_waits. May not be accurate */
-	const char*	cfile_name;/*!< File name where lock created */
-	const char*	lock_name; /*!< lock name */
- 	os_thread_id_t	thread_id;/*!< thread id */
-	const char*	file_name;/*!< File name where the lock was obtained */
-	ulint		line;	  /*!< Line where the rw-lock was locked */
-        /* last s-lock file/line is not guaranteed to be correct */
-	const char*	last_s_file_name;/*!< File name where last s-locked */
-	const char*	last_x_file_name;/*!< File name where last x-locked */
-	ibool		writer_is_wait_ex;
-				/*!< This is TRUE if the writer field is
-				RW_LOCK_WAIT_EX; this field is located far
-				from the memory update hotspot fields which
-				are at the start of this struct, thus we can
-				peek this field without causing much memory
-				bus traffic */
-	unsigned	cline:14;	/*!< Line where created */
-	unsigned	last_s_line:14;	/*!< Line number where last time s-locked */
-	unsigned	last_x_line:14;	/*!< Line number where last time x-locked */
+	/** The instrumentation hook */
+	struct PSI_rwlock*	pfs_psi;
+#endif /* UNIV_PFS_RWLOCK */
+
 #ifdef UNIV_DEBUG
-	ulint	magic_n;	/*!< RW_LOCK_MAGIC_N */
 /** Value of rw_lock_t::magic_n */
-#define	RW_LOCK_MAGIC_N	22643
+# define RW_LOCK_MAGIC_N	22643
+
+	/** Constructor */
+	rw_lock_t()
+	{
+		magic_n = RW_LOCK_MAGIC_N;
+	}
+
+	/** Destructor */
+	virtual ~rw_lock_t()
+	{
+		ut_ad(magic_n == RW_LOCK_MAGIC_N);
+		magic_n = 0;
+	}
+
+	virtual std::string to_string() const;
+	virtual std::string locked_from() const;
+
+	/** For checking memory corruption. */
+	ulint		magic_n;
+
+	/** In the debug version: pointer to the debug info list of the lock */
+	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+
+	/** Level in the global latching order. */
+	latch_level_t	level;
+
 #endif /* UNIV_DEBUG */
-};
 
-#ifdef UNIV_SYNC_DEBUG
+};
+#ifdef UNIV_DEBUG
 /** The structure for storing debug info of an rw-lock.  All access to this
 structure must be protected by rw_lock_debug_mutex_enter(). */
 struct	rw_lock_debug_t {
@@ -645,15 +697,15 @@ struct	rw_lock_debug_t {
 	os_thread_id_t thread_id;  /*!< The thread id of the thread which
 				locked the rw-lock */
 	ulint	pass;		/*!< Pass value given in the lock operation */
-	ulint	lock_type;	/*!< Type of the lock: RW_LOCK_EX,
-				RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
+	ulint	lock_type;	/*!< Type of the lock: RW_LOCK_X,
+				RW_LOCK_S, RW_LOCK_X_WAIT */
 	const char*	file_name;/*!< File name where the lock was obtained */
 	ulint	line;		/*!< Line where the rw-lock was locked */
 	UT_LIST_NODE_T(rw_lock_debug_t) list;
 				/*!< Debug structs are linked in a two-way
 				list */
 };
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 
 /* For performance schema instrumentation, a new set of rwlock
 wrap functions are created if "UNIV_PFS_RWLOCK" is defined.
@@ -676,6 +728,8 @@ rw_lock_s_lock()
 rw_lock_s_lock_gen()
 rw_lock_s_lock_nowait()
 rw_lock_s_unlock_gen()
+rw_lock_sx_lock()
+rw_lock_sx_unlock_gen()
 rw_lock_free()
 */
 
@@ -692,9 +746,7 @@ pfs_rw_lock_create_func(
 					performance schema */
 	rw_lock_t*	lock,		/*!< in: rw lock */
 #ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
+	latch_level_t	level,		/*!< in: level */
 #endif /* UNIV_DEBUG */
 	const char*	cmutex_name,	/*!< in: mutex name */
 	const char*	cfile_name,	/*!< in: file name where created */
@@ -774,25 +826,65 @@ UNIV_INLINE
 void
 pfs_rw_lock_s_unlock_func(
 /*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the
 				lock may have been passed to another
 				thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock);	/*!< in/out: rw-lock */
 /******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
 NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
 this function! */
 UNIV_INLINE
 void
 pfs_rw_lock_x_unlock_func(
 /*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the
 				lock may have been passed to another
 				thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_nowait()
+NOTE! Please use the corresponding macro, not directly
+this function! */
+UNIV_INLINE
+ibool
+pfs_rw_lock_sx_lock_low(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_unlock_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock);	/*!< in/out: rw-lock */
 /******************************************************************//**
 Performance schema instrumented wrap function for rw_lock_free_func()
@@ -808,7 +900,8 @@ pfs_rw_lock_free_func(
 
 #ifndef UNIV_NONINL
 #include "sync0rw.ic"
-#endif
+#endif /* !UNIV_NONINL */
+
 #endif /* !UNIV_HOTBACKUP */
 
-#endif
+#endif /* sync0rw.h */
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 27970188165..d67e26d961d 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -30,12 +30,13 @@ The read-write lock (for threads)
 Created 9/11/1995 Heikki Tuuri
 *******************************************************/
 
+#include "os0event.h"
+
 /******************************************************************//**
 Lock an rw-lock in shared mode for the current thread. If the rw-lock is
 locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+the function spins a preset time (controlled by srv_n_spin_wait_rounds),
 waiting for the lock before suspending the thread. */
-UNIV_INTERN
 void
 rw_lock_s_lock_spin(
 /*================*/
@@ -44,10 +45,9 @@ rw_lock_s_lock_spin(
 				be passed to another thread to unlock */
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line);	/*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 /******************************************************************//**
 Inserts the debug information for an rw-lock. */
-UNIV_INTERN
 void
 rw_lock_add_debug_info(
 /*===================*/
@@ -58,67 +58,18 @@ rw_lock_add_debug_info(
 	ulint		line);		/*!< in: line where requested */
 /******************************************************************//**
 Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
 void
 rw_lock_remove_debug_info(
 /*======================*/
 	rw_lock_t*	lock,		/*!< in: rw-lock */
 	ulint		pass,		/*!< in: pass value */
 	ulint		lock_type);	/*!< in: lock type */
-#endif /* UNIV_SYNC_DEBUG */
-
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return	1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
-	const rw_lock_t*	lock)	/*!< in: rw-lock */
-{
-	return(lock->waiters);
-}
-
-/********************************************************************//**
-Sets lock->waiters to 1. It is not an error if lock->waiters is already
-1. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_set_waiter_flag(
-/*====================*/
-	rw_lock_t*	lock)	/*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	(void) os_compare_and_swap_ulint(&lock->waiters, 0, 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-	lock->waiters = 1;
-	os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/********************************************************************//**
-Resets lock->waiters to 0. It is not an error if lock->waiters is already
-0. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_reset_waiter_flag(
-/*======================*/
-	rw_lock_t*	lock)	/*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	(void) os_compare_and_swap_ulint(&lock->waiters, 1, 0);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-	lock->waiters = 0;
-	os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
+#endif /* UNIV_DEBUG */
 
 /******************************************************************//**
 Returns the write-status of the lock - this function made more sense
 with the old rw_lock implementation.
-@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_X, RW_LOCK_X_WAIT, RW_LOCK_SX */
 UNIV_INLINE
 ulint
 rw_lock_get_writer(
@@ -126,21 +77,31 @@ rw_lock_get_writer(
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_word = lock->lock_word;
-	if (lock_word > 0) {
+
+	ut_ad(lock_word <= X_LOCK_DECR);
+	if (lock_word > X_LOCK_HALF_DECR) {
 		/* return NOT_LOCKED in s-lock state, like the writer
 		member of the old lock implementation. */
 		return(RW_LOCK_NOT_LOCKED);
-	} else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
-		return(RW_LOCK_EX);
+	} else if (lock_word > 0) {
+		/* sx-locked, no x-locks */
+		return(RW_LOCK_SX);
+	} else if (lock_word == 0
+		   || lock_word == -X_LOCK_HALF_DECR
+		   || lock_word <= -X_LOCK_DECR) {
+		/* x-lock with sx-lock is also treated as RW_LOCK_X */
+		return(RW_LOCK_X);
 	} else {
-		ut_ad(lock_word > -X_LOCK_DECR);
-		return(RW_LOCK_WAIT_EX);
+		/* x-waiter with sx-lock is also treated as RW_LOCK_X_WAIT
+		e.g. -X_LOCK_HALF_DECR < lock_word < 0 : without sx
+		     -X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR : with sx */
+		return(RW_LOCK_X_WAIT);
 	}
 }
 
 /******************************************************************//**
-Returns the number of readers.
-@return	number of readers */
+Returns the number of readers (s-locks).
+@return number of readers */
 UNIV_INLINE
 ulint
 rw_lock_get_reader_count(
@@ -148,31 +109,35 @@ rw_lock_get_reader_count(
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_word = lock->lock_word;
-	if (lock_word > 0) {
-		/* s-locked, no x-waiters */
+	ut_ad(lock_word <= X_LOCK_DECR);
+
+	if (lock_word > X_LOCK_HALF_DECR) {
+		/* s-locked, no x-waiter */
 		return(X_LOCK_DECR - lock_word);
-	} else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
-		/* s-locked, with x-waiters */
+	} else if (lock_word > 0) {
+		/* s-locked, with sx-locks only */
+		return(X_LOCK_HALF_DECR - lock_word);
+	} else if (lock_word == 0) {
+		/* x-locked */
+		return(0);
+	} else if (lock_word > -X_LOCK_HALF_DECR) {
+		/* s-locked, with x-waiter */
 		return((ulint)(-lock_word));
+	} else if (lock_word == -X_LOCK_HALF_DECR) {
+		/* x-locked with sx-locks */
+		return(0);
+	} else if (lock_word > -X_LOCK_DECR) {
+		/* s-locked, with x-waiter and sx-lock */
+		return((ulint)(-(lock_word + X_LOCK_HALF_DECR)));
 	}
+	/* no s-locks */
 	return(0);
 }
 
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
-UNIV_INLINE
-ib_mutex_t*
-rw_lock_get_mutex(
-/*==============*/
-	rw_lock_t*	lock)
-{
-	return(&(lock->mutex));
-}
-#endif
-
 /******************************************************************//**
 Returns the value of writer_count for the lock. Does not reserve the lock
 mutex, so the caller must be sure it is not changed during the call.
-@return	value of writer_count */
+@return value of writer_count */
 UNIV_INLINE
 ulint
 rw_lock_get_x_lock_count(
@@ -180,127 +145,88 @@ rw_lock_get_x_lock_count(
 	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_copy = lock->lock_word;
-	if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
+	ut_ad(lock_copy <= X_LOCK_DECR);
+
+	if (lock_copy == 0 || lock_copy == -X_LOCK_HALF_DECR) {
+		/* "1 x-lock" or "1 x-lock + sx-locks" */
+		return(1);
+	} else if (lock_copy > -X_LOCK_DECR) {
+		/* s-locks, one or more sx-locks if > 0, or x-waiter if < 0 */
 		return(0);
+	} else if (lock_copy > -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+		/* no s-lock, no sx-lock, 2 or more x-locks.
+		First 2 x-locks are set with -X_LOCK_DECR,
+		all other recursive x-locks are set with -1 */
+		return(2 - (lock_copy + X_LOCK_DECR));
+	} else {
+		/* no s-lock, 1 or more sx-lock, 2 or more x-locks.
+		First 2 x-locks are set with -(X_LOCK_DECR + X_LOCK_HALF_DECR),
+		all other recursive x-locks are set with -1 */
+		return(2 - (lock_copy + X_LOCK_DECR + X_LOCK_HALF_DECR));
 	}
-	return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
 }
 
 /******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
-need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not.
-@return	TRUE if decr occurs */
+Returns the number of sx-locks for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call.
+@return value of sx-lock count */
 UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
-	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount)		/*!< in: amount to decrement */
+ulint
+rw_lock_get_sx_lock_count(
+/*======================*/
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	lint local_lock_word;
+#ifdef UNIV_DEBUG
+	lint lock_copy = lock->lock_word;
 
-	os_rmb;
-	local_lock_word = lock->lock_word;
-	while (local_lock_word > 0) {
-		if (os_compare_and_swap_lint(&lock->lock_word,
-					     local_lock_word,
-					     local_lock_word - amount)) {
-			return(TRUE);
-		}
-		local_lock_word = lock->lock_word;
+	ut_ad(lock_copy <= X_LOCK_DECR);
+
+	while (lock_copy < 0) {
+		lock_copy += X_LOCK_DECR;
 	}
-	return(FALSE);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-	ibool success = FALSE;
-	mutex_enter(&(lock->mutex));
-	if (lock->lock_word > 0) {
-		lock->lock_word -= amount;
-		success = TRUE;
+
+	if (lock_copy > 0 && lock_copy <= X_LOCK_HALF_DECR) {
+		return(lock->sx_recursive);
 	}
-	mutex_exit(&(lock->mutex));
-	return(success);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+	return(0);
+#else /* UNIV_DEBUG */
+	return(lock->sx_recursive);
+#endif /* UNIV_DEBUG */
 }
 
 /******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return	lock->lock_word after increment */
+Atomically decrements the lock_word of a rw_lock by the given amount,
+retrying the compare-and-swap while lock_word is above the threshold. This
+does not support recursive x-locks: they should be handled by the caller and
+need not be atomic since they are performed by the current lock holder.
+Returns true if the decrement was made, false if not.
+@return true if decr occurs */
 UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
+bool
+rw_lock_lock_word_decr(
 /*===================*/
 	rw_lock_t*	lock,		/*!< in/out: rw-lock */
-	ulint		amount)		/*!< in: amount of increment */
+	ulint		amount,		/*!< in: amount to decrement */
+	lint		threshold)	/*!< in: threshold of judgement */
 {
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	return(os_atomic_increment_lint(&lock->lock_word, amount));
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
 	lint local_lock_word;
 
-	mutex_enter(&(lock->mutex));
-
-	lock->lock_word += amount;
 	local_lock_word = lock->lock_word;
-
-	mutex_exit(&(lock->mutex));
-
-	return(local_lock_word);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
-	rw_lock_t*	lock,		/*!< in/out: lock to work on */
-	ibool		recursive)	/*!< in: TRUE if recursion
-					allowed */
-{
-	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	os_thread_id_t	local_thread;
-	ibool		success;
-
-	/* Prevent Valgrind warnings about writer_thread being
-	uninitialized.  It does not matter if writer_thread is
-	uninitialized, because we are comparing writer_thread against
-	itself, and the operation should always succeed. */
-	UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
-
-	local_thread = lock->writer_thread;
-	success = os_compare_and_swap_thread_id(
-		&lock->writer_thread, local_thread, curr_thread);
-	ut_a(success);
-	lock->recursive = recursive;
-
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-
-	mutex_enter(&lock->mutex);
-	lock->writer_thread = curr_thread;
-	lock->recursive = recursive;
-	mutex_exit(&lock->mutex);
-
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+	while (local_lock_word > threshold) {
+		if (my_atomic_caslint(&lock->lock_word,
+				      &local_lock_word,
+				      local_lock_word - amount)) {
+			return(true);
+		}
+	}
+	return(false);
 }
 
 /******************************************************************//**
 Low-level function which tries to lock an rw-lock in s-mode. Performs no
 spinning.
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_s_lock_low(
@@ -312,24 +238,25 @@ rw_lock_s_lock_low(
 	const char*	file_name, /*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	if (!rw_lock_lock_word_decr(lock, 1)) {
+	if (!rw_lock_lock_word_decr(lock, 1, 0)) {
 		/* Locking did not succeed */
 		return(FALSE);
 	}
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
-#endif
+	ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_S, file_name, line));
+
 	/* These debugging values are not set safely: they may be incorrect
 	or even refer to a line that is invalid for the file name. */
 	lock->last_s_file_name = file_name;
 	lock->last_s_line = line;
 
+	/*
 	if (srv_instrument_semaphores) {
 		lock->thread_id = os_thread_get_curr_id();
 		lock->file_name = file_name;
 		lock->line = line;
 	}
+	*/
 
 	return(TRUE);	/* locking succeeded */
 }
@@ -338,7 +265,7 @@ rw_lock_s_lock_low(
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in shared mode for the current thread. If the rw-lock is locked
 in exclusive mode, or there is an exclusive lock request waiting, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
+function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting for
 the lock, before suspending the thread. */
 UNIV_INLINE
 void
@@ -361,20 +288,14 @@ rw_lock_s_lock_func(
 	the threads which have s-locked a latch. This would use some CPU
 	time. */
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
-	ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(lock, RW_LOCK_S)); /* see NOTE above */
+	ut_ad(!rw_lock_own(lock, RW_LOCK_X));
 
-	if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
+	if (!rw_lock_s_lock_low(lock, pass, file_name, line)) {
 
-		return; /* Success */
-	} else {
 		/* Did not succeed, try spin wait */
 
 		rw_lock_s_lock_spin(lock, pass, file_name, line);
-
-		return;
 	}
 }
 
@@ -382,7 +303,7 @@ rw_lock_s_lock_func(
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread if the lock can be
 obtained immediately.
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_func_nowait(
@@ -391,39 +312,24 @@ rw_lock_x_lock_func_nowait(
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ibool success;
-	ibool local_recursive= lock->recursive;
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-	success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
-#else
-
-	success = FALSE;
-	mutex_enter(&(lock->mutex));
-	if (lock->lock_word == X_LOCK_DECR) {
-		lock->lock_word = 0;
-		success = TRUE;
-	}
-	mutex_exit(&(lock->mutex));
-
-#endif
-	/* Note: recursive must be loaded before writer_thread see
-	comment for rw_lock_set_writer_id_and_recursion_flag().
-	To achieve this we load it before os_compare_and_swap_lint(),
-	which implies full memory barrier in current implementation. */
-	if (success) {
-		rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-
-	} else if (local_recursive
-		   && os_thread_eq(lock->writer_thread,
-				   os_thread_get_curr_id())) {
+	lint oldval = X_LOCK_DECR;
+
+	if (my_atomic_caslint(&lock->lock_word, &oldval, 0)) {
+		lock->writer_thread = os_thread_get_curr_id();
+
+	} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
 		/* Relock: this lock_word modification is safe since no other
 		threads can modify (lock, unlock, or reserve) lock_word while
 		there is an exclusive writer and this is the writer thread. */
-		if (lock->lock_word == 0) {
-			lock->lock_word = -X_LOCK_DECR;
-		} else {
+		if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+			/* There is 1 x-lock */
+			lock->lock_word -= X_LOCK_DECR;
+		} else if (lock->lock_word <= -X_LOCK_DECR) {
+			/* There are 2 or more x-locks */
 			lock->lock_word--;
+		} else {
+			/* Failure */
+			return(FALSE);
 		}
 
 		/* Watch for too many recursive locks */
@@ -433,15 +339,16 @@ rw_lock_x_lock_func_nowait(
 		/* Failure */
 		return(FALSE);
 	}
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
 
+	ut_d(rw_lock_add_debug_info(lock, 0, RW_LOCK_X, file_name, line));
+
+	/*
 	if (srv_instrument_semaphores) {
 		lock->thread_id = os_thread_get_curr_id();
 		lock->file_name = file_name;
 		lock->line = line;
 	}
+	*/
 
 	lock->last_x_file_name = file_name;
 	lock->last_x_line = line;
@@ -457,22 +364,21 @@ UNIV_INLINE
 void
 rw_lock_s_unlock_func(
 /*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	ut_ad(lock->lock_word > -X_LOCK_DECR);
 	ut_ad(lock->lock_word != 0);
 	ut_ad(lock->lock_word < X_LOCK_DECR);
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-#endif
+	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
 
 	/* Increment lock_word to indicate 1 less reader */
-	if (rw_lock_lock_word_incr(lock, 1) == 0) {
+	lint	lock_word = my_atomic_addlint(&lock->lock_word, 1) + 1;
+	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
 
 		/* wait_ex waiter exists. It may not be asleep, but we signal
 		anyway. We do not wake other waiters, because they can't
@@ -483,10 +389,6 @@ rw_lock_s_unlock_func(
 	}
 
 	ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
-	rw_s_exit_count++;
-#endif
 }
 
 /******************************************************************//**
@@ -495,55 +397,98 @@ UNIV_INLINE
 void
 rw_lock_x_unlock_func(
 /*==================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
-	ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
-
-	/* lock->recursive flag also indicates if lock->writer_thread is
-	valid or stale. If we are the last of the recursive callers
-	then we must unset lock->recursive flag to indicate that the
-	lock->writer_thread is now stale.
-	Note that since we still hold the x-lock we can safely read the
-	lock_word. */
+	ut_ad(lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR
+	      || lock->lock_word <= -X_LOCK_DECR);
+
 	if (lock->lock_word == 0) {
 		/* Last caller in a possible recursive chain. */
-		lock->recursive = FALSE;
+		lock->writer_thread = 0;
 	}
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
-#endif
+	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
 
-	ulint x_lock_incr;
-	if (lock->lock_word == 0) {
-		x_lock_incr = X_LOCK_DECR;
-	} else if (lock->lock_word == -X_LOCK_DECR) {
-		x_lock_incr = X_LOCK_DECR;
-	} else {
-		ut_ad(lock->lock_word < -X_LOCK_DECR);
-		x_lock_incr = 1;
-	}
+	if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+		/* There is 1 x-lock */
+		/* atomic increment is needed, because it is last */
+		if (my_atomic_addlint(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
+			ut_error;
+		}
 
-	if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
-		/* Lock is now free. May have to signal read/write waiters.
+		/* This no longer has an X-lock but it may still have
+		an SX-lock. So it is now free for S-locks by other threads.
+		We need to signal read/write waiters.
 		We do not need to signal wait_ex waiters, since they cannot
 		exist when there is a writer. */
 		if (lock->waiters) {
-			rw_lock_reset_waiter_flag(lock);
+			my_atomic_store32((int32*) &lock->waiters, 0);
 			os_event_set(lock->event);
 			sync_array_object_signalled();
 		}
+	} else if (lock->lock_word == -X_LOCK_DECR
+		   || lock->lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+		/* There are 2 x-locks */
+		lock->lock_word += X_LOCK_DECR;
+	} else {
+		/* There are more than 2 x-locks. */
+		ut_ad(lock->lock_word < -X_LOCK_DECR);
+		lock->lock_word += 1;
 	}
 
 	ut_ad(rw_lock_validate(lock));
+}
+
+/******************************************************************//**
+Releases a sx mode lock. */
+UNIV_INLINE
+void
+rw_lock_sx_unlock_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif /* UNIV_DEBUG */
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	ut_ad(rw_lock_get_sx_lock_count(lock));
+	ut_ad(lock->sx_recursive > 0);
 
-#ifdef UNIV_SYNC_PERF_STAT
-	rw_x_exit_count++;
-#endif
+	--lock->sx_recursive;
+
+	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
+
+	if (lock->sx_recursive == 0) {
+		/* Last caller in a possible recursive chain. */
+		if (lock->lock_word > 0) {
+			lock->writer_thread = 0;
+
+			if (my_atomic_addlint(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
+				ut_error;
+			}
+			/* Lock is now free. May have to signal read/write
+			waiters. We do not need to signal wait_ex waiters,
+			since they cannot exist when there is an sx-lock
+			holder. */
+			if (lock->waiters) {
+				my_atomic_store32((int32*) &lock->waiters, 0);
+				os_event_set(lock->event);
+				sync_array_object_signalled();
+			}
+		} else {
+			/* still has x-lock */
+			ut_ad(lock->lock_word == -X_LOCK_HALF_DECR
+			      || lock->lock_word <= -(X_LOCK_DECR
+						      + X_LOCK_HALF_DECR));
+			lock->lock_word += X_LOCK_HALF_DECR;
+		}
+	}
+
+	ut_ad(rw_lock_validate(lock));
 }
 
 #ifdef UNIV_PFS_RWLOCK
@@ -558,26 +503,24 @@ pfs_rw_lock_create_func(
 /*====================*/
 	mysql_pfs_key_t	key,		/*!< in: key registered with
 					performance schema */
-	rw_lock_t*	lock,		/*!< in: pointer to memory */
+	rw_lock_t*	lock,		/*!< in/out: pointer to memory */
 # ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-#  endif /* UNIV_SYNC_DEBUG */
+	latch_level_t	level,		/*!< in: level */
 # endif /* UNIV_DEBUG */
 	const char*	cmutex_name,	/*!< in: mutex name */
 	const char*	cfile_name,	/*!< in: file name where created */
 	ulint		cline)		/*!< in: file line where created */
 {
+	ut_d(new(lock) rw_lock_t());
+
 	/* Initialize the rwlock for performance schema */
 	lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
 
 	/* The actual function to initialize an rwlock */
 	rw_lock_create_func(lock,
-# ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 			    level,
-#  endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
 			    cmutex_name,
 			    cfile_name,
 			    cline);
@@ -596,14 +539,17 @@ pfs_rw_lock_x_lock_func(
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	if (lock->pfs_psi != NULL)
-	{
+	if (lock->pfs_psi != NULL) {
 		PSI_rwlock_locker*	locker;
 		PSI_rwlock_locker_state	state;
 
-		/* Record the entry of rw x lock request in performance schema */
+		/* Record the acquisition of a read-write lock in exclusive
+		mode in performance schema */
+/* MySQL 5.7 New PSI */
+#define PSI_RWLOCK_EXCLUSIVELOCK PSI_RWLOCK_WRITELOCK
+
 		locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
-			&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+			&state, lock->pfs_psi, PSI_RWLOCK_EXCLUSIVELOCK,
 			file_name, static_cast<uint>(line));
 
 		rw_lock_x_lock_func(
@@ -612,9 +558,7 @@ pfs_rw_lock_x_lock_func(
 		if (locker != NULL) {
 			PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
 		}
-	}
-	else
-	{
+	} else {
 		rw_lock_x_lock_func(lock, pass, file_name, line);
 	}
 }
@@ -623,7 +567,7 @@ Performance schema instrumented wrap function for
 rw_lock_x_lock_func_nowait()
 NOTE! Please use the corresponding macro rw_lock_x_lock_func(),
 not directly this function!
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 pfs_rw_lock_x_lock_func_nowait(
@@ -633,16 +577,18 @@ pfs_rw_lock_x_lock_func_nowait(
 				requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ibool	ret;
+	ibool		ret;
 
-	if (lock->pfs_psi != NULL)
-	{
+	if (lock->pfs_psi != NULL) {
 		PSI_rwlock_locker*	locker;
-		PSI_rwlock_locker_state		state;
+		PSI_rwlock_locker_state	state;
 
-		/* Record the entry of rw x lock request in performance schema */
+		/* Record the acquisition of a read-write trylock in exclusive
+		mode in performance schema */
+
+#define PSI_RWLOCK_TRYEXCLUSIVELOCK PSI_RWLOCK_TRYWRITELOCK
 		locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
-			&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+			&state, lock->pfs_psi, PSI_RWLOCK_TRYEXCLUSIVELOCK,
 			file_name, static_cast<uint>(line));
 
 		ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
@@ -651,9 +597,7 @@ pfs_rw_lock_x_lock_func_nowait(
 			PSI_RWLOCK_CALL(end_rwlock_wrwait)(
 				locker, static_cast<int>(ret));
 		}
-	}
-	else
-	{
+	} else {
 		ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
 	}
 
@@ -669,8 +613,7 @@ pfs_rw_lock_free_func(
 /*==================*/
 	rw_lock_t*	lock)	/*!< in: pointer to rw-lock */
 {
-	if (lock->pfs_psi != NULL)
-	{
+	if (lock->pfs_psi != NULL) {
 		PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
 		lock->pfs_psi = NULL;
 	}
@@ -693,14 +636,14 @@ pfs_rw_lock_s_lock_func(
 				requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	if (lock->pfs_psi != NULL)
-	{
+	if (lock->pfs_psi != NULL) {
 		PSI_rwlock_locker*	locker;
 		PSI_rwlock_locker_state	state;
 
+#define  PSI_RWLOCK_SHAREDLOCK  PSI_RWLOCK_READLOCK
 		/* Instrumented to inform we are aquiring a shared rwlock */
 		locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
-			&state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+			&state, lock->pfs_psi, PSI_RWLOCK_SHAREDLOCK,
 			file_name, static_cast<uint>(line));
 
 		rw_lock_s_lock_func(lock, pass, file_name, line);
@@ -708,19 +651,50 @@ pfs_rw_lock_s_lock_func(
 		if (locker != NULL) {
 			PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
 		}
-	}
-	else
-	{
+	} else {
 		rw_lock_s_lock_func(lock, pass, file_name, line);
 	}
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock will be passed to another
+				thread to unlock */
+	const char*	file_name,/*!< in: file name where lock
+				requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (lock->pfs_psi != NULL) {
+		PSI_rwlock_locker*	locker;
+		PSI_rwlock_locker_state	state;
+
+#define PSI_RWLOCK_SHAREDEXCLUSIVELOCK PSI_RWLOCK_WRITELOCK
+		/* Instrumented to inform we are acquiring an sx-lock */
+		locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+			&state, lock->pfs_psi, PSI_RWLOCK_SHAREDEXCLUSIVELOCK,
+			file_name, static_cast<uint>(line));
 
-	return;
+		rw_lock_sx_lock_func(lock, pass, file_name, line);
+
+		if (locker != NULL) {
+			PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
+		}
+	} else {
+		rw_lock_sx_lock_func(lock, pass, file_name, line);
+	}
 }
 /******************************************************************//**
 Performance schema instrumented wrap function for rw_lock_s_lock_func()
 NOTE! Please use the corresponding macro rw_lock_s_lock(), not
 directly this function!
-@return	TRUE if success */
+@return TRUE if success */
 UNIV_INLINE
 ibool
 pfs_rw_lock_s_lock_low(
@@ -732,16 +706,16 @@ pfs_rw_lock_s_lock_low(
 	const char*	file_name, /*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ibool	ret;
+	ibool		ret;
 
-	if (lock->pfs_psi != NULL)
-	{
+	if (lock->pfs_psi != NULL) {
 		PSI_rwlock_locker*	locker;
 		PSI_rwlock_locker_state	state;
 
+#define PSI_RWLOCK_TRYSHAREDLOCK PSI_RWLOCK_TRYREADLOCK
 		/* Instrumented to inform we are aquiring a shared rwlock */
 		locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
-			&state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+			&state, lock->pfs_psi, PSI_RWLOCK_TRYSHAREDLOCK,
 			file_name, static_cast<uint>(line));
 
 		ret = rw_lock_s_lock_low(lock, pass, file_name, line);
@@ -750,15 +724,54 @@ pfs_rw_lock_s_lock_low(
 			PSI_RWLOCK_CALL(end_rwlock_rdwait)(
 				locker, static_cast<int>(ret));
 		}
-	}
-	else
-	{
+	} else {
 		ret = rw_lock_s_lock_low(lock, pass, file_name, line);
 	}
 
 	return(ret);
 }
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_lock_low()
+NOTE! Please use the corresponding macro, not
+directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_sx_lock_low(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock will be passed to another
+				thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	ibool		ret;
+
+	if (lock->pfs_psi != NULL) {
+		PSI_rwlock_locker*	locker;
+		PSI_rwlock_locker_state	state;
+
+#define PSI_RWLOCK_TRYSHAREDEXCLUSIVELOCK PSI_RWLOCK_TRYWRITELOCK
+		/* Instrumented to inform we are acquiring a shared
+		exclusive rwlock */
+		locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+			&state, lock->pfs_psi,
+			PSI_RWLOCK_TRYSHAREDEXCLUSIVELOCK,
+			file_name, static_cast<uint>(line));
+
+		ret = rw_lock_sx_lock_low(lock, pass, file_name, line);
+
+		if (locker != NULL) {
+			PSI_RWLOCK_CALL(end_rwlock_rdwait)(
+				locker, static_cast<int>(ret));
+		}
+	} else {
+		ret = rw_lock_sx_lock_low(lock, pass, file_name, line);
+	}
 
+	return(ret);
+}
 /******************************************************************//**
 Performance schema instrumented wrap function for rw_lock_x_unlock_func()
 NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
@@ -767,21 +780,49 @@ UNIV_INLINE
 void
 pfs_rw_lock_x_unlock_func(
 /*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the
 				lock may have been passed to another
 				thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	/* Inform performance schema we are unlocking the lock */
-	if (lock->pfs_psi != NULL)
+	if (lock->pfs_psi != NULL) {
 		PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+	}
 
 	rw_lock_x_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 		pass,
-#endif
+#endif /* UNIV_DEBUG */
+		lock);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_sx_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_sx_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_sx_unlock_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif /* UNIV_DEBUG */
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	/* Inform performance schema we are unlocking the lock */
+	if (lock->pfs_psi != NULL) {
+		PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+	}
+
+	rw_lock_sx_unlock_func(
+#ifdef UNIV_DEBUG
+		pass,
+#endif /* UNIV_DEBUG */
 		lock);
 }
 
@@ -793,21 +834,22 @@ UNIV_INLINE
 void
 pfs_rw_lock_s_unlock_func(
 /*======================*/
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	ulint		pass,	/*!< in: pass value; != 0, if the
 				lock may have been passed to another
 				thread to unlock */
-#endif
+#endif /* UNIV_DEBUG */
 	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	/* Inform performance schema we are unlocking the lock */
-	if (lock->pfs_psi != NULL)
+	if (lock->pfs_psi != NULL) {
 		PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+	}
 
 	rw_lock_s_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 		pass,
-#endif
+#endif /* UNIV_DEBUG */
 		lock);
 
 }
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 6dff729ee60..7fddada10f8 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -35,19 +35,7 @@ Created 9/5/1995 Heikki Tuuri
 #define sync0sync_h
 
 #include "univ.i"
-#include "sync0types.h"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-#include "os0sync.h"
-#include "sync0arr.h"
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-#if  defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
-extern "C" my_bool	timed_mutexes;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#include "ut0counter.h"
 
 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
 
@@ -82,759 +70,78 @@ extern mysql_pfs_key_t	ibuf_bitmap_mutex_key;
 extern mysql_pfs_key_t	ibuf_mutex_key;
 extern mysql_pfs_key_t	ibuf_pessimistic_insert_mutex_key;
 extern mysql_pfs_key_t	log_sys_mutex_key;
+extern mysql_pfs_key_t	log_sys_write_mutex_key;
+extern mysql_pfs_key_t	log_cmdq_mutex_key;
 extern mysql_pfs_key_t	log_flush_order_mutex_key;
-# ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t	server_mutex_key;
-# endif /* !HAVE_ATOMIC_BUILTINS */
-# ifdef UNIV_MEM_DEBUG
-extern mysql_pfs_key_t	mem_hash_mutex_key;
-# endif /* UNIV_MEM_DEBUG */
-extern mysql_pfs_key_t	mem_pool_mutex_key;
 extern mysql_pfs_key_t	mutex_list_mutex_key;
-extern mysql_pfs_key_t	purge_sys_bh_mutex_key;
+extern mysql_pfs_key_t	recalc_pool_mutex_key;
+extern mysql_pfs_key_t	page_cleaner_mutex_key;
+extern mysql_pfs_key_t	purge_sys_pq_mutex_key;
 extern mysql_pfs_key_t	recv_sys_mutex_key;
 extern mysql_pfs_key_t	recv_writer_mutex_key;
-extern mysql_pfs_key_t	rseg_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t	rtr_active_mutex_key;
+extern mysql_pfs_key_t	rtr_match_mutex_key;
+extern mysql_pfs_key_t	rtr_path_mutex_key;
+extern mysql_pfs_key_t	rtr_ssn_mutex_key;
+extern mysql_pfs_key_t	redo_rseg_mutex_key;
+extern mysql_pfs_key_t	noredo_rseg_mutex_key;
+extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+# ifdef UNIV_DEBUG
 extern mysql_pfs_key_t	rw_lock_debug_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
 extern mysql_pfs_key_t	rw_lock_list_mutex_key;
 extern mysql_pfs_key_t	rw_lock_mutex_key;
 extern mysql_pfs_key_t	srv_dict_tmpfile_mutex_key;
 extern mysql_pfs_key_t	srv_innodb_monitor_mutex_key;
 extern mysql_pfs_key_t	srv_misc_tmpfile_mutex_key;
-extern mysql_pfs_key_t	srv_threads_mutex_key;
 extern mysql_pfs_key_t	srv_monitor_file_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
+# ifdef UNIV_DEBUG
 extern mysql_pfs_key_t	sync_thread_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
 extern mysql_pfs_key_t	buf_dblwr_mutex_key;
 extern mysql_pfs_key_t	trx_undo_mutex_key;
 extern mysql_pfs_key_t	trx_mutex_key;
-extern mysql_pfs_key_t	lock_sys_mutex_key;
-extern mysql_pfs_key_t	lock_sys_wait_mutex_key;
+extern mysql_pfs_key_t	trx_pool_mutex_key;
+extern mysql_pfs_key_t	trx_pool_manager_mutex_key;
+extern mysql_pfs_key_t	lock_mutex_key;
+extern mysql_pfs_key_t	lock_wait_mutex_key;
 extern mysql_pfs_key_t	trx_sys_mutex_key;
 extern mysql_pfs_key_t	srv_sys_mutex_key;
-extern mysql_pfs_key_t	srv_sys_tasks_mutex_key;
-#ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t	srv_conc_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-#ifndef HAVE_ATOMIC_BUILTINS_64
-extern mysql_pfs_key_t	monitor_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
-extern mysql_pfs_key_t	event_os_mutex_key;
-extern mysql_pfs_key_t	ut_list_mutex_key;
-extern mysql_pfs_key_t	os_mutex_key;
+extern mysql_pfs_key_t	srv_threads_mutex_key;
+extern mysql_pfs_key_t	event_mutex_key;
+extern mysql_pfs_key_t	event_manager_mutex_key;
+extern mysql_pfs_key_t	sync_array_mutex_key;
+extern mysql_pfs_key_t	thread_mutex_key;
 extern mysql_pfs_key_t  zip_pad_mutex_key;
+extern mysql_pfs_key_t  row_drop_list_mutex_key;
 #endif /* UNIV_PFS_MUTEX */
 
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void);
-/*===========*/
-/******************************************************************//**
-Frees the resources in synchronization data structures. */
-UNIV_INTERN
-void
-sync_close(void);
-/*===========*/
-
-#undef mutex_free			/* Fix for MacOS X */
-
-#ifdef UNIV_PFS_MUTEX
-/**********************************************************************
-Following mutex APIs would be performance schema instrumented
-if "UNIV_PFS_MUTEX" is defined:
-
-mutex_create
-mutex_enter
-mutex_exit
-mutex_enter_nowait
-mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
-The instrumented wrapper functions have the prefix of "innodb_".
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object to a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-# ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-#   define mutex_create(K, M, level)				\
-	pfs_mutex_create_func((K), (M), #M, (level), __FILE__, __LINE__)
-#  else
-#   define mutex_create(K, M, level)				\
-	pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
-#  endif/* UNIV_SYNC_DEBUG */
-# else
-#  define mutex_create(K, M, level)				\
-	pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
-# endif	/* UNIV_DEBUG */
-
-# define mutex_enter(M)						\
-	pfs_mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M)					\
-	pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_exit(M)	pfs_mutex_exit_func(M)
-
-# define mutex_free(M)	pfs_mutex_free_func(M)
-
-#else	/* UNIV_PFS_MUTEX */
-
-/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to
-original non-instrumented functions */
+#ifdef UNIV_PFS_RWLOCK
+/* The following are rwlock keys used to register with the MySQL
+performance schema */
+extern	mysql_pfs_key_t btr_search_latch_key;
+extern	mysql_pfs_key_t	buf_block_lock_key;
 # ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-#   define mutex_create(K, M, level)			\
-	mutex_create_func((M), #M, (level), __FILE__, __LINE__)
-#  else /* UNIV_SYNC_DEBUG */
-#   define mutex_create(K, M, level)				\
-	mutex_create_func((M), #M, __FILE__, __LINE__)
-#  endif /* UNIV_SYNC_DEBUG */
-# else /* UNIV_DEBUG */
-#  define mutex_create(K, M, level)				\
-	mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif	/* UNIV_DEBUG */
-
-# define mutex_enter(M)	mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M)	\
-	mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_exit(M)	mutex_exit_func(M)
-
-# define mutex_free(M)	mutex_free_func(M)
-
-#endif	/* UNIV_PFS_MUTEX */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
-	const char*	cmutex_name,	/*!< in: mutex name */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/*!< in: file name where created */
-	ulint		cline);		/*!< in: file line where created */
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
-	ib_mutex_t*	mutex);	/*!< in: mutex */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-/* NOTE! currently same as mutex_enter! */
-
-#define mutex_enter_fast(M)	mutex_enter_func((M), __FILE__, __LINE__)
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a mutex for the current thread. If the mutex is reserved
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where locked */
-	ulint		line);		/*!< in: line where locked */
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return	0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where mutex
-					requested */
-	ulint		line);		/*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
-	ib_mutex_t*	mutex);	/*!< in: pointer to mutex */
-
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with peformance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
-	PSI_mutex_key	key,		/*!< in: Performance Schema key */
-	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
-	const char*	cmutex_name,	/*!< in: mutex name */
-# ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-#  endif /* UNIV_SYNC_DEBUG */
+extern	mysql_pfs_key_t	buf_block_debug_latch_key;
 # endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/*!< in: file name where created */
-	ulint		cline);		/*!< in: file line where created */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where locked */
-	ulint		line);		/*!< in: line where locked */
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return	0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where mutex
-					requested */
-	ulint		line);		/*!< in: line where requested */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with peformance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
-	ib_mutex_t*	mutex);	/*!< in: pointer to mutex */
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
-	ib_mutex_t*	mutex);	/*!< in: mutex */
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version.
-@return	TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void);
-/*================*/
-#endif /* UNIV_SYNC_DEBUG */
-/*#####################################################################
-FUNCTION PROTOTYPES FOR DEBUGGING */
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
-	FILE*	file);		/*!< in: file where to print */
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
-	FILE*	file);		/*!< in: file where to print */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return	TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
-	const ib_mutex_t*	mutex);	/*!< in: mutex */
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only
-in the debug version.
-@return	TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
-	const ib_mutex_t*	mutex)	/*!< in: mutex */
-	MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
-	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level,	/*!< in: level in the latching order; if
-			SYNC_LEVEL_VARYING, nothing is done */
-	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
-	MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
-	void*	latch);	/*!< in: pointer to a mutex or an rw-lock */
-/******************************************************************//**
-Checks if the level array for the current thread contains a
-mutex or rw-latch at the specified level.
-@return	a matching latch, or NULL if not found */
-UNIV_INTERN
-void*
-sync_thread_levels_contains(
-/*========================*/
-	ulint	level);			/*!< in: latching order level
-					(SYNC_DICT, ...)*/
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return	a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
-	ibool	dict_mutex_allowed)	/*!< in: TRUE if dictionary mutex is
-					allowed to be owned by the thread */
-	MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for data dictionary latches. */
-#define sync_thread_levels_empty_except_dict()		\
-	(!sync_thread_levels_nonempty_gen(TRUE))
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return	a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
-	ibool	has_search_latch)
-				/*!< in: TRUE if and only if the thread
-				is supposed to hold btr_search_latch */
-	MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
-	ib_mutex_t*	mutex,		/*!< in: mutex */
-	const char**	file_name,	/*!< out: file where requested */
-	ulint*		line,		/*!< out: line where requested */
-	os_thread_id_t* thread_id);	/*!< out: id of the thread which owns
-					the mutex */
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return	number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void);
-/*==================*/
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the value
-of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
-	const ib_mutex_t*	mutex);	/*!< in: mutex */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex.
-@return	value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
-	const ib_mutex_t*	mutex);	/*!< in: mutex */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*
-		LATCHING ORDER WITHIN THE DATABASE
-		==================================
-
-The mutex or latch in the central memory object, for instance, a rollback
-segment object, must be acquired before acquiring the latch or latches to
-the corresponding file data structure. In the latching order below, these
-file page object latches are placed immediately below the corresponding
-central memory object latch or mutex.
-
-Synchronization object			Notes
-----------------------			-----
-
-Dictionary mutex			If we have a pointer to a dictionary
-|					object, e.g., a table, it can be
-|					accessed without reserving the
-|					dictionary mutex. We must have a
-|					reservation, a memoryfix, to the
-|					appropriate table object in this case,
-|					and the table must be explicitly
-|					released later.
-V
-Dictionary header
-|
-V
-Secondary index tree latch		The tree latch protects also all
-|					the B-tree non-leaf pages. These
-V					can be read with the page only
-Secondary index non-leaf		bufferfixed to save CPU time,
-|					no s-latch is needed on the page.
-|					Modification of a page requires an
-|					x-latch on the page, however. If a
-|					thread owns an x-latch to the tree,
-|					it is allowed to latch non-leaf pages
-|					even after it has acquired the fsp
-|					latch.
-V
-Secondary index leaf			The latch on the secondary index leaf
-|					can be kept while accessing the
-|					clustered index, to save CPU time.
-V
-Clustered index tree latch		To increase concurrency, the tree
-|					latch is usually released when the
-|					leaf page latch has been acquired.
-V
-Clustered index non-leaf
-|
-V
-Clustered index leaf
-|
-V
-Transaction system header
-|
-V
-Transaction undo mutex			The undo log entry must be written
-|					before any index page is modified.
-|					Transaction undo mutex is for the undo
-|					logs the analogue of the tree latch
-|					for a B-tree. If a thread has the
-|					trx undo mutex reserved, it is allowed
-|					to latch the undo log pages in any
-|					order, and also after it has acquired
-|					the fsp latch.
-V
-Rollback segment mutex			The rollback segment mutex must be
-|					reserved, if, e.g., a new page must
-|					be added to an undo log. The rollback
-|					segment and the undo logs in its
-|					history list can be seen as an
-|					analogue of a B-tree, and the latches
-|					reserved similarly, using a version of
-|					lock-coupling. If an undo log must be
-|					extended by a page when inserting an
-|					undo log record, this corresponds to
-|					a pessimistic insert in a B-tree.
-V
-Rollback segment header
-|
-V
-Purge system latch
-|
-V
-Undo log pages				If a thread owns the trx undo mutex,
-|					or for a log in the history list, the
-|					rseg mutex, it is allowed to latch
-|					undo log pages in any order, and even
-|					after it has acquired the fsp latch.
-|					If a thread does not have the
-|					appropriate mutex, it is allowed to
-|					latch only a single undo log page in
-|					a mini-transaction.
-V
-File space management latch		If a mini-transaction must allocate
-|					several file pages, it can do that,
-|					because it keeps the x-latch to the
-|					file space management in its memo.
-V
-File system pages
-|
-V
-lock_sys_wait_mutex			Mutex protecting lock timeout data
-|
-V
-lock_sys_mutex				Mutex protecting lock_sys_t
-|
-V
-trx_sys->mutex				Mutex protecting trx_sys_t
-|
-V
-Threads mutex				Background thread scheduling mutex
-|
-V
-query_thr_mutex				Mutex protecting query threads
-|
-V
-trx_mutex				Mutex protecting trx_t fields
-|
-V
-Search system mutex
-|
-V
-Buffer pool mutex
-|
-V
-Log mutex
-|
-Any other latch
-|
-V
-Memory pool mutex */
-
-/* Latching order levels. If you modify these, you have to also update
-sync_thread_add_level(). */
-
-/* User transaction locks are higher than any of the latch levels below:
-no latches are allowed when a thread goes to wait for a normal table
-or row lock! */
-#define SYNC_USER_TRX_LOCK	9999
-#define SYNC_NO_ORDER_CHECK	3000	/* this can be used to suppress
-					latching order checking */
-#define	SYNC_LEVEL_VARYING	2000	/* Level is varying. Only used with
-					buffer pool page locks, which do not
-					have a fixed level, but instead have
-					their level set after the page is
-					locked; see e.g.
-					ibuf_bitmap_get_map_page(). */
-#define SYNC_TRX_I_S_RWLOCK	1910	/* Used for
-					trx_i_s_cache_t::rw_lock */
-#define SYNC_TRX_I_S_LAST_READ	1900	/* Used for
-					trx_i_s_cache_t::last_read_mutex */
-#define SYNC_FILE_FORMAT_TAG	1200	/* Used to serialize access to the
-					file format tag */
-#define	SYNC_DICT_OPERATION	1010	/* table create, drop, etc. reserve
-					this in X-mode; implicit or backround
-					operations purge, rollback, foreign
-					key checks reserve this in S-mode */
-#define SYNC_FTS_CACHE		1005	/* FTS cache rwlock */
-#define SYNC_DICT		1000
-#define SYNC_DICT_AUTOINC_MUTEX	999
-#define SYNC_STATS_AUTO_RECALC	997
-#define SYNC_DICT_HEADER	995
-#define SYNC_IBUF_HEADER	914
-#define SYNC_IBUF_PESS_INSERT_MUTEX 912
-/*-------------------------------*/
-#define	SYNC_INDEX_TREE		900
-#define SYNC_TREE_NODE_NEW	892
-#define SYNC_TREE_NODE_FROM_HASH 891
-#define SYNC_TREE_NODE		890
-#define	SYNC_PURGE_LATCH	800
-#define	SYNC_TRX_UNDO		700
-#define SYNC_RSEG		600
-#define SYNC_RSEG_HEADER_NEW	591
-#define SYNC_RSEG_HEADER	590
-#define SYNC_TRX_UNDO_PAGE	570
-#define SYNC_EXTERN_STORAGE	500
-#define	SYNC_FSP		400
-#define	SYNC_FSP_PAGE		395
-#define SYNC_STATS_DEFRAG	390
-/*------------------------------------- Change buffer headers */
-#define SYNC_IBUF_MUTEX		370	/* ibuf_mutex */
-/*------------------------------------- Change buffer tree */
-#define SYNC_IBUF_INDEX_TREE	360
-#define SYNC_IBUF_TREE_NODE_NEW	359
-#define SYNC_IBUF_TREE_NODE	358
-#define	SYNC_IBUF_BITMAP_MUTEX	351
-#define	SYNC_IBUF_BITMAP	350
-/*------------------------------------- Change log for online create index */
-#define SYNC_INDEX_ONLINE_LOG	340
-/*------------------------------------- MySQL query cache mutex */
-/*------------------------------------- MySQL binlog mutex */
-/*-------------------------------*/
-#define SYNC_LOCK_WAIT_SYS	300
-#define SYNC_LOCK_SYS		299
-#define SYNC_TRX_SYS		298
-#define SYNC_TRX		297
-#define SYNC_THREADS		295
-#define SYNC_REC_LOCK		294
-#define SYNC_TRX_SYS_HEADER	290
-#define	SYNC_PURGE_QUEUE	200
-#define SYNC_LOG		170
-#define SYNC_LOG_FLUSH_ORDER	147
-#define SYNC_RECV		168
-#define SYNC_FTS_TOKENIZE	167
-#define SYNC_FTS_CACHE_INIT	166	/* Used for FTS cache initialization */
-#define SYNC_FTS_BG_THREADS	165
-#define SYNC_FTS_OPTIMIZE       164     // FIXME: is this correct number, test
-#define	SYNC_WORK_QUEUE		162
-#define	SYNC_SEARCH_SYS		160	/* NOTE that if we have a memory
-					heap that can be extended to the
-					buffer pool, its logical level is
-					SYNC_SEARCH_SYS, as memory allocation
-					can call routines there! Otherwise
-					the level is SYNC_MEM_HASH. */
-#define	SYNC_BUF_POOL		150	/* Buffer pool mutex */
-#define	SYNC_BUF_PAGE_HASH	149	/* buf_pool->page_hash rw_lock */
-#define	SYNC_BUF_BLOCK		146	/* Block mutex */
-#define	SYNC_BUF_FLUSH_LIST	145	/* Buffer flush list mutex */
-#define SYNC_DOUBLEWRITE	140
-#define	SYNC_ANY_LATCH		135
-#define	SYNC_MEM_HASH		131
-#define	SYNC_MEM_POOL		130
-
-/* Codes used to designate lock operations */
-#define RW_LOCK_NOT_LOCKED	350
-#define RW_LOCK_EX		351
-#define RW_LOCK_EXCLUSIVE	351
-#define RW_LOCK_SHARED		352
-#define RW_LOCK_WAIT_EX		353
-#define SYNC_MUTEX		354
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a mutual exclusion semaphore. */
-
-/** InnoDB mutex */
-struct ib_mutex_t {
-	os_event_t	event;	/*!< Used by sync0arr.cc for the wait queue */
-	volatile lock_word_t	lock_word;	/*!< lock_word is the target
-				of the atomic test-and-set instruction when
-				atomic operations are enabled. */
-
-#if !defined(HAVE_ATOMIC_BUILTINS)
-	os_fast_mutex_t
-		os_fast_mutex;	/*!< We use this OS mutex in place of lock_word
-				when atomic operations are not enabled */
-#endif
-	ulint	waiters;	/*!< This ulint is set to 1 if there are (or
-				may be) threads waiting in the global wait
-				array for this mutex to be released.
-				Otherwise, this is 0. */
-	UT_LIST_NODE_T(ib_mutex_t)	list; /*!< All allocated mutexes are put into
-				a list.	Pointers to the next and prev. */
-#ifdef UNIV_SYNC_DEBUG
-	ulint	level;		/*!< Level in the global latching order */
-#endif /* UNIV_SYNC_DEBUG */
-
-	const char*	file_name;	/*!< File where the mutex was locked */
-	ulint		line;		/*!< Line where the mutex was locked */
-	const char*	cfile_name;/*!< File name where mutex created */
-	ulint		cline;	/*!< Line where created */
-	ulong		count_os_wait;	/*!< count of os_wait */
-	const char*	cmutex_name;	/*!< mutex name */
-	os_thread_id_t thread_id;	/*!< The thread id of the thread
-					which locked the mutex. */
-#ifdef UNIV_DEBUG
-
-/** Value of mutex_t::magic_n */
-# define MUTEX_MAGIC_N	979585UL
-	ulint		magic_n;	/*!< MUTEX_MAGIC_N */
-	ulint		ib_mutex_type;	/*!< 0=usual mutex, 1=rw_lock mutex */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_PFS_MUTEX
-	struct PSI_mutex* pfs_psi;	/*!< The performance schema
-					instrumentation hook */
-#endif
-};
-
-/** Constant determining how long spin wait is continued before suspending
-the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
-to 20 microseconds. */
-
-#define	SYNC_SPIN_ROUNDS	srv_n_spin_wait_rounds
-
-/** The number of mutex_exit calls. Intended for performance monitoring. */
-extern	ib_int64_t	mutex_exit_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-extern ibool	sync_order_checks_on;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** This variable is set to TRUE when sync_init is called */
-extern ibool	sync_initialized;
-
-/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(ib_mutex_t)  ut_list_base_node_t;
-/** Global list of database mutexes (not OS mutexes) created. */
-extern ut_list_base_node_t  mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-extern ib_mutex_t mutex_list_mutex;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
-	ib_mutex_t*		mutex,		/*!< in: mutex guarding the
-						decrement */
-	volatile ulint*		var,		/*!< in/out: variable to
-						decrement */
-	ulint			delta);		/*!< in: delta to decrement */
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
-	ib_mutex_t*		mutex,		/*!< in: mutex guarding the
-						increment */
-	volatile ulint*		var,		/*!< in/out: variable to
-						increment */
-	ulint			delta);		/*!< in: delta to increment */
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-#ifndef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#endif
+extern	mysql_pfs_key_t	dict_operation_lock_key;
+extern	mysql_pfs_key_t	checkpoint_lock_key;
+extern	mysql_pfs_key_t	fil_space_latch_key;
+extern	mysql_pfs_key_t	fts_cache_rw_lock_key;
+extern	mysql_pfs_key_t	fts_cache_init_rw_lock_key;
+extern	mysql_pfs_key_t	trx_i_s_cache_lock_key;
+extern	mysql_pfs_key_t	trx_purge_latch_key;
+extern	mysql_pfs_key_t	index_tree_rw_lock_key;
+extern	mysql_pfs_key_t	index_online_log_key;
+extern	mysql_pfs_key_t	dict_table_stats_key;
+extern  mysql_pfs_key_t trx_sys_rw_lock_key;
+extern  mysql_pfs_key_t hash_table_locks_key;
+extern  mysql_pfs_key_t master_key_id_mutex_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+/** Prints information about the sync system.
+@param[in]	file	stream to print to */
+void
+sync_print(FILE* file);
+
+#endif /* !sync0sync_h */
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
deleted file mode 100644
index 3d1f098826e..00000000000
--- a/storage/innobase/include/sync0sync.ic
+++ /dev/null
@@ -1,415 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.ic
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
-	ib_mutex_t*	mutex,	/*!< in: mutex */
-	ulint		n);	/*!< in: value to set */
-/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where mutex
-					requested */
-	ulint		line);		/*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
-	ib_mutex_t*	mutex,		/*!< in: mutex */
-	const char*	file_name,	/*!< in: file where requested */
-	ulint		line);		/*!< in: line where requested */
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
-	ib_mutex_t*	mutex);	/*!< in: mutex */
-
-/******************************************************************//**
-Performs an atomic test-and-set instruction to the lock_word field of a
-mutex.
-@return	the previous value of lock_word: 0 or 1 */
-UNIV_INLINE
-lock_word_t
-ib_mutex_test_and_set(
-/*==================*/
-	ib_mutex_t*	mutex)	/*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
-	return(os_atomic_test_and_set(&mutex->lock_word));
-#else
-	ibool	ret;
-
-	ret = os_fast_mutex_trylock_full_barrier(&(mutex->os_fast_mutex));
-
-	if (ret == 0) {
-		/* We check that os_fast_mutex_trylock does not leak
-		and allow race conditions */
-		ut_a(mutex->lock_word == 0);
-
-		mutex->lock_word = 1;
-	}
-
-	return((byte) ret);
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Performs a reset instruction to the lock_word field of a mutex. This
-instruction also serializes memory operations to the program order. */
-UNIV_INLINE
-void
-mutex_reset_lock_word(
-/*==================*/
-	ib_mutex_t*	mutex)	/*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
-	os_atomic_clear(&mutex->lock_word);
-#else
-	mutex->lock_word = 0;
-
-	os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Gets the value of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
-	const ib_mutex_t*	mutex)	/*!< in: mutex */
-{
-	ut_ad(mutex);
-
-	return(mutex->lock_word);
-}
-
-/******************************************************************//**
-Gets the waiters field in a mutex.
-@return	value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
-	const ib_mutex_t*	mutex)	/*!< in: mutex */
-{
-	const volatile ulint*	ptr;	/*!< declared volatile to ensure that
-					the value is read from memory */
-	ut_ad(mutex);
-
-	ptr = &(mutex->waiters);
-
-	return(*ptr);		/* Here we assume that the read of a single
-				word from memory is atomic */
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
-	ib_mutex_t*	mutex)	/*!< in: pointer to mutex */
-{
-	ut_ad(mutex_own(mutex));
-
-	mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED;
-
-#ifdef UNIV_SYNC_DEBUG
-	sync_thread_reset_level(mutex);
-#endif
-	mutex_reset_lock_word(mutex);
-
-	/* A problem: we assume that mutex_reset_lock word
-	is a memory barrier, that is when we read the waiters
-	field next, the read must be serialized in memory
-	after the reset. A speculative processor might
-	perform the read first, which could leave a waiting
-	thread hanging indefinitely.
-
-	Our current solution call every second
-	sync_arr_wake_threads_if_sema_free()
-	to wake up possible hanging threads if
-	they are missed in mutex_signal_object. */
-
-	/* We add a memory barrier to prevent reading of the
-	number of waiters before releasing the lock. */
-
-	os_mb;
-
-	if (mutex_get_waiters(mutex) != 0) {
-
-		mutex_signal_object(mutex);
-	}
-
-#ifdef UNIV_SYNC_PERF_STAT
-	mutex_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Locks a mutex for the current thread. If the mutex is reserved, the function
-spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
-before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where locked */
-	ulint		line)		/*!< in: line where locked */
-{
-	ut_ad(mutex_validate(mutex));
-#ifndef WITH_WSREP
-	/* this cannot be be granted when BF trx kills a trx in lock wait state */
-	ut_ad(!mutex_own(mutex));
-#endif /* WITH_WSREP */
-
-	/* Note that we do not peek at the value of lock_word before trying
-	the atomic test_and_set; we could peek, and possibly save time. */
-
-	if (!ib_mutex_test_and_set(mutex)) {
-		mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
-		mutex_set_debug_info(mutex, file_name, line);
-#endif
-		if (srv_instrument_semaphores) {
-			mutex->file_name = file_name;
-			mutex->line = line;
-		}
-
-		return;	/* Succeeded! */
-	}
-
-	mutex_spin_wait(mutex, file_name, line);
-}
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
-	ib_mutex_t*	mutex,	/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where locked */
-	ulint		line)		/*!< in: line where locked */
-{
-	if (mutex->pfs_psi != NULL) {
-		PSI_mutex_locker*	locker;
-		PSI_mutex_locker_state	state;
-
-		locker = PSI_MUTEX_CALL(start_mutex_wait)(
-			&state, mutex->pfs_psi,
-			PSI_MUTEX_LOCK, file_name,
-			static_cast<uint>(line));
-
-		mutex_enter_func(mutex, file_name, line);
-
-		if (locker != NULL) {
-			PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
-		}
-	} else {
-		mutex_enter_func(mutex, file_name, line);
-	}
-}
-
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
-	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
-	const char*	file_name,	/*!< in: file name where mutex
-					requested */
-	ulint		line)		/*!< in: line where requested */
-{
-	ulint		ret;
-
-	if (mutex->pfs_psi != NULL) {
-		PSI_mutex_locker*	locker;
-		PSI_mutex_locker_state		state;
-
-		locker = PSI_MUTEX_CALL(start_mutex_wait)(
-			&state, mutex->pfs_psi,
-			PSI_MUTEX_TRYLOCK, file_name,
-			static_cast<uint>(line));
-
-		ret = mutex_enter_nowait_func(mutex, file_name, line);
-
-		if (locker != NULL) {
-			PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
-		}
-	} else {
-		ret = mutex_enter_nowait_func(mutex, file_name, line);
-	}
-
-	return(ret);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with performance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
-	ib_mutex_t*	mutex)	/*!< in: pointer to mutex */
-{
-	if (mutex->pfs_psi != NULL) {
-		PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
-	}
-
-	mutex_exit_func(mutex);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with performance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
-	mysql_pfs_key_t	key,		/*!< in: Performance Schema key */
-	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
-	const char*	cmutex_name,	/*!< in: mutex name */
-# ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/*!< in: level */
-#  endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/*!< in: file name where created */
-	ulint		cline)		/*!< in: file line where created */
-{
-	mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
-
-	mutex_create_func(mutex,
-			  cmutex_name,
-# ifdef UNIV_DEBUG
-#  ifdef UNIV_SYNC_DEBUG
-			  level,
-#  endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
-			  cfile_name,
-			  cline);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
-	ib_mutex_t*	mutex)	/*!< in: mutex */
-{
-	if (mutex->pfs_psi != NULL) {
-		PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
-		mutex->pfs_psi = NULL;
-	}
-
-	mutex_free_func(mutex);
-}
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
-	ib_mutex_t*	mutex,		/*!< in: mutex guarding the dec */
-	volatile ulint*	var,		/*!< in/out: variable to decrement */
-	ulint		delta)		/*!< in: delta to decrement */
-{
-	mutex_enter(mutex);
-
-	/* I don't think we will encounter a situation where
-	this check will not be required. */
-	ut_ad(*var >= delta);
-
-	*var -= delta;
-
-	mutex_exit(mutex);
-}
-
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
-	ib_mutex_t*	mutex,		/*!< in: mutex guarding the increment */
-	volatile ulint*	var,		/*!< in/out: variable to increment */
-	ulint		delta)		/*!< in: delta to increment */
-{
-	mutex_enter(mutex);
-
-	*var += delta;
-
-	mutex_exit(mutex);
-}
-#endif /* !HAVE_ATOMIC_BUILTINS */
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 0d143004a7a..bd49e034384 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,1229 @@ Created 9/5/1995 Heikki Tuuri
 #ifndef sync0types_h
 #define sync0types_h
 
-struct ib_mutex_t;
+#include <vector>
+#include <iostream>
+#include <my_atomic.h>
 
+#include "ut0new.h"
+#include "ut0counter.h"
+
+#if defined(UNIV_DEBUG) && !defined(UNIV_INNOCHECKSUM)
+/** Set when InnoDB has invoked exit(). */
+extern bool	innodb_calling_exit;
+#endif /* UNIV_DEBUG && !UNIV_INNOCHECKSUM */
+
+#ifdef _WIN32
+/** Native mutex */
+typedef CRITICAL_SECTION	sys_mutex_t;
+#else
+/** Native mutex */
+typedef pthread_mutex_t		sys_mutex_t;
+#endif /* _WIN32 */
+
+/** Mutex states. */
+enum mutex_state_t {
+	/** Mutex is free */
+	MUTEX_STATE_UNLOCKED = 0,
+
+	/** Mutex is acquired by some thread. */
+	MUTEX_STATE_LOCKED = 1,
+
+	/** Mutex is contended and there are threads waiting on the lock. */
+	MUTEX_STATE_WAITERS = 2
+};
+
+/*
+		LATCHING ORDER WITHIN THE DATABASE
+		==================================
+
+The mutex or latch in the central memory object, for instance, a rollback
+segment object, must be acquired before acquiring the latch or latches to
+the corresponding file data structure. In the latching order below, these
+file page object latches are placed immediately below the corresponding
+central memory object latch or mutex.
+
+Synchronization object			Notes
+----------------------			-----
+
+Dictionary mutex			If we have a pointer to a dictionary
+|					object, e.g., a table, it can be
+|					accessed without reserving the
+|					dictionary mutex. We must have a
+|					reservation, a memoryfix, to the
+|					appropriate table object in this case,
+|					and the table must be explicitly
+|					released later.
+V
+Dictionary header
+|
+V
+Secondary index tree latch		The tree latch protects also all
+|					the B-tree non-leaf pages. These
+V					can be read with the page only
+Secondary index non-leaf		bufferfixed to save CPU time,
+|					no s-latch is needed on the page.
+|					Modification of a page requires an
+|					x-latch on the page, however. If a
+|					thread owns an x-latch to the tree,
+|					it is allowed to latch non-leaf pages
+|					even after it has acquired the fsp
+|					latch.
+V
+Secondary index leaf			The latch on the secondary index leaf
+|					can be kept while accessing the
+|					clustered index, to save CPU time.
+V
+Clustered index tree latch		To increase concurrency, the tree
+|					latch is usually released when the
+|					leaf page latch has been acquired.
+V
+Clustered index non-leaf
+|
+V
+Clustered index leaf
+|
+V
+Transaction system header
+|
+V
+Transaction undo mutex			The undo log entry must be written
+|					before any index page is modified.
+|					Transaction undo mutex is for the undo
+|					logs the analogue of the tree latch
+|					for a B-tree. If a thread has the
+|					trx undo mutex reserved, it is allowed
+|					to latch the undo log pages in any
+|					order, and also after it has acquired
+|					the fsp latch.
+V
+Rollback segment mutex			The rollback segment mutex must be
+|					reserved, if, e.g., a new page must
+|					be added to an undo log. The rollback
+|					segment and the undo logs in its
+|					history list can be seen as an
+|					analogue of a B-tree, and the latches
+|					reserved similarly, using a version of
+|					lock-coupling. If an undo log must be
+|					extended by a page when inserting an
+|					undo log record, this corresponds to
+|					a pessimistic insert in a B-tree.
+V
+Rollback segment header
+|
+V
+Purge system latch
+|
+V
+Undo log pages				If a thread owns the trx undo mutex,
+|					or for a log in the history list, the
+|					rseg mutex, it is allowed to latch
+|					undo log pages in any order, and even
+|					after it has acquired the fsp latch.
+|					If a thread does not have the
+|					appropriate mutex, it is allowed to
+|					latch only a single undo log page in
+|					a mini-transaction.
+V
+File space management latch		If a mini-transaction must allocate
+|					several file pages, it can do that,
+|					because it keeps the x-latch to the
+|					file space management in its memo.
+V
+File system pages
+|
+V
+lock_sys_wait_mutex			Mutex protecting lock timeout data
+|
+V
+lock_sys_mutex				Mutex protecting lock_sys_t
+|
+V
+trx_sys->mutex				Mutex protecting trx_sys_t
+|
+V
+Threads mutex				Background thread scheduling mutex
+|
+V
+query_thr_mutex				Mutex protecting query threads
+|
+V
+trx_mutex				Mutex protecting trx_t fields
+|
+V
+Search system mutex
+|
+V
+Buffer pool mutex
+|
+V
+Log mutex
+|
+Any other latch
+|
+V
+Memory pool mutex */
+
+/** Latching order levels. If you modify these, you have to also update
+LatchDebug internals in sync0debug.cc */
+
+enum latch_level_t {
+	SYNC_UNKNOWN = 0,
+
+	SYNC_MUTEX = 1,
+
+	RW_LOCK_SX,
+	RW_LOCK_X_WAIT,
+	RW_LOCK_S,
+	RW_LOCK_X,
+	RW_LOCK_NOT_LOCKED,
+
+	SYNC_MONITOR_MUTEX,
+
+	SYNC_ANY_LATCH,
+
+	SYNC_DOUBLEWRITE,
+
+	SYNC_BUF_FLUSH_LIST,
+
+	SYNC_BUF_BLOCK,
+	SYNC_BUF_PAGE_HASH,
+
+	SYNC_BUF_POOL,
+
+	SYNC_POOL,
+	SYNC_POOL_MANAGER,
+
+	SYNC_SEARCH_SYS,
+
+	SYNC_WORK_QUEUE,
+
+	SYNC_FTS_TOKENIZE,
+	SYNC_FTS_OPTIMIZE,
+	SYNC_FTS_BG_THREADS,
+	SYNC_FTS_CACHE_INIT,
+	SYNC_RECV,
+	SYNC_LOG_FLUSH_ORDER,
+	SYNC_LOG,
+	SYNC_LOG_WRITE,
+	SYNC_PAGE_CLEANER,
+	SYNC_PURGE_QUEUE,
+	SYNC_TRX_SYS_HEADER,
+	SYNC_REC_LOCK,
+	SYNC_THREADS,
+	SYNC_TRX,
+	SYNC_TRX_SYS,
+	SYNC_LOCK_SYS,
+	SYNC_LOCK_WAIT_SYS,
+
+	SYNC_INDEX_ONLINE_LOG,
+
+	SYNC_IBUF_BITMAP,
+	SYNC_IBUF_BITMAP_MUTEX,
+	SYNC_IBUF_TREE_NODE,
+	SYNC_IBUF_TREE_NODE_NEW,
+	SYNC_IBUF_INDEX_TREE,
+
+	SYNC_IBUF_MUTEX,
+
+	SYNC_FSP_PAGE,
+	SYNC_FSP,
+	SYNC_EXTERN_STORAGE,
+	SYNC_TRX_UNDO_PAGE,
+	SYNC_RSEG_HEADER,
+	SYNC_RSEG_HEADER_NEW,
+	SYNC_NOREDO_RSEG,
+	SYNC_REDO_RSEG,
+	SYNC_TRX_UNDO,
+	SYNC_PURGE_LATCH,
+	SYNC_TREE_NODE,
+	SYNC_TREE_NODE_FROM_HASH,
+	SYNC_TREE_NODE_NEW,
+	SYNC_INDEX_TREE,
+
+	SYNC_IBUF_PESS_INSERT_MUTEX,
+	SYNC_IBUF_HEADER,
+	SYNC_DICT_HEADER,
+	SYNC_STATS_AUTO_RECALC,
+	SYNC_DICT_AUTOINC_MUTEX,
+	SYNC_DICT,
+	SYNC_FTS_CACHE,
+
+	SYNC_DICT_OPERATION,
+
+	SYNC_FILE_FORMAT_TAG,
+
+	SYNC_TRX_I_S_LAST_READ,
+
+	SYNC_TRX_I_S_RWLOCK,
+
+	SYNC_RECV_WRITER,
+
+	/** Level is varying. Only used with buffer pool page locks, which
+	do not have a fixed level, but instead have their level set after
+	the page is locked; see e.g.  ibuf_bitmap_get_map_page(). */
+
+	SYNC_LEVEL_VARYING,
+
+	/** This can be used to suppress order checking. */
+	SYNC_NO_ORDER_CHECK,
+
+	/** Maximum level value */
+	SYNC_LEVEL_MAX = SYNC_NO_ORDER_CHECK
+};
+
+/** Each latch has an ID. This id is used for creating the latch and to look
+up its meta-data. See sync0debug.cc. */
+enum latch_id_t {
+	LATCH_ID_NONE = 0,
+	LATCH_ID_AUTOINC,
+	LATCH_ID_BUF_BLOCK_MUTEX,
+	LATCH_ID_BUF_POOL,
+	LATCH_ID_BUF_POOL_ZIP,
+	LATCH_ID_CACHE_LAST_READ,
+	LATCH_ID_DICT_FOREIGN_ERR,
+	LATCH_ID_DICT_SYS,
+	LATCH_ID_FILE_FORMAT_MAX,
+	LATCH_ID_FIL_SYSTEM,
+	LATCH_ID_FLUSH_LIST,
+	LATCH_ID_FTS_BG_THREADS,
+	LATCH_ID_FTS_DELETE,
+	LATCH_ID_FTS_OPTIMIZE,
+	LATCH_ID_FTS_DOC_ID,
+	LATCH_ID_FTS_PLL_TOKENIZE,
+	LATCH_ID_HASH_TABLE_MUTEX,
+	LATCH_ID_IBUF_BITMAP,
+	LATCH_ID_IBUF,
+	LATCH_ID_IBUF_PESSIMISTIC_INSERT,
+	LATCH_ID_LOG_SYS,
+	LATCH_ID_LOG_WRITE,
+	LATCH_ID_LOG_FLUSH_ORDER,
+	LATCH_ID_LIST,
+	LATCH_ID_MUTEX_LIST,
+	LATCH_ID_PAGE_CLEANER,
+	LATCH_ID_PURGE_SYS_PQ,
+	LATCH_ID_RECALC_POOL,
+	LATCH_ID_RECV_SYS,
+	LATCH_ID_RECV_WRITER,
+	LATCH_ID_REDO_RSEG,
+	LATCH_ID_NOREDO_RSEG,
+	LATCH_ID_RW_LOCK_DEBUG,
+	LATCH_ID_RTR_SSN_MUTEX,
+	LATCH_ID_RTR_ACTIVE_MUTEX,
+	LATCH_ID_RTR_MATCH_MUTEX,
+	LATCH_ID_RTR_PATH_MUTEX,
+	LATCH_ID_RW_LOCK_LIST,
+	LATCH_ID_RW_LOCK_MUTEX,
+	LATCH_ID_SRV_DICT_TMPFILE,
+	LATCH_ID_SRV_INNODB_MONITOR,
+	LATCH_ID_SRV_MISC_TMPFILE,
+	LATCH_ID_SRV_MONITOR_FILE,
+	LATCH_ID_SYNC_THREAD,
+	LATCH_ID_BUF_DBLWR,
+	LATCH_ID_TRX_UNDO,
+	LATCH_ID_TRX_POOL,
+	LATCH_ID_TRX_POOL_MANAGER,
+	LATCH_ID_TRX,
+	LATCH_ID_LOCK_SYS,
+	LATCH_ID_LOCK_SYS_WAIT,
+	LATCH_ID_TRX_SYS,
+	LATCH_ID_SRV_SYS,
+	LATCH_ID_SRV_SYS_TASKS,
+	LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
+	LATCH_ID_EVENT_MANAGER,
+	LATCH_ID_EVENT_MUTEX,
+	LATCH_ID_SYNC_ARRAY_MUTEX,
+	LATCH_ID_THREAD_MUTEX,
+	LATCH_ID_ZIP_PAD_MUTEX,
+	LATCH_ID_OS_AIO_READ_MUTEX,
+	LATCH_ID_OS_AIO_WRITE_MUTEX,
+	LATCH_ID_OS_AIO_LOG_MUTEX,
+	LATCH_ID_OS_AIO_IBUF_MUTEX,
+	LATCH_ID_OS_AIO_SYNC_MUTEX,
+	LATCH_ID_ROW_DROP_LIST,
+	LATCH_ID_INDEX_ONLINE_LOG,
+	LATCH_ID_WORK_QUEUE,
+	LATCH_ID_BTR_SEARCH,
+	LATCH_ID_BUF_BLOCK_LOCK,
+	LATCH_ID_BUF_BLOCK_DEBUG,
+	LATCH_ID_DICT_OPERATION,
+	LATCH_ID_CHECKPOINT,
+	LATCH_ID_FIL_SPACE,
+	LATCH_ID_FTS_CACHE,
+	LATCH_ID_FTS_CACHE_INIT,
+	LATCH_ID_TRX_I_S_CACHE,
+	LATCH_ID_TRX_PURGE,
+	LATCH_ID_IBUF_INDEX_TREE,
+	LATCH_ID_INDEX_TREE,
+	LATCH_ID_DICT_TABLE_STATS,
+	LATCH_ID_HASH_TABLE_RW_LOCK,
+	LATCH_ID_BUF_CHUNK_MAP_LATCH,
+	LATCH_ID_SYNC_DEBUG_MUTEX,
+	LATCH_ID_MASTER_KEY_ID_MUTEX,
+	LATCH_ID_SCRUB_STAT_MUTEX,
+	LATCH_ID_DEFRAGMENT_MUTEX,
+	LATCH_ID_BTR_DEFRAGMENT_MUTEX,
+	LATCH_ID_MTFLUSH_THREAD_MUTEX,
+	LATCH_ID_MTFLUSH_MUTEX,
+	LATCH_ID_FIL_CRYPT_MUTEX,
+	LATCH_ID_FIL_CRYPT_STAT_MUTEX,
+	LATCH_ID_FIL_CRYPT_DATA_MUTEX,
+	LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+	LATCH_ID_TEST_MUTEX,
+	LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
+};
+
+#ifndef UNIV_INNOCHECKSUM
+/** OS mutex, without any policy. It is a thin wrapper around the
+system mutexes. The interface is different from the policy mutexes,
+to ensure that it is called directly and not confused with the
+policy mutexes. */
+struct OSMutex {
+
+	/** Constructor */
+	OSMutex()
+		UNIV_NOTHROW
+	{
+		ut_d(m_freed = true);
+	}
+
+	/** Create the mutex by calling the system functions. */
+	void init()
+		UNIV_NOTHROW
+	{
+		ut_ad(m_freed);
+
+#ifdef _WIN32
+		InitializeCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+		{
+			int	ret = pthread_mutex_init(&m_mutex, NULL);
+			ut_a(ret == 0);
+		}
+#endif /* _WIN32 */
+
+		ut_d(m_freed = false);
+	}
+
+	/** Destructor */
+	~OSMutex() { }
+
+	/** Destroy the mutex */
+	void destroy()
+		UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+#ifdef _WIN32
+		DeleteCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+		int	ret;
+
+		ret = pthread_mutex_destroy(&m_mutex);
+
+		if (ret != 0) {
+
+			ib::error()
+				<< "Return value " << ret << " when calling "
+				<< "pthread_mutex_destroy().";
+		}
+#endif /* _WIN32 */
+		ut_d(m_freed = true);
+	}
+
+	/** Release the mutex. */
+	void exit()
+		UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+#ifdef _WIN32
+		LeaveCriticalSection(&m_mutex);
+#else
+		int	ret = pthread_mutex_unlock(&m_mutex);
+		ut_a(ret == 0);
+#endif /* _WIN32 */
+	}
+
+	/** Acquire the mutex. */
+	void enter()
+		UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+#ifdef _WIN32
+		EnterCriticalSection((LPCRITICAL_SECTION) &m_mutex);
+#else
+		int	ret = pthread_mutex_lock(&m_mutex);
+		ut_a(ret == 0);
+#endif /* _WIN32 */
+	}
+
+	/** @return true if locking succeeded */
+	bool try_lock()
+		UNIV_NOTHROW
+	{
+		ut_ad(innodb_calling_exit || !m_freed);
+#ifdef _WIN32
+		return(TryEnterCriticalSection(&m_mutex) != 0);
+#else
+		return(pthread_mutex_trylock(&m_mutex) == 0);
+#endif /* _WIN32 */
+	}
+
+	/** Required for os_event_t */
+	operator sys_mutex_t*()
+		UNIV_NOTHROW
+	{
+		return(&m_mutex);
+	}
+
+private:
+#ifdef UNIV_DEBUG
+	/** true if the mutex has been freed/destroyed. */
+	bool			m_freed;
+#endif /* UNIV_DEBUG */
+
+	sys_mutex_t		m_mutex;
+};
+
+#ifdef UNIV_PFS_MUTEX
+/** Latch element
+@param[in]	id		Latch id
+@param[in]	level		Latch level
+@param[in]	key		PFS key */
+# define LATCH_ADD(id, level, key)	latch_meta[LATCH_ID_ ## id] =	\
+	UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level, key))
+#else
+# define LATCH_ADD(id, level, key)	latch_meta[LATCH_ID_ ## id] =	\
+	UT_NEW_NOKEY(latch_meta_t(LATCH_ID_ ## id, #id, level, #level))
+#endif /* UNIV_PFS_MUTEX */
+
+/** Default latch counter */
+class LatchCounter {
+
+public:
+	/** The counts we collect for a mutex */
+	struct Count {
+
+		/** Constructor */
+		Count()
+			UNIV_NOTHROW
+			:
+			m_spins(),
+			m_waits(),
+			m_calls(),
+			m_enabled()
+		{
+			/* No op */
+		}
+
+		/** Reset the counts to zero; m_enabled is left untouched */
+		void reset()
+			UNIV_NOTHROW
+		{
+			m_spins = 0;
+			m_waits = 0;
+			m_calls = 0;
+		}
+
+		/** Number of spins trying to acquire the latch. */
+		uint32_t	m_spins;
+
+		/** Number of waits trying to acquire the latch */
+		uint32_t	m_waits;
+
+		/** Number of times it was called */
+		uint32_t	m_calls;
+
+		/** true if enabled */
+		bool		m_enabled;
+	};
+
+	/** Constructor */
+	LatchCounter()
+		UNIV_NOTHROW
+		:
+		m_active(false)
+	{
+		m_mutex.init();
+	}
+
+	/** Destructor */
+	~LatchCounter()
+		UNIV_NOTHROW
+	{
+		m_mutex.destroy();
+
+		for (Counters::iterator it = m_counters.begin();
+		     it != m_counters.end();
+		     ++it) {
+
+			Count*	count = *it;
+
+			UT_DELETE(count);
+		}
+	}
+
+	/** Reset all counters to zero. The list of counters is walked under
+	m_mutex, but each Count is zeroed with plain stores: we don't care
+	about atomicity unless it is a demonstrated problem. The information
+	collected is not required for the correct functioning of the server. */
+	void reset()
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		Counters::iterator	end = m_counters.end();
+
+		for (Counters::iterator it = m_counters.begin();
+		     it != end;
+		     ++it) {
+
+			(*it)->reset();
+		}
+
+		m_mutex.exit();
+	}
+
+	/** @return the aggregate counter */
+	Count* sum_register()
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		Count*	count;
+
+		if (m_counters.empty()) {
+			count = UT_NEW_NOKEY(Count());
+			m_counters.push_back(count);
+		} else {
+			ut_a(m_counters.size() == 1);
+			count = m_counters[0];
+		}
+
+		m_mutex.exit();
+
+		return(count);
+	}
+
+	/** Deregister the count. We don't do anything
+	@param[in]	count		The count instance to deregister */
+	void sum_deregister(Count* count)
+		UNIV_NOTHROW
+	{
+		/* Do nothing */
+	}
+
+	/** Register a single instance counter */
+	void single_register(Count* count)
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		m_counters.push_back(count);
+
+		m_mutex.exit();
+	}
+
+	/** Deregister a single instance counter
+	@param[in]	count		The count instance to deregister */
+	void single_deregister(Count* count)
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		m_counters.erase(
+			std::remove(
+				m_counters.begin(),
+				m_counters.end(), count),
+			m_counters.end());
+
+		m_mutex.exit();
+	}
+
+	/** Invoke callback on every counter; m_mutex is not taken here */
+	template <typename Callback>
+	void iterate(Callback& callback) const
+		UNIV_NOTHROW
+	{
+		Counters::const_iterator	end = m_counters.end();
+
+		for (Counters::const_iterator it = m_counters.begin();
+		     it != end;
+		     ++it) {
+
+			callback(*it);
+		}
+	}
+
+	/** Enable the monitoring */
+	void enable()
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		Counters::const_iterator	end = m_counters.end();
+
+		for (Counters::const_iterator it = m_counters.begin();
+		     it != end;
+		     ++it) {
+
+			(*it)->m_enabled = true;
+		}
+
+		m_active = true;
+
+		m_mutex.exit();
+	}
+
+	/** Disable the monitoring */
+	void disable()
+		UNIV_NOTHROW
+	{
+		m_mutex.enter();
+
+		Counters::const_iterator	end = m_counters.end();
+
+		for (Counters::const_iterator it = m_counters.begin();
+		     it != end;
+		     ++it) {
+
+			(*it)->m_enabled = false;
+		}
+
+		m_active = false;
+
+		m_mutex.exit();
+	}
+
+	/** @return true if monitoring is active */
+	bool is_enabled() const
+		UNIV_NOTHROW
+	{
+		return(m_active);
+	}
+
+private:
+	/* Disable copying */
+	LatchCounter(const LatchCounter&);
+	LatchCounter& operator=(const LatchCounter&);
+
+private:
+	typedef OSMutex Mutex;
+	typedef std::vector<Count*> Counters;
+
+	/** Mutex protecting m_counters */
+	Mutex			m_mutex;
+
+	/** Counters for the latches */
+	Counters		m_counters;
+
+	/** if true then we collect the data */
+	bool			m_active;
+};
+
+/** Latch meta data */
+template <typename Counter = LatchCounter>
+class LatchMeta {
+
+public:
+	typedef Counter CounterType;
+
+#ifdef UNIV_PFS_MUTEX
+	typedef	mysql_pfs_key_t	pfs_key_t;
+#endif /* UNIV_PFS_MUTEX */
+
+	/** Constructor */
+	LatchMeta()
+		:
+		m_id(LATCH_ID_NONE),
+		m_name(),
+		m_level(SYNC_UNKNOWN),
+		m_level_name()
+#ifdef UNIV_PFS_MUTEX
+		,m_pfs_key()
+#endif /* UNIV_PFS_MUTEX */
+	{
+	}
+
+	/** Destructor */
+	~LatchMeta() { }
+
+	/** Constructor
+	@param[in]	id		Latch id
+	@param[in]	name		Latch name
+	@param[in]	level		Latch level
+	@param[in]	level_name	Latch level text representation
+	@param[in]	key		PFS key */
+	LatchMeta(
+		latch_id_t	id,
+		const char*	name,
+		latch_level_t	level,
+		const char*	level_name
+#ifdef UNIV_PFS_MUTEX
+		,pfs_key_t	key
+#endif /* UNIV_PFS_MUTEX */
+	      )
+		:
+		m_id(id),
+		m_name(name),
+		m_level(level),
+		m_level_name(level_name)
+#ifdef UNIV_PFS_MUTEX
+		,m_pfs_key(key)
+#endif /* UNIV_PFS_MUTEX */
+	{
+		/* No op */
+	}
+
+	/** Less than operator.
+	@param[in]	rhs		Instance to compare against
+	@return true if this->get_id() < rhs.get_id() */
+	bool operator<(const LatchMeta& rhs) const
+	{
+		return(get_id() < rhs.get_id());
+	}
+
+	/** @return the latch id */
+	latch_id_t get_id() const
+	{
+		return(m_id);
+	}
+
+	/** @return the latch name */
+	const char* get_name() const
+	{
+		return(m_name);
+	}
+
+	/** @return the latch level */
+	latch_level_t get_level() const
+	{
+		return(m_level);
+	}
+
+	/** @return the latch level name */
+	const char* get_level_name() const
+	{
+		return(m_level_name);
+	}
+
+#ifdef UNIV_PFS_MUTEX
+	/** @return the PFS key for the latch */
+	pfs_key_t get_pfs_key() const
+	{
+		return(m_pfs_key);
+	}
+#endif /* UNIV_PFS_MUTEX */
+
+	/** @return the counter instance */
+	Counter* get_counter()
+	{
+		return(&m_counter);
+	}
+
+private:
+	/** Latch id */
+	latch_id_t		m_id;
+
+	/** Latch name */
+	const char*		m_name;
+
+	/** Latch level in the ordering */
+	latch_level_t		m_level;
+
+	/** Latch level text representation */
+	const char*		m_level_name;
+
+#ifdef UNIV_PFS_MUTEX
+	/** PFS key */
+	pfs_key_t		m_pfs_key;
+#endif /* UNIV_PFS_MUTEX */
+
+	/** For gathering latch statistics */
+	Counter			m_counter;
+};
+
+typedef LatchMeta<LatchCounter> latch_meta_t;
+typedef std::vector<latch_meta_t*, ut_allocator<latch_meta_t*> > LatchMetaData;
+
+/** Note: This is accessed without any mutex protection. It is initialised
+at startup and elements should not be added to or removed from it after
+that.  See sync_latch_meta_init() */
+extern LatchMetaData	latch_meta;
+
+/** Get the latch meta-data from the latch ID
+@param[in]	id		Latch ID
+@return the latch meta data */
+inline
+latch_meta_t&
+sync_latch_get_meta(latch_id_t id)
+{
+	ut_ad(static_cast<size_t>(id) < latch_meta.size());
+	ut_ad(id == latch_meta[id]->get_id());
+
+	return(*latch_meta[id]);
+}
+
+/** Fetch the counter for the latch
+@param[in]	id		Latch ID
+@return the latch counter */
+inline
+latch_meta_t::CounterType*
+sync_latch_get_counter(latch_id_t id)
+{
+	latch_meta_t&	meta = sync_latch_get_meta(id);
+
+	return(meta.get_counter());
+}
+
+/** Look up the name of a latch via its meta-data.
+@param[in]	id		Latch ID
+@return the latch name; sync_latch_get_meta() asserts on the ID */
+inline
+const char*
+sync_latch_get_name(latch_id_t id)
+{
+	const char*	name = sync_latch_get_meta(id).get_name();
+
+	return(name);
+}
+
+/** Look up the ordering level of a latch via its meta-data.
+@param[in]	id		Latch id to lookup
+@return the latch level */
+inline
+latch_level_t
+sync_latch_get_level(latch_id_t id)
+{
+	const latch_level_t	level = sync_latch_get_meta(id).get_level();
+
+	return(level);
+}
+
+#ifdef HAVE_PSI_INTERFACE
+/** Get the latch PFS key from the latch ID. NOTE(review): get_pfs_key()
+is declared under UNIV_PFS_MUTEX, this under HAVE_PSI_INTERFACE; confirm.
+@param[in]	id		Latch ID
+@return the PFS key */
+inline
+mysql_pfs_key_t
+sync_latch_get_pfs_key(latch_id_t id)
+{
+	/* The key is kept in the latch meta-data entry. */
+	return(sync_latch_get_meta(id).get_pfs_key());
+}
 #endif
+
+/** String representation of the filename and line number where the
+latch was created
+@param[in]	id		Latch ID
+@param[in]	created		Filename and line number where it was created
+@return the string representation */
+std::string
+sync_mutex_to_string(
+	latch_id_t		id,
+	const std::string&	created);
+
+/** Get the latch name from a sync level
+@param[in]	level		Latch level to lookup
+@return 0 if not found. */
+const char*
+sync_latch_get_name(latch_level_t level);
+
+/** Print the filename "basename"
+@return the basename */
+const char*
+sync_basename(const char* filename);
+
+/** Register a latch, called when it is created
+@param[in]	ptr		Latch instance that was created
+@param[in]	filename	Filename where it was created
+@param[in]	line		Line number in filename */
+void
+sync_file_created_register(
+	const void*	ptr,
+	const char*	filename,
+	uint16_t	line);
+
+/** Deregister a latch, called when it is destroyed
+@param[in]	ptr		Latch to be destroyed */
+void
+sync_file_created_deregister(const void* ptr);
+
+/** Get the string where the file was created. Its format is "name:line"
+@param[in]	ptr		Latch instance
+@return created information or "" if can't be found */
+std::string
+sync_file_created_get(const void* ptr);
+
+#ifdef UNIV_DEBUG
+
+/** All (ordered) latches, used in debugging, must derive from this class. */
+struct latch_t {
+
+	/** Constructor; both flags (m_rw_lock, m_temp_fsp) start out false.
+	@param[in]	id	The latch ID, LATCH_ID_NONE if omitted */
+	explicit latch_t(latch_id_t id = LATCH_ID_NONE)
+		UNIV_NOTHROW
+		:
+		m_id(id),
+		m_rw_lock(),
+		m_temp_fsp() { }
+
+	/** Destructor */
+	virtual ~latch_t() UNIV_NOTHROW { }
+
+	/** @return the latch ID */
+	latch_id_t get_id() const
+	{
+		return(m_id);
+	}
+
+	/** @return true if it is a rw-lock (the m_rw_lock flag) */
+	bool is_rw_lock() const
+		UNIV_NOTHROW
+	{
+		return(m_rw_lock);
+	}
+
+	/** Describe the latch context; implemented by the derived latch.
+	@return the string representation */
+	virtual std::string to_string() const = 0;
+
+	/** @return "filename:line" from where the latch was last locked */
+	virtual std::string locked_from() const = 0;
+
+	/** @return the latch level; asserts that m_id is not LATCH_ID_NONE */
+	latch_level_t get_level() const
+		UNIV_NOTHROW
+	{
+		ut_a(m_id != LATCH_ID_NONE);
+
+		return(sync_latch_get_level(m_id));
+	}
+
+	/** @return true if the latch is for a temporary file space */
+	bool is_temp_fsp() const
+		UNIV_NOTHROW
+	{
+		return(m_temp_fsp);
+	}
+
+	/** Set the temporary tablespace flag; the latch ID must be
+	LATCH_ID_FIL_SPACE. (For internal temporary tables, MySQL 5.7 does
+	not always acquire the index->lock. We need to figure out the
+	context and add some special rules during the checks.) */
+	void set_temp_fsp()
+		UNIV_NOTHROW
+	{
+		ut_ad(get_id() == LATCH_ID_FIL_SPACE);
+		m_temp_fsp = true;
+	}
+
+	/** @return the latch name; asserts that m_id is not LATCH_ID_NONE */
+	const char* get_name() const
+		UNIV_NOTHROW
+	{
+		ut_a(m_id != LATCH_ID_NONE);
+
+		return(sync_latch_get_name(m_id));
+	}
+
+	/** Latch ID */
+	latch_id_t	m_id;
+
+	/** true if it is a rw-lock. In debug mode, rw_lock_t derives from
+	this class and sets this variable. */
+	bool		m_rw_lock;
+
+	/** true if it is a temporary space latch */
+	bool		m_temp_fsp;
+};
+
+/** Subclass this to iterate over a thread's acquired latch levels. */
+struct sync_check_functor_t {
+	virtual ~sync_check_functor_t() { }
+	virtual bool operator()(const latch_level_t) = 0; /*!< check a level */
+	virtual bool result() const = 0; /*!< @return outcome of the checks */
+};
+
+/** Functor to check whether the calling thread owns the btr search mutex. */
+struct btrsea_sync_check : public sync_check_functor_t {
+
+	/** Constructor
+	@param[in]	has_search_latch	true if the caller owns the latch */
+	explicit btrsea_sync_check(bool has_search_latch)
+		: m_result(false), m_has_search_latch(has_search_latch)
+	{
+	}
+
+	/** Destructor */
+	virtual ~btrsea_sync_check() { }
+
+	/** Called for every latch owned by the calling thread. Flags a level
+	other than SYNC_SEARCH_SYS/SYNC_FTS_CACHE unless the latch is owned.
+	@param[in]	level		Level of the existing latch
+	@return true if the level was flagged, false otherwise */
+	virtual bool operator()(const latch_level_t level)
+	{
+		/* Nothing is flagged while the caller holds the search
+		latch; otherwise only the two levels below are exempt. */
+		if (m_has_search_latch
+		    || level == SYNC_SEARCH_SYS
+		    || level == SYNC_FTS_CACHE) {
+
+			return(false);
+		}
+
+		m_result = true;
+
+		return(true);
+	}
+
+	/** @return true if operator() has flagged any level so far */
+	virtual bool result() const
+	{
+		return(m_result);
+	}
+
+private:
+	/** Set to true once operator() has flagged a latch level */
+	bool		m_result;
+
+	/** If the caller owns the search latch */
+	const bool	m_has_search_latch;
+};
+
+/** Functor to check for dictionary latching constraints. */
+struct dict_sync_check : public sync_check_functor_t {
+
+	/** Constructor
+	@param[in]	dict_mutex_allowed	true if the dict mutex
+						is allowed */
+	explicit dict_sync_check(bool dict_mutex_allowed)
+		: m_result(false), m_dict_mutex_allowed(dict_mutex_allowed)
+	{
+	}
+
+	/** Destructor */
+	virtual ~dict_sync_check() { }
+
+	/** Check the latching constraints
+	@param[in]	level		The level held by the thread
+	@return true if the level was flagged, false if it is accepted */
+	virtual bool operator()(const latch_level_t level)
+	{
+		/* SYNC_RECV_WRITER only happens in
+		recv_apply_hashed_log_recs. */
+		const bool	exempt_level = level == SYNC_DICT
+			|| level == SYNC_DICT_OPERATION
+			|| level == SYNC_FTS_CACHE
+			|| level == SYNC_RECV_WRITER
+			|| level == SYNC_NO_ORDER_CHECK;
+		if (m_dict_mutex_allowed && exempt_level) {
+			return(false);
+		}
+
+		m_result = true;
+		return(true);
+	}
+
+	/** @return true if operator() has flagged any level so far */
+	virtual bool result() const
+	{
+		return(m_result);
+	}
+
+private:
+	/** Set to true once operator() has flagged a latch level */
+	bool		m_result;
+
+	/** True if it is OK to hold the dict mutex */
+	const bool	m_dict_mutex_allowed;
+};
+
+/** Functor to check for given latching constraints. */
+struct sync_allowed_latches : public sync_check_functor_t {
+
+	/** Constructor
+	@param[in]	from	first element in an array of latch_level_t
+	@param[in]	to	one past the last element of that array */
+	sync_allowed_latches(
+		const latch_level_t*	from,
+		const latch_level_t*	to)
+		:
+		m_result(),
+		m_latches(from, to) { }
+
+	/** Checks whether the given latch level violates the constraint.
+	This object maintains a list of allowed latch levels, and if the given
+	level is not in the allowed list, then it is a violation. A violation
+	is also recorded in m_result, so that result() reports it, as the
+	other sync_check_functor_t implementations in this file do.
+	@param[in]	level	The latch level to check
+	@return true if there is a latch ordering violation */
+	virtual bool operator()(const latch_level_t level)
+	{
+		for (latches_t::const_iterator it = m_latches.begin();
+		     it != m_latches.end();
+		     ++it) {
+
+			if (level == *it) {
+				/* No violation. m_result is left alone so
+				that an earlier violation stays recorded. */
+				return(false);
+			}
+		}
+
+		/* Not in the allowed list: record the violation so
+		that result() reports it. */
+		m_result = true;
+		return(true);
+	}
+
+	/** @return true if operator() has seen a violation */
+	virtual bool result() const
+	{
+		return(m_result);
+	}
+
+private:
+	/** Set to true once a violation has been seen */
+	bool		m_result;
+
+	typedef std::vector<latch_level_t, ut_allocator<latch_level_t> >
+		latches_t;
+
+	/** List of latch levels that are allowed to be held */
+	latches_t	m_latches;
+};
+
+/** Get the latch id from a latch name.
+@param[in]	name	Latch name
+@return LATCH_ID_NONE. */
+latch_id_t
+sync_latch_get_id(const char* name);
+
+typedef ulint rw_lock_flags_t;
+
+/* Flags to specify lock types for rw_lock_own_flagged() */
+enum rw_lock_flag_t {
+	RW_LOCK_FLAG_S  = 1 << 0,
+	RW_LOCK_FLAG_X  = 1 << 1,
+	RW_LOCK_FLAG_SX = 1 << 2
+};
+
+#endif /* UNIV_DEBUG */
+
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifdef _WIN64
+#define my_atomic_addlint(A,B) my_atomic_add64((int64*) (A), (B))
+#define my_atomic_loadlint(A) my_atomic_load64((int64*) (A))
+#define my_atomic_caslint(A,B,C) my_atomic_cas64((int64*) (A), (int64*) (B), (C))
+#else
+#define my_atomic_addlint my_atomic_addlong
+#define my_atomic_loadlint my_atomic_loadlong
+#define my_atomic_caslint my_atomic_caslong
+#endif
+
+#endif /* sync0types_h */
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index 662971a7841..f588d820743 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,7 +31,6 @@ Created July 17, 2007 Vasil Dimov
 #include "univ.i"
 #include "trx0types.h"
 #include "dict0types.h"
-#include "ut0ut.h"
 
 /** The maximum amount of memory that can be consumed by innodb_trx,
 innodb_locks and innodb_lock_waits information schema tables. */
@@ -134,13 +133,11 @@ struct i_s_trx_row_t {
 					in innodb_locks if trx
 					is waiting, or NULL */
 	ib_time_t	trx_wait_started; /*!< trx_t::wait_started */
-	ullint		trx_weight;	/*!< TRX_WEIGHT() */
+	uintmax_t	trx_weight;	/*!< TRX_WEIGHT() */
 	ulint		trx_mysql_thread_id; /*!< thd_get_thread_id() */
 	const char*	trx_query;	/*!< MySQL statement being
 					executed in the transaction */
-	struct charset_info_st*	trx_query_cs;
-					/*!< charset encode the MySQL
-					statement */
+	CHARSET_INFO*	trx_query_cs;	/*!< the charset of trx_query */
 	const char*	trx_operation_state; /*!< trx_t::op_info */
 	ulint		trx_tables_in_use;/*!< n_mysql_tables_in_use in
 					 trx_t */
@@ -153,7 +150,7 @@ struct i_s_trx_row_t {
 					/*!< mem_heap_get_size(
 					trx->lock_heap) */
 	ulint		trx_rows_locked;/*!< lock_number_of_rows_locked() */
-	ullint		trx_rows_modified;/*!< trx_t::undo_no */
+	uintmax_t	trx_rows_modified;/*!< trx_t::undo_no */
 	ulint		trx_concurrency_tickets;
 					/*!< n_tickets_to_enter_innodb in
 					trx_t */
@@ -167,8 +164,6 @@ struct i_s_trx_row_t {
 					/*!< detailed_error in trx_t */
 	ibool		trx_has_search_latch;
 					/*!< has_search_latch in trx_t */
-	ulint		trx_search_latch_timeout;
-					/*!< search_latch_timeout in trx_t */
 	ulint		trx_is_read_only;
 					/*!< trx_t::read_only */
 	ulint		trx_is_autocommit_non_locking;
@@ -200,14 +195,12 @@ extern trx_i_s_cache_t*	trx_i_s_cache;
 
 /*******************************************************************//**
 Initialize INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_init(
 /*===============*/
 	trx_i_s_cache_t*	cache);	/*!< out: cache to init */
 /*******************************************************************//**
 Free the INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_free(
 /*===============*/
@@ -215,7 +208,6 @@ trx_i_s_cache_free(
 
 /*******************************************************************//**
 Issue a shared/read lock on the tables cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_start_read(
 /*=====================*/
@@ -223,7 +215,6 @@ trx_i_s_cache_start_read(
 
 /*******************************************************************//**
 Release a shared/read lock on the tables cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_end_read(
 /*===================*/
@@ -231,7 +222,6 @@ trx_i_s_cache_end_read(
 
 /*******************************************************************//**
 Issue an exclusive/write lock on the tables cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_start_write(
 /*======================*/
@@ -239,7 +229,6 @@ trx_i_s_cache_start_write(
 
 /*******************************************************************//**
 Release an exclusive/write lock on the tables cache. */
-UNIV_INTERN
 void
 trx_i_s_cache_end_write(
 /*====================*/
@@ -249,8 +238,7 @@ trx_i_s_cache_end_write(
 /*******************************************************************//**
 Retrieves the number of used rows in the cache for a given
 INFORMATION SCHEMA table.
-@return	number of rows */
-UNIV_INTERN
+@return number of rows */
 ulint
 trx_i_s_cache_get_rows_used(
 /*========================*/
@@ -260,8 +248,7 @@ trx_i_s_cache_get_rows_used(
 /*******************************************************************//**
 Retrieves the nth row in the cache for a given INFORMATION SCHEMA
 table.
-@return	row */
-UNIV_INTERN
+@return row */
 void*
 trx_i_s_cache_get_nth_row(
 /*======================*/
@@ -271,8 +258,7 @@ trx_i_s_cache_get_nth_row(
 
 /*******************************************************************//**
 Update the transactions cache if it has not been read for some time.
-@return	0 - fetched, 1 - not */
-UNIV_INTERN
+@return 0 - fetched, 1 - not */
 int
 trx_i_s_possibly_fetch_data_into_cache(
 /*===================================*/
@@ -281,13 +267,11 @@ trx_i_s_possibly_fetch_data_into_cache(
 /*******************************************************************//**
 Returns TRUE if the data in the cache is truncated due to the memory
 limit posed by TRX_I_S_MEM_LIMIT.
-@return	TRUE if truncated */
-UNIV_INTERN
+@return TRUE if truncated */
 ibool
 trx_i_s_cache_is_truncated(
 /*=======================*/
 	trx_i_s_cache_t*	cache);	/*!< in: cache */
-
 /** The maximum length of a resulting lock_id_size in
 trx_i_s_create_lock_id(), not including the terminating NUL.
 ":%lu:%lu:%lu" -> 63 chars */
@@ -298,8 +282,7 @@ Crafts a lock id string from a i_s_locks_row_t object. Returns its
 second argument. This function aborts if there is not enough space in
 lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
 want to be 100% sure that it will not abort.
-@return	resulting lock id */
-UNIV_INTERN
+@return resulting lock id */
 char*
 trx_i_s_create_lock_id(
 /*===================*/
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 1e13c883800..8917169dc94 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,6 +34,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "page0page.h"
 #include "usr0sess.h"
 #include "fil0fil.h"
+#include "read0types.h"
 
 /** The global data structure coordinating a purge */
 extern trx_purge_t*	purge_sys;
@@ -45,7 +46,7 @@ extern trx_undo_rec_t	trx_purge_dummy_rec;
 /********************************************************************//**
 Calculates the file address of an undo log header when we have the file
 address of its history list node.
-@return	file address of the log */
+@return file address of the log */
 UNIV_INLINE
 fil_addr_t
 trx_purge_get_log_from_hist(
@@ -55,33 +56,34 @@ trx_purge_get_log_from_hist(
 /********************************************************************//**
 Creates the global purge system control structure and inits the history
 mutex. */
-UNIV_INTERN
 void
 trx_purge_sys_create(
 /*=================*/
 	ulint		n_purge_threads,/*!< in: number of purge threads */
-	ib_bh_t*	ib_bh);		/*!< in/own: UNDO log min binary heap*/
+	purge_pq_t*	purge_queue);	/*!< in/own: UNDO log min binary heap*/
 /********************************************************************//**
 Frees the global purge system control structure. */
-UNIV_INTERN
 void
 trx_purge_sys_close(void);
 /*======================*/
 /************************************************************************
 Adds the update undo log as the first log in the history list. Removes the
 update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
 void
 trx_purge_add_update_undo_to_history(
 /*=================================*/
-	trx_t*	trx,		/*!< in: transaction */
-	page_t*	undo_page,	/*!< in: update undo log header page,
-				x-latched */
-	mtr_t*	mtr);		/*!< in: mtr */
+	trx_t*		trx,		/*!< in: transaction */
+	trx_undo_ptr_t*	undo_ptr,	/*!< in: update undo log. */
+	page_t*		undo_page,	/*!< in: update undo log header page,
+					x-latched */
+	bool		update_rseg_history_len,
+					/*!< in: if true: update rseg history
+					len else skip updating it. */
+	ulint		n_added_logs,	/*!< in: number of logs added */
+	mtr_t*		mtr);		/*!< in: mtr */
 /*******************************************************************//**
 This function runs a purge batch.
-@return	number of undo log pages handled in the batch */
-UNIV_INTERN
+@return number of undo log pages handled in the batch */
 ulint
 trx_purge(
 /*======*/
@@ -92,13 +94,11 @@ trx_purge(
 	bool	truncate);		/*!< in: truncate history if true */
 /*******************************************************************//**
 Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-UNIV_INTERN
 void
 trx_purge_stop(void);
 /*================*/
 /*******************************************************************//**
 Resume purge, move to PURGE_STATE_RUN. */
-UNIV_INTERN
 void
 trx_purge_run(void);
 /*================*/
@@ -115,21 +115,275 @@ enum purge_state_t {
 /*******************************************************************//**
 Get the purge state.
 @return purge state. */
-UNIV_INTERN
 purge_state_t
 trx_purge_state(void);
 /*=================*/
 
+// Forward declaration
+struct TrxUndoRsegsIterator;
+
 /** This is the purge pointer/iterator. We need both the undo no and the
 transaction no up to which purge has parsed and applied the records. */
 struct purge_iter_t {
+	purge_iter_t()
+		:
+		trx_no(),
+		undo_no(),
+		undo_rseg_space(ULINT_UNDEFINED)
+	{
+		// Do nothing
+	}
+
 	trx_id_t	trx_no;		/*!< Purge has advanced past all
 					transactions whose number is less
 					than this */
 	undo_no_t	undo_no;	/*!< Purge has advanced past all records
 					whose undo number is less than this */
+	ulint		undo_rseg_space;
+					/*!< Last undo record resided in this
+					space id. */
 };
 
+
+/** Namespace to hold all the related functions and variables needed for
+truncate of undo tablespace. */
+namespace undo {
+
+	typedef std::vector<ulint>		undo_spaces_t;
+	typedef	std::vector<trx_rseg_t*>	rseg_for_trunc_t;
+
+	/** Magic Number to indicate truncate action is complete. */
+	const ib_uint32_t			s_magic = 76845412;
+
+	/** Truncate Log file Prefix. */
+	const char* const			s_log_prefix = "undo_";
+
+	/** Truncate Log file Extension. */
+	const char* const			s_log_ext = "trunc.log";
+
+	/** Populate log file name (log_file_name, by reference) from space_id
+	@param[in]	space_id	id of the undo tablespace.
+	@return DB_SUCCESS or error code */
+	dberr_t populate_log_file_name(
+		ulint	space_id,
+		char*&	log_file_name);
+
+	/** Create the truncate log file.
+	@param[in]	space_id	id of the undo tablespace to truncate.
+	@return DB_SUCCESS or error code. */
+	dberr_t init(ulint space_id);
+
+	/** Mark completion of undo truncate action by writing magic number to
+	the log file and then removing it from the disk.
+	If we are going to remove it from disk then why write magic number ?
+	This is to safeguard from unlink (file-system) anomalies that will keep
+	the link to the file even after unlink action is successful and
+	ref-count = 0.
+	@param[in]	space_id	id of the undo tablespace to truncate.*/
+	void done(ulint	space_id);
+
+	/** Check if TRUNCATE_DDL_LOG file exists.
+	@param[in]	space_id	id of the undo tablespace.
+	@return true if exist else false. */
+	bool is_log_present(ulint space_id);
+
+	/** Track UNDO tablespace marked for truncate. */
+	class Truncate {
+	public:
+
+		Truncate()
+			:
+			m_undo_for_trunc(ULINT_UNDEFINED),
+			m_rseg_for_trunc(),
+			m_scan_start(1),
+			m_purge_rseg_truncate_frequency(
+				static_cast<ulint>(
+				srv_purge_rseg_truncate_frequency))
+		{
+			/* Do Nothing. */
+		}
+
+		/** Clear the cached rollback segment. Normally done
+		when purge is about to shutdown. */
+		void clear()
+		{
+			reset();
+			rseg_for_trunc_t	temp;
+			m_rseg_for_trunc.swap(temp);
+		}
+
+		/** Is tablespace selected for truncate.
+		@return true if undo tablespace is marked for truncate */
+		bool is_marked() const
+		{
+			return(!(m_undo_for_trunc == ULINT_UNDEFINED));
+		}
+
+		/** Mark the tablespace for truncate.
+		@param[in]	undo_id		tablespace for truncate. */
+		void mark(ulint undo_id)
+		{
+			m_undo_for_trunc = undo_id;
+
+			m_scan_start = (undo_id + 1)
+					% (srv_undo_tablespaces_active + 1);
+			if (m_scan_start == 0) {
+				/* Note: UNDO tablespace ids starts from 1. */
+				m_scan_start = 1;
+			}
+
+			/* We found an UNDO-tablespace to truncate so set the
+			local purge rseg truncate frequency to 1. This will help
+			accelerate the purge action and in turn truncate. */
+			m_purge_rseg_truncate_frequency = 1;
+		}
+
+		/** Get the tablespace marked for truncate.
+		@return tablespace id marked for truncate. */
+		ulint get_marked_space_id() const
+		{
+			return(m_undo_for_trunc);
+		}
+
+		/** Add rseg to truncate vector.
+		@param[in,out]	rseg	rseg for truncate */
+		void add_rseg_to_trunc(trx_rseg_t* rseg)
+		{
+			m_rseg_for_trunc.push_back(rseg);
+		}
+
+		/** Get number of rsegs registered for truncate.
+		@return number of rsegs that belong to the tablespace marked
+		for truncate. */
+		ulint rsegs_size() const
+		{
+			return(m_rseg_for_trunc.size());
+		}
+
+		/** Get ith registered rseg.
+		@param[in]	id	index of rseg to get.
+		@return pointer to the registered rseg. */
+		trx_rseg_t* get_ith_rseg(ulint id)
+		{
+			ut_ad(id < m_rseg_for_trunc.size());
+			return(m_rseg_for_trunc.at(id));
+		}
+
+		/** Reset for next rseg truncate. */
+		void reset()
+		{
+			m_undo_for_trunc = ULINT_UNDEFINED;
+			m_rseg_for_trunc.clear();
+
+			/* Sync with global value as we are done with
+			truncate now. */
+			m_purge_rseg_truncate_frequency = static_cast<ulint>(
+				srv_purge_rseg_truncate_frequency);
+		}
+
+		/** Get the tablespace id to start scanning from.
+		@return	id of UNDO tablespace to start scanning from. */
+		ulint get_scan_start() const
+		{
+			return(m_scan_start);
+		}
+
+		/** Check if the tablespace needs fix-up (based on presence of
+		DDL truncate log)
+		@param[in]	space_id	space id of the undo tablespace to check
+		@return true if fix up is needed else false */
+		bool needs_fix_up(ulint	space_id) const
+		{
+			return(is_log_present(space_id));
+		}
+
+		/** Add undo tablespace to truncate vector.
+		@param[in]	space_id	space id of tablespace to
+						truncate */
+		static void add_space_to_trunc_list(ulint space_id)
+		{
+			s_spaces_to_truncate.push_back(space_id);
+		}
+
+		/** Clear the truncate vector. */
+		static void clear_trunc_list()
+		{
+			s_spaces_to_truncate.clear();
+		}
+
+		/** Is tablespace in the list of tablespaces to truncate.
+		@param[in]	space_id	space id to check
+		@return true if marked for truncate, else false. */
+		static bool is_tablespace_truncated(ulint space_id)
+		{
+			return(std::find(s_spaces_to_truncate.begin(),
+					 s_spaces_to_truncate.end(), space_id)
+			       != s_spaces_to_truncate.end());
+		}
+
+		/** Was a tablespace truncated at startup
+		@param[in]	space_id	space id to check
+		@return whether space_id was truncated at startup */
+		static bool was_tablespace_truncated(ulint space_id)
+		{
+			return(std::find(s_fix_up_spaces.begin(),
+					 s_fix_up_spaces.end(),
+					 space_id)
+			       != s_fix_up_spaces.end());
+		}
+
+		/** Get local rseg purge truncate frequency
+		@return rseg purge truncate frequency. */
+		ulint get_rseg_truncate_frequency() const
+		{
+			return(m_purge_rseg_truncate_frequency);
+		}
+
+		/** Start writing log information to a special file.
+		On successful completion, file is removed.
+		On crash, file is used to complete the truncate action.
+		@param[in]	space_id	space id of undo tablespace
+		@return DB_SUCCESS or error code. */
+		dberr_t start_logging(ulint space_id)
+		{
+			return(init(space_id));
+		}
+
+		/** Mark completion of logging.
+		@param[in]	space_id	space id of undo tablespace */
+		void done_logging(ulint space_id)
+		{
+			return(done(space_id));
+		}
+
+	private:
+		/** UNDO tablespace that is marked for truncate. */
+		ulint			m_undo_for_trunc;
+
+		/** rseg that resides in UNDO tablespace is marked for
+		truncate. */
+		rseg_for_trunc_t	m_rseg_for_trunc;
+
+		/** Start scanning for UNDO tablespace from this space_id.
+		This is to avoid biased selection of one tablespace always. */
+		ulint			m_scan_start;
+
+		/** Rollback segment(s) purge frequency. This is local
+		value maintained along with global value. It is set to global
+		value on start but when tablespace is marked for truncate it
+		is updated to 1 and then minimum value among 2 is used by
+		purge action. */
+		ulint			m_purge_rseg_truncate_frequency;
+
+		/** List of UNDO tablespace(s) to truncate. */
+		static undo_spaces_t	s_spaces_to_truncate;
+	public:
+		/** Undo tablespaces that were truncated at startup */
+		static undo_spaces_t	s_fix_up_spaces;
+	};	/* class Truncate */
+
+};	/* namespace undo */
+
 /** The control structure used in the purge operation */
 struct trx_purge_t{
 	sess_t*		sess;		/*!< System session running the purge
@@ -154,8 +408,9 @@ struct trx_purge_t{
 					without holding the latch. */
 	que_t*		query;		/*!< The query graph which will do the
 					parallelized purge operation */
-	read_view_t*	view;		/*!< The purge will not remove undo logs
+	ReadView	view;		/*!< The purge will not remove undo logs
 					which are >= this view (purge view) */
+	bool		view_active;	/*!< true if view is active */
 	volatile ulint	n_submitted;	/*!< Count of total tasks submitted
 					to the task queue */
 	volatile ulint	n_completed;	/*!< Count of total tasks completed */
@@ -194,15 +449,19 @@ struct trx_purge_t{
 	ulint		hdr_page_no;	/*!< Header page of the undo log where
 					the next record to purge belongs */
 	ulint		hdr_offset;	/*!< Header byte offset on the page */
-	/*-----------------------------*/
-	mem_heap_t*	heap;		/*!< Temporary storage used during a
-					purge: can be emptied after purge
-					completes */
-	/*-----------------------------*/
-	ib_bh_t*	ib_bh;		/*!< Binary min-heap, ordered on
-					rseg_queue_t::trx_no. It is protected
-					by the bh_mutex */
-	ib_mutex_t		bh_mutex;	/*!< Mutex protecting ib_bh */
+
+
+	TrxUndoRsegsIterator*
+			rseg_iter;	/*!< Iterator to get the next rseg
+					to process */
+
+	purge_pq_t*	purge_queue;	/*!< Binary min-heap, ordered on
+					TrxUndoRsegs::trx_no. It is protected
+					by the pq_mutex */
+	PQMutex		pq_mutex;	/*!< Mutex protecting purge_queue */
+
+	undo::Truncate	undo_trunc;	/*!< Track UNDO tablespace marked
+					for truncate. */
 };
 
 /** Info required to purge a record */
@@ -211,8 +470,40 @@ struct trx_purge_rec_t {
 	roll_ptr_t	roll_ptr;	/*!< File pointr to UNDO record */
 };
 
+/**
+Chooses the rollback segment with the smallest trx_no. */
+struct TrxUndoRsegsIterator {
+
+	/** Constructor; purge_sys is presumably kept in m_purge_sys */
+	TrxUndoRsegsIterator(trx_purge_t* purge_sys);
+
+	/** Sets the next rseg to purge in m_purge_sys.
+	@return page size of the table for which the log is.
+	NOTE: if rseg is NULL when this function returns this means that
+	there are no rollback segments to purge and then the returned page
+	size object should not be used. */
+	const page_size_t set_next();
+
+private:
+	// Disable copying: declared private, no definition in this header
+	TrxUndoRsegsIterator(const TrxUndoRsegsIterator&);
+	TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
+
+	/** The purge system pointer */
+	trx_purge_t*			m_purge_sys;
+
+	/** The current element to process */
+	TrxUndoRsegs			m_trx_undo_rsegs;
+
+	/** Track the current element in m_trx_undo_rsegs */
+	TrxUndoRsegs::iterator		m_iter;
+
+	/** Sentinel value */
+	static const TrxUndoRsegs	NullElement;
+};
+
 #ifndef UNIV_NONINL
 #include "trx0purge.ic"
-#endif
+#endif /* UNIV_NONINL */
 
-#endif
+#endif /* trx0purge_h */
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index ca9cc1fb894..c32651b7a00 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,7 @@ Created 3/26/1996 Heikki Tuuri
 /********************************************************************//**
 Calculates the file address of an undo log header when we have the file
 address of its history list node.
-@return	file address of the log */
+@return file address of the log */
 UNIV_INLINE
 fil_addr_t
 trx_purge_get_log_from_hist(
@@ -41,22 +41,23 @@ trx_purge_get_log_from_hist(
 	return(node_addr);
 }
 
-#ifdef UNIV_DEBUG
 /********************************************************************//**
 address of its history list node.
-@return	TRUE if purge_sys_t::limit <= purge_sys_t::iter*/
+@return true if purge_sys_t::limit <= purge_sys_t::iter */
 UNIV_INLINE
-ibool
+bool
 trx_purge_check_limit(void)
 /*=======================*/
 {
-	ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no);
-
-	if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) {
-		ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no);
-	}
-
-	return(TRUE);
+	/* limit is used to track till what point purge element has been
+	processed and so limit <= iter.
+	undo_no ordering is enforced only within the same rollback segment.
+	If a transaction uses multiple rollback segments then we need to
+	consider the rollback segment space id too. */
+	return(purge_sys->iter.trx_no > purge_sys->limit.trx_no
+	       || (purge_sys->iter.trx_no == purge_sys->limit.trx_no
+		   && ((purge_sys->iter.undo_no >= purge_sys->limit.undo_no)
+		       || (purge_sys->iter.undo_rseg_space
+			   != purge_sys->limit.undo_rseg_space))));
 }
-#endif /* UNIV_DEBUG */
 
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index 359937e3583..b7a2deac63e 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -33,13 +33,15 @@ Created 3/26/1996 Heikki Tuuri
 #include "dict0types.h"
 #include "data0data.h"
 #include "rem0types.h"
+#include "page0types.h"
+#include "row0log.h"
 
 #ifndef UNIV_HOTBACKUP
 # include "que0types.h"
 
 /***********************************************************************//**
 Copies the undo record to the heap.
-@return	own: copy of undo log record */
+@return own: copy of undo log record */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_rec_copy(
@@ -48,7 +50,7 @@ trx_undo_rec_copy(
 	mem_heap_t*		heap);		/*!< in: heap where copied */
 /**********************************************************************//**
 Reads the undo log record type.
-@return	record type */
+@return record type */
 UNIV_INLINE
 ulint
 trx_undo_rec_get_type(
@@ -56,7 +58,7 @@ trx_undo_rec_get_type(
 	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
 /**********************************************************************//**
 Reads from an undo log record the record compiler info.
-@return	compiler info */
+@return compiler info */
 UNIV_INLINE
 ulint
 trx_undo_rec_get_cmpl_info(
@@ -64,7 +66,7 @@ trx_undo_rec_get_cmpl_info(
 	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
 /**********************************************************************//**
 Returns TRUE if an undo log record contains an extern storage field.
-@return	TRUE if extern */
+@return TRUE if extern */
 UNIV_INLINE
 ibool
 trx_undo_rec_get_extern_storage(
@@ -72,21 +74,12 @@ trx_undo_rec_get_extern_storage(
 	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
 /**********************************************************************//**
 Reads the undo log record number.
-@return	undo no */
+@return undo no */
 UNIV_INLINE
 undo_no_t
 trx_undo_rec_get_undo_no(
 /*=====================*/
 	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return	offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
-	undo_no_t	undo_no)	/*!< in: undo no read from node */
-	MY_ATTRIBUTE((const));
 
 /**********************************************************************//**
 Returns the start of the undo record data area. */
@@ -95,8 +88,7 @@ Returns the start of the undo record data area. */
 
 /**********************************************************************//**
 Reads from an undo log record the general parameters.
-@return	remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
 byte*
 trx_undo_rec_get_pars(
 /*==================*/
@@ -112,8 +104,7 @@ trx_undo_rec_get_pars(
 	MY_ATTRIBUTE((nonnull));
 /*******************************************************************//**
 Builds a row reference from an undo log record.
-@return	pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
 byte*
 trx_undo_rec_get_row_ref(
 /*=====================*/
@@ -129,8 +120,7 @@ trx_undo_rec_get_row_ref(
 				needed is allocated */
 /*******************************************************************//**
 Skips a row reference from an undo log record.
-@return	pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
 byte*
 trx_undo_rec_skip_row_ref(
 /*======================*/
@@ -140,12 +130,11 @@ trx_undo_rec_skip_row_ref(
 /**********************************************************************//**
 Reads from an undo log update record the system field values of the old
 version.
-@return	remaining part of undo log record after reading these values */
-UNIV_INTERN
+@return remaining part of undo log record after reading these values */
 byte*
 trx_undo_update_rec_get_sys_cols(
 /*=============================*/
-	byte*		ptr,		/*!< in: remaining part of undo
+	const byte*	ptr,		/*!< in: remaining part of undo
 					log record after reading
 					general parameters */
 	trx_id_t*	trx_id,		/*!< out: trx id */
@@ -155,11 +144,10 @@ trx_undo_update_rec_get_sys_cols(
 Builds an update vector based on a remaining part of an undo log record.
 @return remaining part of the record, NULL if an error detected, which
 means that the record is corrupted */
-UNIV_INTERN
 byte*
 trx_undo_update_rec_get_update(
 /*===========================*/
-	byte*		ptr,	/*!< in: remaining part in update undo log
+	const byte*	ptr,	/*!< in: remaining part in update undo log
 				record, after reading the row reference
 				NOTE that this copy of the undo log record must
 				be preserved as long as the update vector is
@@ -182,12 +170,11 @@ trx_undo_update_rec_get_update(
 Builds a partial row from an update undo log record, for purge.
 It contains the columns which occur as ordering in any index of the table.
 Any missing columns are indicated by col->mtype == DATA_MISSING.
-@return	pointer to remaining part of undo record */
-UNIV_INTERN
+@return pointer to remaining part of undo record */
 byte*
 trx_undo_rec_get_partial_row(
 /*=========================*/
-	byte*		ptr,	/*!< in: remaining part in update undo log
+	const byte*	ptr,	/*!< in: remaining part in update undo log
 				record of a suitable type, at the start of
 				the stored index columns;
 				NOTE that this copy of the undo log record must
@@ -207,8 +194,7 @@ Writes information to an undo log about an insert, update, or a delete marking
 of a clustered index record. This information is used in a rollback of the
 transaction and in consistent reads that must look to the history of this
 transaction.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 trx_undo_report_row_operation(
 /*==========================*/
@@ -237,14 +223,27 @@ trx_undo_report_row_operation(
 /******************************************************************//**
 Copies an undo record to heap. This function can be called if we know that
 the undo log record exists.
-@return	own: copy of the record */
-UNIV_INTERN
+@return own: copy of the record */
 trx_undo_rec_t*
 trx_undo_get_undo_rec_low(
 /*======================*/
 	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
-	mem_heap_t*	heap)		/*!< in: memory heap where copied */
+	mem_heap_t*	heap,		/*!< in: memory heap where copied */
+	bool		is_redo_rseg)	/*!< in: true if redo rseg. */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** status bit used for trx_undo_prev_version_build() */
+
+/** TRX_UNDO_PREV_IN_PURGE tells trx_undo_prev_version_build() that it
+is being called by purge and we would like to get the purge record
+even if it is in the purge view (in the normal case, it will return
+without fetching the purge record) */
+#define		TRX_UNDO_PREV_IN_PURGE		0x1
+
+/** This tells trx_undo_prev_version_build() to fetch the old value in
+the undo log (which is the after image for an update) */
+#define		TRX_UNDO_GET_OLD_V_VALUE	0x2
+
 /*******************************************************************//**
 Build a previous version of a clustered index record. The caller must
 hold a latch on the index page of the clustered index record.
@@ -252,7 +251,6 @@ hold a latch on the index page of the clustered index record.
 or the table has been rebuilt
 @retval false if the previous version is earlier than purge_view,
 which means that it may have been removed */
-UNIV_INTERN
 bool
 trx_undo_prev_version_build(
 /*========================*/
@@ -265,15 +263,23 @@ trx_undo_prev_version_build(
 	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
 	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
 				needed is allocated */
-	rec_t**		old_vers)/*!< out, own: previous version, or NULL if
+	rec_t**		old_vers,/*!< out, own: previous version, or NULL if
 				rec is the first inserted version, or if
 				history data has been deleted */
-	MY_ATTRIBUTE((nonnull));
+	mem_heap_t*	v_heap,	/*!< in: memory heap used to create vrow
+				dtuple if it is not yet created. This heap
+				differs from "heap" above in that it could be
+				prebuilt->old_vers_heap for selection */
+	const dtuple_t**vrow,	/*!< out: virtual column info, if any */
+	ulint		v_status);
+				/*!< in: status that determines whether this
+				function is called by the purge thread, and
+				whether we read "after image" of undo log */
+
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
 Parses a redo log record of adding an undo log record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 trx_undo_parse_add_undo_rec(
 /*========================*/
@@ -282,8 +288,7 @@ trx_undo_parse_add_undo_rec(
 	page_t*	page);	/*!< in: page or NULL */
 /***********************************************************//**
 Parses a redo log record of erasing of an undo page end.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 trx_undo_parse_erase_page_end(
 /*==========================*/
@@ -292,6 +297,53 @@ trx_undo_parse_erase_page_end(
 	page_t*	page,	/*!< in: page or NULL */
 	mtr_t*	mtr);	/*!< in: mtr or NULL */
 
+/** Read from an undo log record a non-virtual column value.
+@param[in]	ptr		pointer to remaining part of the undo record
+@param[in,out]	field		stored field
+@param[in,out]	len		length of the field, or UNIV_SQL_NULL
+@param[in,out]	orig_len	original length of the locally stored part
+of an externally stored column, or 0
+@return remaining part of undo log record after reading these values */
+byte*
+trx_undo_rec_get_col_val(
+        const byte*     ptr,
+        const byte**    field,
+        ulint*          len,
+        ulint*          orig_len);
+
+/** Read virtual column value from undo log
+@param[in]	table		the table
+@param[in]	ptr		undo log pointer
+@param[in,out]	row		the dtuple to fill
+@param[in]	in_purge        called by purge thread
+@param[in]	col_map		online rebuild column map */
+void
+trx_undo_read_v_cols(
+	const dict_table_t*	table,
+	const byte*		ptr,
+	const dtuple_t*		row,
+	bool			in_purge,
+	const ulint*		col_map);
+
+/** Read virtual column index from undo log if the undo log contains such
+info, and verify the column is still indexed, and output its position
+@param[in]	table		the table
+@param[in]	ptr		undo log pointer
+@param[in]	first_v_col	if this is the first virtual column, which
+				has the version marker
+@param[in,out]	is_undo_log	this function is used to parse both undo log,
+				and online log for virtual columns. So
+				check to see if this is undo log
+@param[out]	field_no	the column number
+@return remaining part of undo log record after reading these values */
+const byte*
+trx_undo_read_v_idx(
+	const dict_table_t*	table,
+	const byte*		ptr,
+	bool			first_v_col,
+	bool*			is_undo_log,
+	ulint*			field_no);
+
 #ifndef UNIV_HOTBACKUP
 
 /* Types of an undo log record: these have to be smaller than 16, as the
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index 08704f6b821..111c05c60aa 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,7 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Reads from an undo log record the record type.
-@return	record type */
+@return record type */
 UNIV_INLINE
 ulint
 trx_undo_rec_get_type(
@@ -38,7 +38,7 @@ trx_undo_rec_get_type(
 
 /**********************************************************************//**
 Reads from an undo log record the record compiler info.
-@return	compiler info */
+@return compiler info */
 UNIV_INLINE
 ulint
 trx_undo_rec_get_cmpl_info(
@@ -50,7 +50,7 @@ trx_undo_rec_get_cmpl_info(
 
 /**********************************************************************//**
 Returns TRUE if an undo log record contains an extern storage field.
-@return	TRUE if extern */
+@return TRUE if extern */
 UNIV_INLINE
 ibool
 trx_undo_rec_get_extern_storage(
@@ -67,7 +67,7 @@ trx_undo_rec_get_extern_storage(
 
 /**********************************************************************//**
 Reads the undo log record number.
-@return	undo no */
+@return undo no */
 UNIV_INLINE
 undo_no_t
 trx_undo_rec_get_undo_no(
@@ -78,24 +78,12 @@ trx_undo_rec_get_undo_no(
 
 	ptr = undo_rec + 3;
 
-	return(mach_ull_read_much_compressed(ptr));
-}
-
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return	offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
-	undo_no_t	undo_no)	/*!< in: undo no read from node */
-{
-	return(3 + mach_ull_get_much_compressed_size(undo_no));
+	return(mach_u64_read_much_compressed(ptr));
 }
 
 /***********************************************************************//**
 Copies the undo record to the heap.
-@return	own: copy of undo log record */
+@return own: copy of undo log record */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_rec_copy(
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 98a667b2ec1..ec4c7d57e5d 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -39,76 +39,49 @@ Determines if this transaction is rolling back an incomplete transaction
 in crash recovery.
 @return TRUE if trx is an incomplete transaction that is being rolled
 back in crash recovery */
-UNIV_INTERN
 ibool
 trx_is_recv(
 /*========*/
 	const trx_t*	trx);	/*!< in: transaction */
 /*******************************************************************//**
 Returns a transaction savepoint taken at this point in time.
-@return	savepoint */
-UNIV_INTERN
+@return savepoint */
 trx_savept_t
 trx_savept_take(
 /*============*/
 	trx_t*	trx);	/*!< in: transaction */
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
-	trx_undo_arr_t*	arr);	/*!< in: undo number array */
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return	pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
-	trx_undo_arr_t*	arr,	/*!< in: undo number array */
-	ulint		n);	/*!< in: position */
 /********************************************************************//**
 Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
+as a single stack of records ordered by their undo numbers.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx_low(
+/*============================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	trx_undo_ptr_t*	undo_ptr,	/*!< in: rollback segment to look
+					for next undo log record. */
+	undo_no_t	limit,		/*!< in: least undo number we need */
+	roll_ptr_t*	roll_ptr,	/*!< out: roll pointer to undo record */
+	mem_heap_t*	heap);		/*!< in/out: memory heap where copied */
+
+/********************************************************************//**
+Get next undo log record from redo and noredo rollback segments.
 @return undo log record copied to heap, NULL if none left, or if the
 undo number of the top record would be less than the limit */
-UNIV_INTERN
 trx_undo_rec_t*
 trx_roll_pop_top_rec_of_trx(
 /*========================*/
-	trx_t*		trx,	/*!< in: transaction */
-	undo_no_t	limit,	/*!< in: least undo number we need */
-	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
-	mem_heap_t*	heap);	/*!< in: memory heap where copied */
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return	TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
-	trx_t*		trx,	/*!< in/out: transaction */
-	undo_no_t	undo_no);/*!< in: undo number of the record */
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
-	trx_t*		trx,	/*!< in/out: transaction */
-	undo_no_t	undo_no);/*!< in: undo number */
+	trx_t*		trx,		/*!< in: transaction */
+	undo_no_t	limit,		/*!< in: least undo number we need */
+	roll_ptr_t*	roll_ptr,	/*!< out: roll pointer to undo record */
+	mem_heap_t*	heap);		/*!< in: memory heap where copied */
+
 /*******************************************************************//**
 Rollback or clean up any incomplete transactions which were
 encountered in crash recovery.  If the transaction already was
 committed, then we clean up a possible insert undo log. If the
 transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
 void
 trx_rollback_or_clean_recovered(
 /*============================*/
@@ -120,8 +93,8 @@ encountered in crash recovery.  If the transaction already was
 committed, then we clean up a possible insert undo log. If the
 transaction was not yet committed, then we roll it back.
 Note: this is done in a background thread.
-@return	a dummy parameter */
-extern "C" UNIV_INTERN
+@return a dummy parameter */
+extern "C"
 os_thread_ret_t
 DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
 /*================================================*/
@@ -130,24 +103,21 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
 			os_thread_create */
 /*********************************************************************//**
 Creates a rollback command node struct.
-@return	own: rollback node struct */
-UNIV_INTERN
+@return own: rollback node struct */
 roll_node_t*
 roll_node_create(
 /*=============*/
 	mem_heap_t*	heap);	/*!< in: mem heap where created */
 /***********************************************************//**
 Performs an execution step for a rollback command node in a query graph.
-@return	query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
 que_thr_t*
 trx_rollback_step(
 /*==============*/
 	que_thr_t*	thr);	/*!< in: query thread */
 /*******************************************************************//**
 Rollback a transaction used in MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 trx_rollback_for_mysql(
 /*===================*/
@@ -155,8 +125,7 @@ trx_rollback_for_mysql(
 	MY_ATTRIBUTE((nonnull));
 /*******************************************************************//**
 Rollback the latest SQL statement for MySQL.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 trx_rollback_last_sql_stat_for_mysql(
 /*=================================*/
@@ -164,8 +133,7 @@ trx_rollback_last_sql_stat_for_mysql(
 	MY_ATTRIBUTE((nonnull));
 /*******************************************************************//**
 Rollback a transaction to a given savepoint or do a complete rollback.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 trx_rollback_to_savepoint(
 /*======================*/
@@ -183,13 +151,12 @@ the row, these locks are naturally released in the rollback. Savepoints which
 were set after this savepoint are deleted.
 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
 otherwise DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 trx_rollback_to_savepoint_for_mysql(
 /*================================*/
 	trx_t*		trx,			/*!< in: transaction handle */
 	const char*	savepoint_name,		/*!< in: savepoint name */
-	ib_int64_t*	mysql_binlog_cache_pos)	/*!< out: the MySQL binlog cache
+	int64_t*	mysql_binlog_cache_pos)	/*!< out: the MySQL binlog cache
 						position corresponding to this
 						savepoint; MySQL needs this
 						information to remove the
@@ -201,14 +168,13 @@ Creates a named savepoint. If the transaction is not yet started, starts it.
 If there is already a savepoint of the same name, this call erases that old
 savepoint and replaces it with a new. Savepoints are deleted in a transaction
 commit or rollback.
-@return	always DB_SUCCESS */
-UNIV_INTERN
+@return always DB_SUCCESS */
 dberr_t
 trx_savepoint_for_mysql(
 /*====================*/
 	trx_t*		trx,			/*!< in: transaction handle */
 	const char*	savepoint_name,		/*!< in: savepoint name */
-	ib_int64_t	binlog_cache_pos)	/*!< in: MySQL binlog cache
+	int64_t		binlog_cache_pos)	/*!< in: MySQL binlog cache
 						position corresponding to this
 						connection at the time of the
 						savepoint */
@@ -218,7 +184,6 @@ Releases a named savepoint. Savepoints which
 were set after this savepoint are deleted.
 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
 otherwise DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 trx_release_savepoint_for_mysql(
 /*============================*/
@@ -227,7 +192,6 @@ trx_release_savepoint_for_mysql(
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /*******************************************************************//**
 Frees savepoint structs starting from savep. */
-UNIV_INTERN
 void
 trx_roll_savepoints_free(
 /*=====================*/
@@ -235,39 +199,20 @@ trx_roll_savepoints_free(
 	trx_named_savept_t*	savep);	/*!< in: free all savepoints > this one;
 					if this is NULL, free all savepoints
 					of trx */
-
-/** A cell of trx_undo_arr_t; used during a rollback and a purge */
-struct	trx_undo_inf_t{
-	ibool		in_use;	/*!< true if cell is being used */
-	trx_id_t	trx_no;	/*!< transaction number: not defined during
-				a rollback */
-	undo_no_t	undo_no;/*!< undo number of an undo record */
-};
-
-/** During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_t{
-	ulint		n_cells;	/*!< number of cells in the array */
-	ulint		n_used;		/*!< number of cells in use */
-	trx_undo_inf_t*	infos;		/*!< the array of undo infos */
-	mem_heap_t*	heap;		/*!< memory heap from which allocated */
-};
-
 /** Rollback node states */
 enum roll_node_state {
 	ROLL_NODE_NONE = 0,		/*!< Unknown state */
 	ROLL_NODE_SEND,			/*!< about to send a rollback signal to
 					the transaction */
 	ROLL_NODE_WAIT			/*!< rollback signal sent to the
-				       	transaction, waiting for completion */
+					transaction, waiting for completion */
 };
 
 /** Rollback command node in a query graph */
 struct roll_node_t{
 	que_common_t		common;	/*!< node type: QUE_NODE_ROLLBACK */
 	enum roll_node_state	state;	/*!< node execution state */
-	ibool			partial;/*!< TRUE if we want a partial
+	bool			partial;/*!< TRUE if we want a partial
 					rollback */
 	trx_savept_t		savept;	/*!< savepoint to which to
 					roll back, in the case of a
@@ -280,7 +225,7 @@ struct trx_named_savept_t{
 	char*		name;		/*!< savepoint name */
 	trx_savept_t	savept;		/*!< the undo number corresponding to
 					the savepoint */
-	ib_int64_t	mysql_binlog_cache_pos;
+	int64_t		mysql_binlog_cache_pos;
 					/*!< the MySQL binlog cache position
 					corresponding to this savepoint, not
 					defined if the MySQL binlogging is not
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
index 178e9bb730a..b09a1471150 100644
--- a/storage/innobase/include/trx0roll.ic
+++ b/storage/innobase/include/trx0roll.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,18 +23,40 @@ Transaction rollback
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
+#ifdef UNIV_DEBUG
 /*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return	pointer to the nth element */
+Check if undo numbering is maintained while processing undo records
+for rollback.
+@return true if undo numbering is maintained. */
 UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
-	trx_undo_arr_t*	arr,	/*!< in: undo number array */
-	ulint		n)	/*!< in: position */
+bool
+trx_roll_check_undo_rec_ordering(
+/*=============================*/
+	undo_no_t	curr_undo_rec_no,	/*!< in: record number of
+						undo record to process. */
+	ulint		curr_undo_space_id,	/*!< in: space-id of rollback
+						segment that contains the
+						undo record to process. */
+	const trx_t*	trx)			/*!< in: transaction */
 {
-	ut_ad(arr);
-	ut_ad(n < arr->n_cells);
-
-	return(arr->infos + n);
+	/* Each transaction now can have multiple rollback segments.
+	If a transaction involves temp and non-temp tables, both the rollback
+	segments will be active. In this case undo records will be distributed
+	across the two rollback segments.
+	CASE-1: UNDO action will apply all undo records from one rollback
+	segment before moving to next. This means undo record numbers can't be
+	sequential but ordering is still enforced as next undo record number
+	should be < processed undo record number.
+	CASE-2: For normal rollback (not initiated by crash) all rollback
+	segments will be active (including non-redo).
+	Based on transaction operation pattern undo record number of first
+	undo record from this new rollback segment can be > last undo number
+	from previous rollback segment and so we ignore this check if
+	rollback segments are switching. Once switched new rollback segment
+	should re-follow undo record number pattern (as mentioned in CASE-1). */
+
+	return(curr_undo_space_id != trx->undo_rseg_space
+	       || curr_undo_rec_no + 1 <= trx->undo_no);
 }
+#endif /* UNIV_DEBUG */
+
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 185b05876b4..b9cbd387a62 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,35 +29,40 @@ Created 3/26/1996 Heikki Tuuri
 #include "univ.i"
 #include "trx0types.h"
 #include "trx0sys.h"
-#include "ut0bh.h"
+#include "fut0lst.h"
+#include <vector>
 
-/******************************************************************//**
-Gets a rollback segment header.
-@return	rollback segment header, page x-latched */
+/** Gets a rollback segment header.
+@param[in]	space		space where placed
+@param[in]	page_no		page number of the header
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get(
-/*==========*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number of the header */
-	mtr_t*	mtr);		/*!< in: mtr */
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return	rollback segment header, page x-latched */
+	ulint			space,
+	ulint			page_no,
+	const page_size_t&	page_size,
+	mtr_t*			mtr);
+
+/** Gets a newly created rollback segment header.
+@param[in]	space		space where placed
+@param[in]	page_no		page number of the header
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get_new(
-/*==============*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number of the header */
-	mtr_t*	mtr);		/*!< in: mtr */
+	ulint			space,
+	ulint			page_no,
+	const page_size_t&	page_size,
+	mtr_t*			mtr);
+
 /***************************************************************//**
 Gets the file page number of the nth undo log slot.
-@return	page number of the undo log segment */
+@return page number of the undo log segment */
 UNIV_INLINE
 ulint
 trx_rsegf_get_nth_undo(
@@ -77,7 +82,7 @@ trx_rsegf_set_nth_undo(
 	mtr_t*		mtr);	/*!< in: mtr */
 /****************************************************************//**
 Looks for a free slot for an undo log segment.
-@return	slot index or ULINT_UNDEFINED if not found */
+@return slot index or ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
 trx_rsegf_undo_find_free(
@@ -86,51 +91,55 @@ trx_rsegf_undo_find_free(
 	mtr_t*		mtr);	/*!< in: mtr */
 /******************************************************************//**
 Looks for a rollback segment, based on the rollback segment id.
-@return	rollback segment */
+@return rollback segment */
 UNIV_INLINE
 trx_rseg_t*
 trx_rseg_get_on_id(
 /*===============*/
-	ulint	id);		/*!< in: rollback segment id */
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return	page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
+	ulint	id,		/*!< in: rollback segment id */
+	bool	is_redo_rseg);	/*!< in: true if redo rseg else false. */
+
+/** Creates a rollback segment header.
+This function is called only when a new rollback segment is created in
+the database.
+@param[in]	space		space id
+@param[in]	page_size	page size
+@param[in]	max_size	max size in pages
+@param[in]	rseg_slot_no	rseg id == slot number in trx sys
+@param[in,out]	mtr		mini-transaction
+@return page number of the created segment, FIL_NULL if fail */
 ulint
 trx_rseg_header_create(
-/*===================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	max_size,	/*!< in: max size in pages */
-	ulint	rseg_slot_no,	/*!< in: rseg id == slot number in trx sys */
-	mtr_t*	mtr);		/*!< in: mtr */
+	ulint			space,
+	const page_size_t&	page_size,
+	ulint			max_size,
+	ulint			rseg_slot_no,
+	mtr_t*			mtr);
+
 /*********************************************************************//**
 Creates the memory copies for rollback segments and initializes the
 rseg array in trx_sys at a database startup. */
-UNIV_INTERN
 void
 trx_rseg_array_init(
 /*================*/
-	trx_sysf_t*	sys_header,	/*!< in/out: trx system header */
-	ib_bh_t*	ib_bh,		/*!< in: rseg queue */
-	mtr_t*		mtr);		/*!< in/out: mtr */
+	purge_pq_t*	purge_queue);	/*!< in: rseg queue */
+
 /***************************************************************************
 Free's an instance of the rollback segment in memory. */
-UNIV_INTERN
 void
 trx_rseg_mem_free(
 /*==============*/
-	trx_rseg_t*	rseg);		/*!< in, own: instance to free */
-
+	trx_rseg_t*	rseg,		/*!< in, own: instance to free */
+	trx_rseg_t**	rseg_array);	/*!< out: add rseg reference to this
+					central array. */
 /*********************************************************************
 Creates a rollback segment. */
-UNIV_INTERN
 trx_rseg_t*
 trx_rseg_create(
 /*============*/
-	ulint	space);			/*!< in: id of UNDO tablespace */
+	ulint	space_id,	/*!< in: id of UNDO tablespace */
+	ulint   nth_free_slot);	/*!< in: allocate nth free slot.
+				0 means next free slot. */
 
 /********************************************************************
 Get the number of unique rollback tablespaces in use except space id 0.
@@ -138,7 +147,6 @@ The last space id will be the sentinel value ULINT_UNDEFINED. The array
 will be sorted on space id. Note: space_ids should have have space for
 TRX_SYS_N_RSEGS + 1 elements.
 @return number of unique rollback tablespaces in use. */
-UNIV_INTERN
 ulint
 trx_rseg_get_n_undo_tablespaces(
 /*============================*/
@@ -150,51 +158,69 @@ trx_rseg_get_n_undo_tablespaces(
 /* Maximum number of transactions supported by a single rollback segment */
 #define TRX_RSEG_MAX_N_TRXS	(TRX_RSEG_N_SLOTS / 2)
 
-/* The rollback segment memory object */
-struct trx_rseg_t{
+/** The rollback segment memory object */
+struct trx_rseg_t {
 	/*--------------------------------------------------------*/
-	ulint		id;	/*!< rollback segment id == the index of
-				its slot in the trx system file copy */
-	ib_mutex_t		mutex;	/*!< mutex protecting the fields in this
-				struct except id, which is constant */
-	ulint		space;	/*!< space where the rollback segment is
-				header is placed */
-	ulint		zip_size;/* compressed page size of space
-				in bytes, or 0 for uncompressed spaces */
-	ulint		page_no;/* page number of the rollback segment
-				header */
-	ulint		max_size;/* maximum allowed size in pages */
-	ulint		curr_size;/* current size in pages */
+	/** rollback segment id == the index of its slot in the trx
+	system file copy */
+	ulint				id;
+
+	/** mutex protecting the fields in this struct except id, space and
+	page_no, which are constant */
+	RsegMutex			mutex;
+
+	/** space where the rollback segment header is placed */
+	ulint				space;
+
+	/** page number of the rollback segment header */
+	ulint				page_no;
+
+	/** page size of the relevant tablespace */
+	page_size_t			page_size;
+
+	/** maximum allowed size in pages */
+	ulint				max_size;
+
+	/** current size in pages */
+	ulint				curr_size;
+
 	/*--------------------------------------------------------*/
 	/* Fields for update undo logs */
-	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
-					/* List of update undo logs */
-	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
-					/* List of update undo log segments
-					cached for fast reuse */
+	/** List of update undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t)	update_undo_list;
+
+	/** List of update undo log segments cached for fast reuse */
+	UT_LIST_BASE_NODE_T(trx_undo_t)	update_undo_cached;
+
 	/*--------------------------------------------------------*/
 	/* Fields for insert undo logs */
+	/** List of insert undo logs */
 	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
-					/* List of insert undo logs */
+
+	/** List of insert undo log segments cached for fast reuse */
 	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
-					/* List of insert undo log segments
-					cached for fast reuse */
+
 	/*--------------------------------------------------------*/
-	ulint		last_page_no;	/*!< Page number of the last not yet
-					purged log header in the history list;
-					FIL_NULL if all list purged */
-	ulint		last_offset;	/*!< Byte offset of the last not yet
-					purged log header */
-	trx_id_t	last_trx_no;	/*!< Transaction number of the last not
-					yet purged log */
-	ibool		last_del_marks;	/*!< TRUE if the last not yet purged log
-					needs purging */
-};
 
-/** For prioritising the rollback segments for purge. */
-struct rseg_queue_t {
-        trx_id_t	trx_no;         /*!< trx_rseg_t::last_trx_no */
-        trx_rseg_t*     rseg;           /*!< Rollback segment */
+	/** Page number of the last not yet purged log header in the history
+	list; FIL_NULL if all list purged */
+	ulint				last_page_no;
+
+	/** Byte offset of the last not yet purged log header */
+	ulint				last_offset;
+
+	/** Transaction number of the last not yet purged log */
+	trx_id_t			last_trx_no;
+
+	/** TRUE if the last not yet purged log needs purging */
+	ibool				last_del_marks;
+
+	/** Reference counter to track rseg allocated transactions. */
+	ulint				trx_ref_count;
+
+	/** If true, then skip allocating this rseg as it resides in an
+	UNDO tablespace marked for truncation. */
+	bool				skip_allocation;
 };
 
 /* Undo log segment slot in a rollback segment header */
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index 30743da9b8c..9574be67ff8 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,23 +27,26 @@ Created 3/26/1996 Heikki Tuuri
 #include "mtr0log.h"
 #include "trx0sys.h"
 
-/******************************************************************//**
-Gets a rollback segment header.
-@return	rollback segment header, page x-latched */
+/** Gets a rollback segment header.
+@param[in]	space		space where placed
+@param[in]	page_no		page number of the header
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get(
-/*==========*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number of the header */
-	mtr_t*	mtr)		/*!< in: mtr */
+	ulint			space,
+	ulint			page_no,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
 {
 	buf_block_t*	block;
 	trx_rsegf_t*	header;
 
-	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	block = buf_page_get(
+		page_id_t(space, page_no), page_size, RW_X_LATCH, mtr);
+
 	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
 
 	header = TRX_RSEG + buf_block_get_frame(block);
@@ -51,23 +54,26 @@ trx_rsegf_get(
 	return(header);
 }
 
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return	rollback segment header, page x-latched */
+/** Gets a newly created rollback segment header.
+@param[in]	space		space where placed
+@param[in]	page_no		page number of the header
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get_new(
-/*==============*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number of the header */
-	mtr_t*	mtr)		/*!< in: mtr */
+	ulint			space,
+	ulint			page_no,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
 {
 	buf_block_t*	block;
 	trx_rsegf_t*	header;
 
-	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	block = buf_page_get(
+		page_id_t(space, page_no), page_size, RW_X_LATCH, mtr);
+
 	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
 
 	header = TRX_RSEG + buf_block_get_frame(block);
@@ -77,7 +83,7 @@ trx_rsegf_get_new(
 
 /***************************************************************//**
 Gets the file page number of the nth undo log slot.
-@return	page number of the undo log segment */
+@return page number of the undo log segment */
 UNIV_INLINE
 ulint
 trx_rsegf_get_nth_undo(
@@ -86,12 +92,7 @@ trx_rsegf_get_nth_undo(
 	ulint		n,	/*!< in: index of slot */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	if (n >= TRX_RSEG_N_SLOTS) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to get slot %lu of rseg\n",
-			(ulong) n);
-		ut_error;
-	}
+	ut_a(n < TRX_RSEG_N_SLOTS);
 
 	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
 			      + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
@@ -108,12 +109,7 @@ trx_rsegf_set_nth_undo(
 	ulint		page_no,/*!< in: page number of the undo log segment */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	if (n >= TRX_RSEG_N_SLOTS) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to set slot %lu of rseg\n",
-			(ulong) n);
-		ut_error;
-	}
+	ut_a(n < TRX_RSEG_N_SLOTS);
 
 	mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
 			 page_no, MLOG_4BYTES, mtr);
@@ -121,7 +117,7 @@ trx_rsegf_set_nth_undo(
 
 /****************************************************************//**
 Looks for a free slot for an undo log segment.
-@return	slot index or ULINT_UNDEFINED if not found */
+@return slot index or ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
 trx_rsegf_undo_find_free(
@@ -131,19 +127,19 @@ trx_rsegf_undo_find_free(
 {
 	ulint		i;
 	ulint		page_no;
+	ulint		max_slots = TRX_RSEG_N_SLOTS;
 
-	for (i = 0;
-#ifndef UNIV_DEBUG
-	     i < TRX_RSEG_N_SLOTS;
-#else
-	     i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS);
+#ifdef UNIV_DEBUG
+	if (trx_rseg_n_slots_debug) {
+		max_slots = ut_min(static_cast<ulint>(trx_rseg_n_slots_debug),
+				   static_cast<ulint>(TRX_RSEG_N_SLOTS));
+	}
 #endif
-	     i++) {
 
+	for (i = 0; i < max_slots; i++) {
 		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
 
 		if (page_no == FIL_NULL) {
-
 			return(i);
 		}
 	}
@@ -153,15 +149,25 @@ trx_rsegf_undo_find_free(
 
 /******************************************************************//**
 Looks for a rollback segment, based on the rollback segment id.
-@return	rollback segment */
+@return rollback segment */
 UNIV_INLINE
 trx_rseg_t*
 trx_rseg_get_on_id(
 /*===============*/
-	ulint	id)	/*!< in: rollback segment id */
+	ulint	id,		/*!< in: rollback segment id */
+	bool	is_redo_rseg)	/*!< in: true if redo rseg else false. */
 {
 	ut_a(id < TRX_SYS_N_RSEGS);
 
+	/* If a redo rseg is requested and the id falls in the range of
+	non-redo rseg slots (slot-1 .. slot-srv_tmp_undo_logs), then the
+	server is being upgraded from pre-5.7.2. In that case return the
+	rseg from pending_purge_rseg_array. */
+	if (is_redo_rseg && trx_sys_is_noredo_rseg_slot(id)) {
+		ut_ad(trx_sys->pending_purge_rseg_array[id] != NULL);
+		return(trx_sys->pending_purge_rseg_array[id]);
+	}
+
 	return(trx_sys->rseg_array[id]);
 }
 
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 8c6b13f9dd4..ddf535158b6 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -28,99 +28,81 @@ Created 3/26/1996 Heikki Tuuri
 
 #include "univ.i"
 
-#include "trx0types.h"
-#include "fsp0types.h"
-#include "fil0fil.h"
 #include "buf0buf.h"
+#include "fil0fil.h"
+#include "trx0types.h"
 #ifndef UNIV_HOTBACKUP
+#include "mem0mem.h"
 #include "mtr0mtr.h"
 #include "ut0byte.h"
 #include "mem0mem.h"
-#include "sync0sync.h"
 #include "ut0lst.h"
-#include "ut0bh.h"
 #include "read0types.h"
 #include "page0types.h"
-#include "ut0bh.h"
+#include "ut0mutex.h"
+#include "trx0trx.h"
 #ifdef WITH_WSREP
 #include "trx0xa.h"
 #endif /* WITH_WSREP */
 
-typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t;
+typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
 
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-extern char		trx_sys_mysql_master_log_name[];
-/** Master binlog file position.  We have successfully got the updates
-up to this position.  -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-extern ib_int64_t	trx_sys_mysql_master_log_pos;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-extern char		trx_sys_mysql_bin_log_name[];
-/** Binlog file position, or -1 if unknown */
-extern ib_int64_t	trx_sys_mysql_bin_log_pos;
-/* @} */
+// Forward declaration
+class MVCC;
+class ReadView;
 
 /** The transaction system */
 extern trx_sys_t*	trx_sys;
 
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return	TRUE if trx sys header page */
+/** Checks if a page address is the trx sys header page.
+@param[in]	page_id	page id
+@return true if trx sys header page */
 UNIV_INLINE
-ibool
+bool
 trx_sys_hdr_page(
-/*=============*/
-	ulint	space,	/*!< in: space */
-	ulint	page_no);/*!< in: page number */
+	const page_id_t&	page_id);
+
 /*****************************************************************//**
 Creates and initializes the central memory structures for the transaction
 system. This is called when the database is started.
 @return min binary heap of rsegs to purge */
-UNIV_INTERN
-ib_bh_t*
+purge_pq_t*
 trx_sys_init_at_db_start(void);
 /*==========================*/
 /*****************************************************************//**
-Creates the trx_sys instance and initializes ib_bh and mutex. */
-UNIV_INTERN
+Creates the trx_sys instance and initializes purge_queue and mutex. */
 void
 trx_sys_create(void);
 /*================*/
 /*****************************************************************//**
 Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
 void
 trx_sys_create_sys_pages(void);
 /*==========================*/
 /****************************************************************//**
 Looks for a free slot for a rollback segment in the trx system file copy.
-@return	slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
+@return slot index or ULINT_UNDEFINED if not found */
 ulint
 trx_sysf_rseg_find_free(
 /*====================*/
-	mtr_t*		mtr);		/*!< in: mtr */
+	mtr_t*	mtr,			/*!< in/out: mtr */
+	bool	include_tmp_slots,	/*!< in: if true, report slots reserved
+					for temp-tablespace as free slots. */
+	ulint	nth_free_slots);	/*!< in: allocate nth free slot.
+					0 means next free slot. */
 /***************************************************************//**
 Gets the pointer in the nth slot of the rseg array.
-@return	pointer to rseg object, NULL if slot not in use */
+@return pointer to rseg object, NULL if slot not in use */
 UNIV_INLINE
 trx_rseg_t*
 trx_sys_get_nth_rseg(
 /*=================*/
-	trx_sys_t*	sys,	/*!< in: trx system */
-	ulint		n);	/*!< in: index of slot */
+	trx_sys_t*	sys,		/*!< in: trx system */
+	ulint		n,		/*!< in: index of slot */
+	bool		is_redo_rseg);	/*!< in: true if redo rseg. */
 /**********************************************************************//**
 Gets a pointer to the transaction system file copy and x-locks its page.
-@return	pointer to system file copy, page x-locked */
+@return pointer to system file copy, page x-locked */
 UNIV_INLINE
 trx_sysf_t*
 trx_sysf_get(
@@ -129,7 +111,7 @@ trx_sysf_get(
 /*****************************************************************//**
 Gets the space of the nth rollback segment slot in the trx system
 file copy.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_space(
@@ -140,7 +122,7 @@ trx_sysf_rseg_get_space(
 /*****************************************************************//**
 Gets the page number of the nth rollback segment slot in the trx system
 file copy.
-@return	page number, FIL_NULL if slot unused */
+@return page number, FIL_NULL if slot unused */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_page_no(
@@ -173,11 +155,11 @@ trx_sysf_rseg_set_page_no(
 	mtr_t*		mtr);		/*!< in: mtr */
 /*****************************************************************//**
 Allocates a new transaction id.
-@return	new, allocated trx id */
+@return new, allocated trx id */
 UNIV_INLINE
 trx_id_t
-trx_sys_get_new_trx_id(void);
-/*========================*/
+trx_sys_get_new_trx_id();
+/*===================*/
 /*****************************************************************//**
 Determines the maximum transaction id.
 @return maximum currently allocated trx id; will be stale after the
@@ -193,6 +175,14 @@ extern uint			trx_rseg_n_slots_debug;
 #endif
 
 /*****************************************************************//**
+Checks whether the given slot id is one reserved for noredo rsegs. */
+UNIV_INLINE
+bool
+trx_sys_is_noredo_rseg_slot(
+/*========================*/
+	ulint	slot_id);	/*!< in: slot_id to check */
+
+/*****************************************************************//**
 Writes a trx id to an index page. In case that the id size changes in
 some future version, this function should be used instead of
 mach_write_... */
@@ -206,7 +196,7 @@ trx_write_trx_id(
 Reads a trx id from an index page. In case that the id size changes in
 some future version, this function should be used instead of
 mach_read_...
-@return	id */
+@return id */
 UNIV_INLINE
 trx_id_t
 trx_read_trx_id(
@@ -214,10 +204,7 @@ trx_read_trx_id(
 	const byte*	ptr);	/*!< in: pointer to memory from where to read */
 /****************************************************************//**
 Looks for the trx instance with the given id in the rw trx_list.
-The caller must be holding trx_sys->mutex.
-@return	the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+@return	the trx handle or NULL if not found */
 UNIV_INLINE
 trx_t*
 trx_get_rw_trx_by_id(
@@ -228,18 +215,14 @@ Returns the minimum trx id in rw trx list. This is the smallest id for which
 the trx can possibly be active. (But, you must look at the trx->state to
 find out if the minimum trx id transaction itself is active, or already
 committed.)
-@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
 UNIV_INLINE
 trx_id_t
 trx_rw_min_trx_id(void);
 /*===================*/
 /****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex in shared mode. If the caller is not holding
-lock_sys->mutex, the transaction may already have been committed.
-@return	transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+Checks if a rw transaction with the given id is active.
+@return transaction instance if active, or NULL */
 UNIV_INLINE
 trx_t*
 trx_rw_is_active_low(
@@ -249,28 +232,27 @@ trx_rw_is_active_low(
 					that will be set if corrupt */
 /****************************************************************//**
 Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
+not holding trx_sys->mutex, the transaction may already have been
 committed.
-@return	transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+@return transaction instance if active, or NULL */
 UNIV_INLINE
 trx_t*
 trx_rw_is_active(
 /*=============*/
 	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt);	/*!< in: NULL or pointer to a flag
+	ibool*		corrupt,	/*!< in: NULL or pointer to a flag
 					that will be set if corrupt */
+	bool		do_ref_count);	/*!< in: if true then increment the
+					trx_t::n_ref_count */
 #ifdef UNIV_DEBUG
 /****************************************************************//**
-Checks whether a trx is in one of rw_trx_list or ro_trx_list.
-@return	TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
+Checks whether a trx is in rw_trx_list.
+@return true if the trx is in the list */
+bool
+trx_in_rw_trx_list(
 /*============*/
 	const trx_t*	in_trx)		/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* UNIV_DEBUG */
 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 /***********************************************************//**
@@ -288,22 +270,18 @@ Updates the offset information about the end of the MySQL binlog entry
 which corresponds to the transaction just being committed. In a MySQL
 replication slave updates the latest master binlog position up to which
 replication has proceeded. */
-UNIV_INTERN
 void
 trx_sys_update_mysql_binlog_offset(
 /*===============================*/
 	const char*	file_name,/*!< in: MySQL log file name */
-	ib_int64_t	offset,	/*!< in: position in that log file */
+	int64_t		offset,	/*!< in: position in that log file */
 	ulint		field,	/*!< in: offset of the MySQL log info field in
 				the trx sys header */
-#ifdef WITH_WSREP
         trx_sysf_t*     sys_header, /*!< in: trx sys header */
-#endif /* WITH_WSREP */
 	mtr_t*		mtr);	/*!< in: mtr */
 /*****************************************************************//**
 Prints to stderr the MySQL binlog offset info in the trx system header if
 the magic number shows it valid. */
-UNIV_INTERN
 void
 trx_sys_print_mysql_binlog_offset(void);
 /*===================================*/
@@ -311,53 +289,46 @@ trx_sys_print_mysql_binlog_offset(void);
 /** Update WSREP checkpoint XID in sys header. */
 void
 trx_sys_update_wsrep_checkpoint(
-        const XID*      xid,         /*!< in: WSREP XID */
-        trx_sysf_t*     sys_header,  /*!< in: sys_header */
-        mtr_t*          mtr);        /*!< in: mtr       */
+/*============================*/
+	const XID*	xid,		/*!< in: WSREP XID */
+	trx_sysf_t*	sys_header,	/*!< in: sys_header */
+	mtr_t*		mtr);		/*!< in: mtr */
 
 void
 /** Read WSREP checkpoint XID from sys header. */
 trx_sys_read_wsrep_checkpoint(
-        XID* xid); /*!< out: WSREP XID */
+/*==========================*/
+	XID* xid); /*!< out: WSREP XID */
 #endif /* WITH_WSREP */
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
+
+/** Initializes the tablespace tag system. */
 void
 trx_sys_file_format_init(void);
 /*==========================*/
+
 /*****************************************************************//**
 Closes the tablespace tag system. */
-UNIV_INTERN
 void
 trx_sys_file_format_close(void);
 /*===========================*/
+
 /********************************************************************//**
 Tags the system table space with minimum format id if it has not been
 tagged yet.
 WARNING: This function is only called during the startup and AFTER the
 redo log application during recovery has finished. */
-UNIV_INTERN
 void
 trx_sys_file_format_tag_init(void);
 /*==============================*/
+
 /*****************************************************************//**
 Shutdown/Close the transaction system. */
-UNIV_INTERN
 void
 trx_sys_close(void);
 /*===============*/
 /*****************************************************************//**
 Get the name representation of the file format from its id.
-@return	pointer to the name */
-UNIV_INTERN
+@return pointer to the name */
 const char*
 trx_sys_file_format_id_to_name(
 /*===========================*/
@@ -365,8 +336,7 @@ trx_sys_file_format_id_to_name(
 /*****************************************************************//**
 Set the file format id unconditionally except if it's already the
 same value.
-@return	TRUE if value updated */
-UNIV_INTERN
+@return TRUE if value updated */
 ibool
 trx_sys_file_format_max_set(
 /*========================*/
@@ -376,12 +346,13 @@ trx_sys_file_format_max_set(
 /*********************************************************************
 Creates the rollback segments
 @return number of rollback segments that are active. */
-UNIV_INTERN
 ulint
 trx_sys_create_rsegs(
 /*=================*/
 	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
-	ulint	n_rsegs);	/*!< number of rollback segments to create */
+	ulint	n_rsegs,	/*!< number of rollback segments to create */
+	ulint	n_tmp_rsegs);	/*!< number of rollback segments reserved for
+				temp-tables. */
 /*****************************************************************//**
 Get the number of transaction in the system, independent of their state.
 @return count of transactions in trx_sys_t::trx_list */
@@ -393,7 +364,6 @@ trx_sys_get_n_rw_trx(void);
 /*********************************************************************
 Check if there are any active (non-prepared) transactions.
 @return total number of active transactions or 0 if none */
-UNIV_INTERN
 ulint
 trx_sys_any_active_transactions(void);
 /*=================================*/
@@ -401,7 +371,6 @@ trx_sys_any_active_transactions(void);
 /*****************************************************************//**
 Prints to stderr the MySQL binlog info in the system header if the
 magic number shows it valid. */
-UNIV_INTERN
 void
 trx_sys_print_mysql_binlog_offset_from_page(
 /*========================================*/
@@ -414,7 +383,6 @@ Even if the call succeeds and returns TRUE, the returned format id
 may be ULINT_UNDEFINED signalling that the format id was not present
 in the data file.
 @return TRUE if call succeeds */
-UNIV_INTERN
 ibool
 trx_sys_read_file_format_id(
 /*========================*/
@@ -425,7 +393,6 @@ trx_sys_read_file_format_id(
 /*****************************************************************//**
 Reads the file format id from the given per-table data file.
 @return TRUE if call succeeds */
-UNIV_INTERN
 ibool
 trx_sys_read_pertable_file_format_id(
 /*=================================*/
@@ -436,15 +403,13 @@ trx_sys_read_pertable_file_format_id(
 #endif /* !UNIV_HOTBACKUP */
 /*****************************************************************//**
 Get the name representation of the file format from its id.
-@return	pointer to the max format name */
-UNIV_INTERN
+@return pointer to the max format name */
 const char*
 trx_sys_file_format_max_get(void);
 /*=============================*/
 /*****************************************************************//**
 Check for the max file format tag stored on disk.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 trx_sys_file_format_max_check(
 /*==========================*/
@@ -452,8 +417,7 @@ trx_sys_file_format_max_check(
 /********************************************************************//**
 Update the file format tag in the system tablespace only if the given
 format id is greater than the known max id.
-@return	TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
+@return TRUE if format_id was bigger than the known max id */
 ibool
 trx_sys_file_format_max_upgrade(
 /*============================*/
@@ -461,31 +425,32 @@ trx_sys_file_format_max_upgrade(
 	ulint		format_id);	/*!< in: file format identifier */
 /*****************************************************************//**
 Get the name representation of the file format from its id.
-@return	pointer to the name */
-UNIV_INTERN
+@return pointer to the name */
 const char*
 trx_sys_file_format_id_to_name(
 /*===========================*/
 	const ulint	id);	/*!< in: id of the file format */
 
+/**
+Add the transaction to the RW transaction set
+@param trx		transaction instance to add */
+UNIV_INLINE
+void
+trx_sys_rw_trx_add(trx_t* trx);
+
 #ifdef UNIV_DEBUG
 /*************************************************************//**
-Validate the trx_sys_t::trx_list. */
-UNIV_INTERN
-ibool
-trx_sys_validate_trx_list(void);
-/*===========================*/
+Validate the trx_sys_t::rw_trx_list.
+@return true if the list is valid */
+bool
+trx_sys_validate_trx_list();
+/*========================*/
 #endif /* UNIV_DEBUG */
 
-/* The automatically created system rollback segment has this id */
+/** The automatically created system rollback segment has this id */
 #define TRX_SYS_SYSTEM_RSEG_ID	0
 
-/* Space id and page no where the trx system file copy resides */
-#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
-#include "fsp0fsp.h"
-#define	TRX_SYS_PAGE_NO	FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
+/** The offset of the transaction system header on the page */
 #define	TRX_SYS		FSEG_PAGE_DATA
 
 /** Transaction system header */
@@ -522,9 +487,7 @@ rollback segment.  It initialized some arrays with this number of entries.
 We must remember this limit in order to keep file compatibility. */
 #define TRX_SYS_OLD_N_RSEGS		256
 
-/** Maximum length of MySQL binlog file name, in bytes.
-@see trx_sys_mysql_master_log_name
-@see trx_sys_mysql_bin_log_name */
+/** Maximum length of MySQL binlog file name, in bytes. */
 #define TRX_SYS_MYSQL_LOG_NAME_LEN	512
 /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
 #define TRX_SYS_MYSQL_LOG_MAGIC_N	873422344
@@ -532,10 +495,6 @@ We must remember this limit in order to keep file compatibility. */
 #if UNIV_PAGE_SIZE_MIN < 4096
 # error "UNIV_PAGE_SIZE_MIN < 4096"
 #endif
-/** The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO	(UNIV_PAGE_SIZE - 2000)
-
 /** The offset of the MySQL binlog offset info in the trx system header */
 #define TRX_SYS_MYSQL_LOG_INFO		(UNIV_PAGE_SIZE - 1000)
 #define	TRX_SYS_MYSQL_LOG_MAGIC_N_FLD	0	/*!< magic number which is
@@ -633,73 +592,112 @@ identifier is added to this 64-bit constant. */
 
 #ifndef UNIV_HOTBACKUP
 /** The transaction system central memory data structure. */
-struct trx_sys_t{
+struct trx_sys_t {
 
-	ib_mutex_t		mutex;		/*!< mutex protecting most fields in
+	TrxSysMutex	mutex;		/*!< mutex protecting most fields in
 					this structure except when noted
 					otherwise */
-	ulint		n_prepared_trx;	/*!< Number of transactions currently
-					in the XA PREPARED state */
-	ulint		n_prepared_recovered_trx; /*!< Number of transactions
-					currently in XA PREPARED state that are
-					also recovered. Such transactions cannot
-					be added during runtime. They can only
-					occur after recovery if mysqld crashed
-					while there were XA PREPARED
-					transactions. We disable query cache
-					if such transactions exist. */
-	trx_id_t	max_trx_id;	/*!< The smallest number not yet
+
+	MVCC*		mvcc;		/*!< Multi version concurrency control
+					manager */
+	volatile trx_id_t
+			max_trx_id;	/*!< The smallest number not yet
 					assigned as a transaction id or
-					transaction number */
+					transaction number. This is declared
+					volatile because it can be accessed
+					without holding any mutex during
+					AC-NL-RO view creation. */
+	trx_ut_list_t	serialisation_list;
+					/*!< Ordered on trx_t::no of all the
+					currently active RW transactions */
 #ifdef UNIV_DEBUG
-	trx_id_t	rw_max_trx_id;	/*!< Max trx id of read-write transactions
-					which exist or existed */
-#endif
-	trx_list_t	rw_trx_list;	/*!< List of active and committed in
+	trx_id_t	rw_max_trx_id;	/*!< Max trx id of read-write
+					transactions which exist or existed */
+#endif /* UNIV_DEBUG */
+
+	char		pad1[64];	/*!< To avoid false sharing */
+	trx_ut_list_t	rw_trx_list;	/*!< List of active and committed in
 					memory read-write transactions, sorted
 					on trx id, biggest first. Recovered
 					transactions are always on this list. */
-	trx_list_t	ro_trx_list;	/*!< List of active and committed in
-					memory read-only transactions, sorted
-					on trx id, biggest first. NOTE:
-					The order for read-only transactions
-					is not necessary. We should exploit
-					this and increase concurrency during
-					add/remove. */
-	trx_list_t	mysql_trx_list;	/*!< List of transactions created
-					for MySQL. All transactions on
-					ro_trx_list are on mysql_trx_list. The
-					rw_trx_list can contain system
-					transactions and recovered transactions
-					that will not be in the mysql_trx_list.
-					There can be active non-locking
-					auto-commit read only transactions that
-					are on this list but not on ro_trx_list.
+
+	char		pad2[64];	/*!< To avoid false sharing */
+	trx_ut_list_t	mysql_trx_list;	/*!< List of transactions created
+					for MySQL. All user transactions are
+					on mysql_trx_list. The rw_trx_list
+					can contain system transactions and
+					recovered transactions that will not
+					be in the mysql_trx_list.
 					mysql_trx_list may additionally contain
 					transactions that have not yet been
 					started in InnoDB. */
-	trx_rseg_t*	const rseg_array[TRX_SYS_N_RSEGS];
+
+	trx_ids_t	rw_trx_ids;	/*!< Array of Read write transaction IDs
+					for MVCC snapshot. A ReadView would take
+					a snapshot of these transactions whose
+					changes are not visible to it. We should
+					remove transactions from the list before
+					committing in memory and releasing locks
+					to ensure right order of removal and
+					consistent snapshot. */
+
+	char		pad3[64];	/*!< To avoid false sharing */
+	trx_rseg_t*	rseg_array[TRX_SYS_N_RSEGS];
 					/*!< Pointer array to rollback
 					segments; NULL if slot not in use;
 					created and destroyed in
 					single-threaded mode; not protected
 					by any mutex, because it is read-only
 					during multi-threaded operation */
-	ulint		rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
+	ulint		rseg_history_len;
+					/*!< Length of the TRX_RSEG_HISTORY
 					list (update undo logs for committed
 					transactions), protected by
 					rseg->mutex */
-	UT_LIST_BASE_NODE_T(read_view_t) view_list;
-					/*!< List of read views sorted
-					on trx no, biggest first */
+
+	trx_rseg_t*	const pending_purge_rseg_array[TRX_SYS_N_RSEGS];
+					/*!< Pointer array to rollback segments
+					between slot-1..slot-srv_tmp_undo_logs
+					that are now replaced by non-redo
+					rollback segments. We need them for
+					scheduling purge if any of the rollback
+					segment has pending records to purge. */
+
+	TrxIdSet	rw_trx_set;	/*!< Mapping from transaction id
+					to transaction instance */
+
+	ulint		n_prepared_trx;	/*!< Number of transactions currently
+					in the XA PREPARED state */
+
+	ulint		n_prepared_recovered_trx; /*!< Number of transactions
+					currently in XA PREPARED state that are
+					also recovered. Such transactions cannot
+					be added during runtime. They can only
+					occur after recovery if mysqld crashed
+					while there were XA PREPARED
+					transactions. We disable query cache
+					if such transactions exist. */
 };
 
 /** When a trx id which is zero modulo this number (which must be a power of
 two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
 page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN	256
+#define TRX_SYS_TRX_ID_WRITE_MARGIN	((trx_id_t) 256)
 #endif /* !UNIV_HOTBACKUP */
 
+/** Test if trx_sys->mutex is owned. */
+#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
+
+/** Acquire the trx_sys->mutex. */
+#define trx_sys_mutex_enter() do {			\
+	mutex_enter(&trx_sys->mutex);			\
+} while (0)
+
+/** Release the trx_sys->mutex. */
+#define trx_sys_mutex_exit() do {			\
+	trx_sys->mutex.exit();				\
+} while (0)
+
 #ifndef UNIV_NONINL
 #include "trx0sys.ic"
 #endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 7265a97ae25..6158aea0c48 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -47,63 +47,69 @@ typedef byte	trx_sysf_rseg_t;
 
 /*****************************************************************//**
 Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
 void
 trx_sys_flush_max_trx_id(void);
 /*==========================*/
 
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return	TRUE if trx sys header page */
+/** Checks if a page address is the trx sys header page.
+@param[in]	page_id	page id
+@return true if trx sys header page */
 UNIV_INLINE
-ibool
+bool
 trx_sys_hdr_page(
-/*=============*/
-	ulint	space,	/*!< in: space */
-	ulint	page_no)/*!< in: page number */
+	const page_id_t&	page_id)
 {
-	if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return(page_id.space() == TRX_SYS_SPACE
+	       && page_id.page_no() == TRX_SYS_PAGE_NO);
 }
 
 /***************************************************************//**
 Gets the pointer in the nth slot of the rseg array.
-@return	pointer to rseg object, NULL if slot not in use */
+@return pointer to rseg object, NULL if slot not in use */
 UNIV_INLINE
 trx_rseg_t*
 trx_sys_get_nth_rseg(
 /*=================*/
-	trx_sys_t*	sys,	/*!< in: trx system */
-	ulint		n)	/*!< in: index of slot */
+	trx_sys_t*	sys,		/*!< in: trx system */
+	ulint		n,		/*!< in: index of slot */
+	bool		is_redo_rseg)	/*!< in: true if redo rseg. */
 {
 	ut_ad(n < TRX_SYS_N_RSEGS);
 
+	/* If redo rseg is being requested and id falls in range of
+	non-redo rseg that is from slot-1....slot-srv_tmp_undo_logs then
+	server is being upgraded from pre-5.7.2. In such case return
+	rseg from pending_purge_rseg_array array. */
+	if (is_redo_rseg && trx_sys_is_noredo_rseg_slot(n)) {
+		ut_ad(trx_sys->pending_purge_rseg_array[n] != NULL);
+		return(trx_sys->pending_purge_rseg_array[n]);
+	}
+
 	return(sys->rseg_array[n]);
 }
 
 /**********************************************************************//**
 Gets a pointer to the transaction system header and x-latches its page.
-@return	pointer to system header, page x-latched. */
+@return pointer to system header, page x-latched. */
 UNIV_INLINE
 trx_sysf_t*
 trx_sysf_get(
 /*=========*/
 	mtr_t*	mtr)	/*!< in: mtr */
 {
-	buf_block_t*	block;
-	trx_sysf_t*	header;
+	buf_block_t*	block = NULL;
+	trx_sysf_t*	header = NULL;
 
 	ut_ad(mtr);
 
-	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
-			     RW_X_LATCH, mtr);
-	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+	block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+			     univ_page_size, RW_X_LATCH, mtr);
 
-	header = TRX_SYS + buf_block_get_frame(block);
+	if (block) {
+		buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+
+		header = TRX_SYS + buf_block_get_frame(block);
+	}
 
 	return(header);
 }
@@ -111,7 +117,7 @@ trx_sysf_get(
 /*****************************************************************//**
 Gets the space of the nth rollback segment slot in the trx system
 file copy.
-@return	space id */
+@return space id */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_space(
@@ -131,7 +137,7 @@ trx_sysf_rseg_get_space(
 /*****************************************************************//**
 Gets the page number of the nth rollback segment slot in the trx system
 header.
-@return	page number, FIL_NULL if slot unused */
+@return page number, FIL_NULL if slot unused */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_page_no(
@@ -208,15 +214,28 @@ trx_write_trx_id(
 #if DATA_TRX_ID_LEN != 6
 # error "DATA_TRX_ID_LEN != 6"
 #endif
+	ut_ad(id > 0);
 	mach_write_to_6(ptr, id);
 }
 
+/*****************************************************************//**
+Check if slot-id is reserved slot-id for noredo rsegs. */
+UNIV_INLINE
+bool
+trx_sys_is_noredo_rseg_slot(
+/*========================*/
+	ulint	slot_id)	/*!< in: slot_id to check */
+{
+	/* Slots allocated from temp-tablespace are no-redo slots. */
+	return(slot_id > 0 && slot_id < (srv_tmp_undo_logs + 1));
+}
+
 #ifndef UNIV_HOTBACKUP
 /*****************************************************************//**
 Reads a trx id from an index page. In case that the id size changes in
 some future version, this function should be used instead of
 mach_read_...
-@return	id */
+@return id */
 UNIV_INLINE
 trx_id_t
 trx_read_trx_id(
@@ -232,7 +251,7 @@ trx_read_trx_id(
 /****************************************************************//**
 Looks for the trx handle with the given id in rw_trx_list.
 The caller must be holding trx_sys->mutex.
-@return	the trx handle or NULL if not found;
+@return the trx handle or NULL if not found;
 the pointer must not be dereferenced unless lock_sys->mutex was
 acquired before calling this function and is still being held */
 UNIV_INLINE
@@ -241,57 +260,18 @@ trx_get_rw_trx_by_id(
 /*=================*/
 	trx_id_t	trx_id)	/*!< in: trx id to search for */
 {
-	trx_t*		trx;
-	ulint		len;
-	trx_t*		first;
-
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	len = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+	ut_ad(trx_id > 0);
+	ut_ad(trx_sys_mutex_own());
 
-	if (len == 0) {
+	if (trx_sys->rw_trx_set.empty()) {
 		return(NULL);
 	}
 
-	/* Because the list is ordered on trx id in descending order,
-	we try to speed things up a bit. */
-
-	trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
-	assert_trx_in_rw_list(trx);
-
-	if (trx_id == trx->id) {
-		return(trx);
-	} else if (len == 1 || trx_id > trx->id) {
-		return(NULL);
-	}
+	TrxIdSet::iterator	it;
 
-	first = trx;
+	it = trx_sys->rw_trx_set.find(TrxTrack(trx_id));
 
-	trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
-	assert_trx_in_rw_list(trx);
-
-	if (trx_id == trx->id) {
-		return(trx);
-	} else if (len == 2 || trx_id < trx->id) {
-		return(NULL);
-	}
-
-	/* Search the list from the lower end (tail). */
-	if (trx_id < (first->id + trx->id) >> 1) {
-		for (trx = UT_LIST_GET_PREV(trx_list, trx);
-		     trx != NULL && trx_id > trx->id;
-		     trx = UT_LIST_GET_PREV(trx_list, trx)) {
-			assert_trx_in_rw_list(trx);
-		}
-	} else {
-		for (trx = UT_LIST_GET_NEXT(trx_list, first);
-		     trx != NULL && trx_id < trx->id;
-		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-			assert_trx_in_rw_list(trx);
-		}
-	}
-
-	return((trx != NULL && trx->id == trx_id) ? trx : NULL);
+	return(it == trx_sys->rw_trx_set.end() ? NULL : it->m_trx);
 }
 
 /****************************************************************//**
@@ -299,18 +279,17 @@ Returns the minimum trx id in trx list. This is the smallest id for which
 the trx can possibly be active. (But, you must look at the trx->state
 to find out if the minimum trx id transaction itself is active, or already
 committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
 UNIV_INLINE
 trx_id_t
 trx_rw_min_trx_id_low(void)
 /*=======================*/
 {
 	trx_id_t	id;
-	const trx_t*	trx;
 
-	ut_ad(mutex_own(&trx_sys->mutex));
+	ut_ad(trx_sys_mutex_own());
 
-	trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+	const trx_t*	trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
 
 	if (trx == NULL) {
 		id = trx_sys->max_trx_id;
@@ -334,12 +313,12 @@ trx_assert_recovered(
 {
 	const trx_t*	trx;
 
-	mutex_enter(&trx_sys->mutex);
+	trx_sys_mutex_enter();
 
 	trx = trx_get_rw_trx_by_id(trx_id);
 	ut_a(trx->is_recovered);
 
-	mutex_exit(&trx_sys->mutex);
+	trx_sys_mutex_exit();
 
 	return(TRUE);
 }
@@ -350,30 +329,25 @@ Returns the minimum trx id in rw trx list. This is the smallest id for which
 the rw trx can possibly be active. (But, you must look at the trx->state
 to find out if the minimum trx id transaction itself is active, or already
 committed.)
-@return	the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
+@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
 UNIV_INLINE
 trx_id_t
 trx_rw_min_trx_id(void)
 /*===================*/
 {
-	trx_id_t	id;
+	trx_sys_mutex_enter();
 
-	mutex_enter(&trx_sys->mutex);
+	trx_id_t	id = trx_rw_min_trx_id_low();
 
-	id = trx_rw_min_trx_id_low();
-
-	mutex_exit(&trx_sys->mutex);
+	trx_sys_mutex_exit();
 
 	return(id);
 }
 
 /****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
-transaction may already have been committed.
-@return	transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+Checks if a rw transaction with the given id is active.  If the caller is
+not holding lock_sys->mutex, the transaction may already have been committed.
+@return transaction instance if active, or NULL */
 UNIV_INLINE
 trx_t*
 trx_rw_is_active_low(
@@ -384,7 +358,7 @@ trx_rw_is_active_low(
 {
 	trx_t*		trx;
 
-	ut_ad(mutex_own(&trx_sys->mutex));
+	ut_ad(trx_sys_mutex_own());
 
 	if (trx_id < trx_rw_min_trx_id_low()) {
 
@@ -415,39 +389,43 @@ trx_rw_is_active_low(
 Checks if a rw transaction with the given id is active. If the caller is
 not holding lock_sys->mutex, the transaction may already have been
 committed.
-@return	transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
+@return transaction instance if active, or NULL */
 UNIV_INLINE
 trx_t*
 trx_rw_is_active(
 /*=============*/
 	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
-	ibool*		corrupt)	/*!< in: NULL or pointer to a flag
+	ibool*		corrupt,	/*!< in: NULL or pointer to a flag
 					that will be set if corrupt */
+	bool		do_ref_count)	/*!< in: if true then increment the
+					trx_t::n_ref_count */
 {
 	trx_t*		trx;
 
-	mutex_enter(&trx_sys->mutex);
+	trx_sys_mutex_enter();
 
 	trx = trx_rw_is_active_low(trx_id, corrupt);
 
-	mutex_exit(&trx_sys->mutex);
+	if (trx != 0) {
+		trx = trx_reference(trx, do_ref_count);
+	}
+
+	trx_sys_mutex_exit();
 
 	return(trx);
 }
 
 /*****************************************************************//**
 Allocates a new transaction id.
-@return	new, allocated trx id */
+@return new, allocated trx id */
 UNIV_INLINE
 trx_id_t
-trx_sys_get_new_trx_id(void)
-/*========================*/
+trx_sys_get_new_trx_id()
+/*====================*/
 {
 #ifndef WITH_WSREP
 	/* wsrep_fake_trx_id  violates this assert */
-	ut_ad(mutex_own(&trx_sys->mutex));
+	ut_ad(trx_sys_mutex_own());
 #endif /* WITH_WSREP */
 
 	/* VERY important: after the database is started, max_trx_id value is
@@ -457,7 +435,7 @@ trx_sys_get_new_trx_id(void)
 	Thus trx id values will not overlap when the database is
 	repeatedly started! */
 
-	if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) {
+	if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) {
 
 		trx_sys_flush_max_trx_id();
 	}
@@ -474,24 +452,24 @@ trx_id_t
 trx_sys_get_max_trx_id(void)
 /*========================*/
 {
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
-	trx_id_t	max_trx_id;
-#endif
-
-	ut_ad(!mutex_own(&trx_sys->mutex));
+	ut_ad(!trx_sys_mutex_own());
 
 #if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
 	/* Avoid torn reads. */
-	mutex_enter(&trx_sys->mutex);
-	max_trx_id = trx_sys->max_trx_id;
-	mutex_exit(&trx_sys->mutex);
+
+	trx_sys_mutex_enter();
+
+	trx_id_t	max_trx_id = trx_sys->max_trx_id;
+
+	trx_sys_mutex_exit();
+
 	return(max_trx_id);
 #else
 	/* Perform a dirty read. Callers should be prepared for stale
 	values, and we know that the value fits in a machine word, so
 	that it will be read and written atomically. */
 	return(trx_sys->max_trx_id);
-#endif
+#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */
 }
 
 /*****************************************************************//**
@@ -504,12 +482,26 @@ trx_sys_get_n_rw_trx(void)
 {
 	ulint	n_trx;
 
-	mutex_enter(&trx_sys->mutex);
+	trx_sys_mutex_enter();
 
 	n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 
-	mutex_exit(&trx_sys->mutex);
+	trx_sys_mutex_exit();
 
 	return(n_trx);
 }
+
+/**
+Add the transaction to the RW transaction set
+@param trx		transaction instance to add */
+UNIV_INLINE
+void
+trx_sys_rw_trx_add(trx_t* trx)
+{
+	ut_ad(trx->id != 0);
+
+	trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
+	ut_d(trx->in_rw_trx_list = true);
+}
+
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 11836183d57..19b0747c6a3 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -27,33 +27,55 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef trx0trx_h
 #define trx0trx_h
 
-#include "univ.i"
-#include "trx0types.h"
+#include <set>
+#include <list>
+
+#include "ha_prototypes.h"
+
 #include "dict0types.h"
+#include "trx0types.h"
+#include "ut0new.h"
+
 #ifndef UNIV_HOTBACKUP
 #include "lock0types.h"
 #include "log0log.h"
 #include "usr0types.h"
 #include "que0types.h"
 #include "mem0mem.h"
-#include "read0types.h"
 #include "trx0xa.h"
 #include "ut0vec.h"
 #include "fts0fts.h"
+#include "srv0srv.h"
+
+// Forward declaration
+struct mtr_t;
+
+// Forward declaration
+class ReadView;
+
+// Forward declaration
+class FlushObserver;
 
 /** Dummy session used currently in MySQL interface */
 extern sess_t*	trx_dummy_sess;
 
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
+/**
+Releases the search latch if trx has reserved it.
+@param[in,out] trx		Transaction that may own the AHI latch */
 UNIV_INLINE
 void
-trx_search_latch_release_if_reserved(
-/*=================================*/
-	trx_t*		trx); /*!< in: transaction */
+trx_search_latch_release_if_reserved(trx_t* trx);
+
+/** Set flush observer for the transaction
+@param[in,out]	trx		transaction struct
+@param[in]	observer	flush observer */
+void
+trx_set_flush_observer(
+	trx_t*		trx,
+	FlushObserver*	observer);
+
 /******************************************************************//**
 Set detailed error message for the transaction. */
-UNIV_INTERN
 void
 trx_set_detailed_error(
 /*===================*/
@@ -62,7 +84,6 @@ trx_set_detailed_error(
 /*************************************************************//**
 Set detailed error message for the transaction from a file. Note that the
 file is rewinded before reading from it. */
-UNIV_INTERN
 void
 trx_set_detailed_error_from_file(
 /*=============================*/
@@ -70,7 +91,7 @@ trx_set_detailed_error_from_file(
 	FILE*	file);	/*!< in: file to read message from */
 /****************************************************************//**
 Retrieves the error_info field from a trx.
-@return	the error info */
+@return the error info */
 UNIV_INLINE
 const dict_index_t*
 trx_get_error_info(
@@ -78,108 +99,145 @@ trx_get_error_info(
 	const trx_t*	trx);	/*!< in: trx object */
 /********************************************************************//**
 Creates a transaction object for MySQL.
-@return	own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
 trx_t*
 trx_allocate_for_mysql(void);
 /*========================*/
 /********************************************************************//**
 Creates a transaction object for background operations by the master thread.
-@return	own: transaction object */
-UNIV_INTERN
+@return own: transaction object */
 trx_t*
 trx_allocate_for_background(void);
 /*=============================*/
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
+
+/** Frees and initializes a transaction object instantiated during recovery.
+@param trx trx object to free and initialize during recovery */
 void
-trx_free_for_background(
-/*====================*/
-	trx_t*	trx);	/*!< in, own: trx object */
+trx_free_resurrected(trx_t* trx);
+
+/** Free a transaction that was allocated by background or user threads.
+@param trx trx object to free */
+void
+trx_free_for_background(trx_t* trx);
+
 /********************************************************************//**
 At shutdown, frees a transaction object that is in the PREPARED state. */
-UNIV_INTERN
 void
 trx_free_prepared(
 /*==============*/
-	trx_t*	trx)	/*!< in, own: trx object */
-	UNIV_COLD MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
 	trx_t*	trx);	/*!< in, own: trx object */
+
+/** Free a transaction object for MySQL.
+@param[in,out]	trx	transaction */
+void
+trx_free_for_mysql(trx_t*	trx);
+
+/** Disconnect a transaction from MySQL.
+@param[in,out]	trx	transaction */
+void
+trx_disconnect_plain(trx_t*	trx);
+
+/** Disconnect a prepared transaction from MySQL.
+@param[in,out]	trx	transaction */
+void
+trx_disconnect_prepared(trx_t*	trx);
+
 /****************************************************************//**
 Creates trx objects for transactions and initializes the trx list of
 trx_sys at database start. Rollback segment and undo log lists must
 already exist when this function is called, because the lists of
 transactions to be rolled back or cleaned up are built based on the
 undo log lists. */
-UNIV_INTERN
 void
 trx_lists_init_at_db_start(void);
 /*============================*/
 
-#ifdef UNIV_DEBUG
-#define trx_start_if_not_started_xa(t)				\
-	{							\
-	(t)->start_line = __LINE__;				\
-	(t)->start_file = __FILE__;				\
-	trx_start_if_not_started_xa_low((t));			\
-	}
-#else
-#define trx_start_if_not_started_xa(t)				\
-	trx_start_if_not_started_xa_low((t))
-#endif /* UNIV_DEBUG */
-
 /*************************************************************//**
 Starts the transaction if it is not yet started. */
-UNIV_INTERN
 void
 trx_start_if_not_started_xa_low(
 /*============================*/
-	trx_t*	trx);	/*!< in: transaction */
+	trx_t*	trx,		/*!< in/out: transaction */
+	bool	read_write);	/*!< in: true if read write transaction */
 /*************************************************************//**
 Starts the transaction if it is not yet started. */
-UNIV_INTERN
 void
 trx_start_if_not_started_low(
 /*=========================*/
-	trx_t*	trx);	/*!< in: transaction */
+	trx_t*	trx,		/*!< in/out: transaction */
+	bool	read_write);	/*!< in: true if read write transaction */
+
+/*************************************************************//**
+Starts a transaction for internal processing. */
+void
+trx_start_internal_low(
+/*===================*/
+	trx_t*	trx);		/*!< in/out: transaction */
+
+/** Starts a read-only transaction for internal processing.
+@param[in,out] trx	transaction to be started */
+void
+trx_start_internal_read_only_low(
+	trx_t*	trx);
 
 #ifdef UNIV_DEBUG
-#define trx_start_if_not_started(t)				\
-	{							\
+#define trx_start_if_not_started_xa(t, rw)			\
+	do {							\
 	(t)->start_line = __LINE__;				\
 	(t)->start_file = __FILE__;				\
-	trx_start_if_not_started_low((t));			\
-	}
+	trx_start_if_not_started_xa_low((t), rw);		\
+	} while (false)
+
+#define trx_start_if_not_started(t, rw)				\
+	do {							\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_if_not_started_low((t), rw);			\
+	} while (false)
+
+#define trx_start_internal(t)					\
+	do {							\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_internal_low((t));				\
+	} while (false)
+
+#define trx_start_internal_read_only(t)				\
+	do {							\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_internal_read_only_low(t);			\
+	} while (false)
 #else
-#define trx_start_if_not_started(t)				\
-	trx_start_if_not_started_low((t))
+#define trx_start_if_not_started(t, rw)				\
+	trx_start_if_not_started_low((t), rw)
+
+#define trx_start_internal(t)					\
+	trx_start_internal_low((t))
+
+#define trx_start_internal_read_only(t)				\
+	trx_start_internal_read_only_low(t)
+
+#define trx_start_if_not_started_xa(t, rw)			\
+	trx_start_if_not_started_xa_low((t), (rw))
 #endif /* UNIV_DEBUG */
 
 /*************************************************************//**
 Starts the transaction for a DDL operation. */
-UNIV_INTERN
 void
 trx_start_for_ddl_low(
 /*==================*/
 	trx_t*		trx,	/*!< in/out: transaction */
-	trx_dict_op_t	op)	/*!< in: dictionary operation type */
-	MY_ATTRIBUTE((nonnull));
+	trx_dict_op_t	op);	/*!< in: dictionary operation type */
 
 #ifdef UNIV_DEBUG
 #define trx_start_for_ddl(t, o)					\
-	{							\
+	do {							\
 	ut_ad((t)->start_file == 0);				\
 	(t)->start_line = __LINE__;				\
 	(t)->start_file = __FILE__;				\
 	trx_start_for_ddl_low((t), (o));			\
-	}
+	} while (0)
 #else
 #define trx_start_for_ddl(t, o)					\
 	trx_start_for_ddl_low((t), (o))
@@ -187,51 +245,46 @@ trx_start_for_ddl_low(
 
 /****************************************************************//**
 Commits a transaction. */
-UNIV_INTERN
 void
 trx_commit(
 /*=======*/
-	trx_t*	trx)	/*!< in/out: transaction */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*	trx);	/*!< in/out: transaction */
+
 /****************************************************************//**
 Commits a transaction and a mini-transaction. */
-UNIV_INTERN
 void
 trx_commit_low(
 /*===========*/
 	trx_t*	trx,	/*!< in/out: transaction */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction (will be committed),
+	mtr_t*	mtr);	/*!< in/out: mini-transaction (will be committed),
 			or NULL if trx made no modifications */
-	MY_ATTRIBUTE((nonnull(1)));
 /****************************************************************//**
 Cleans up a transaction at database startup. The cleanup is needed if
 the transaction already got to the middle of a commit when the database
 crashed, and we cannot roll it back. */
-UNIV_INTERN
 void
 trx_cleanup_at_db_startup(
 /*======================*/
 	trx_t*	trx);	/*!< in: transaction */
 /**********************************************************************//**
 Does the transaction commit for MySQL.
-@return	DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
 dberr_t
 trx_commit_for_mysql(
 /*=================*/
 	trx_t*	trx);	/*!< in/out: transaction */
-/**********************************************************************//**
-Does the transaction prepare for MySQL. */
-UNIV_INTERN
-void
-trx_prepare_for_mysql(
-/*==================*/
-	trx_t*	trx);	/*!< in/out: trx handle */
+
+/**
+Does the transaction prepare for MySQL.
+@param[in,out] trx		Transaction instance to prepare */
+
+dberr_t
+trx_prepare_for_mysql(trx_t* trx);
+
 /**********************************************************************//**
 This function is used to find number of prepared transactions and
 their transaction objects for a recovery.
-@return	number of prepared transactions */
-UNIV_INTERN
+@return number of prepared transactions */
 int
 trx_recover_for_mysql(
 /*==================*/
@@ -240,26 +293,22 @@ trx_recover_for_mysql(
 /*******************************************************************//**
 This function is used to find one X/Open XA distributed transaction
 which is in the prepared state
-@return	trx or NULL; on match, the trx->xid will be invalidated;
+@return trx or NULL; on match, the trx->xid will be invalidated;
 note that the trx may have been committed, unless the caller is
 holding lock_sys->mutex */
-UNIV_INTERN
 trx_t *
 trx_get_trx_by_xid(
 /*===============*/
-	const XID*	xid);	/*!< in: X/Open XA transaction identifier */
+	XID*	xid);	/*!< in: X/Open XA transaction identifier */
 /**********************************************************************//**
 If required, flushes the log to disk if we called trx_commit_for_mysql()
 with trx->flush_log_later == TRUE. */
-UNIV_INTERN
 void
 trx_commit_complete_for_mysql(
 /*==========================*/
-	trx_t*	trx)	/*!< in/out: transaction */
-	MY_ATTRIBUTE((nonnull));
+	trx_t*	trx);	/*!< in/out: transaction */
 /**********************************************************************//**
 Marks the latest SQL statement ended. */
-UNIV_INTERN
 void
 trx_mark_sql_stat_end(
 /*==================*/
@@ -267,32 +316,44 @@ trx_mark_sql_stat_end(
 /********************************************************************//**
 Assigns a read view for a consistent read query. All the consistent reads
 within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return	consistent read view */
-UNIV_INTERN
-read_view_t*
+when this function is first called for a new started transaction. */
+ReadView*
 trx_assign_read_view(
 /*=================*/
 	trx_t*	trx);	/*!< in: active transaction */
+
+/****************************************************************//**
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+ReadView*
+trx_get_read_view(
+/*==============*/
+	trx_t*	trx);
+
+/****************************************************************//**
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+const ReadView*
+trx_get_read_view(
+/*==============*/
+	const trx_t*	trx);
+
 /****************************************************************//**
 Prepares a transaction for commit/rollback. */
-UNIV_INTERN
 void
 trx_commit_or_rollback_prepare(
 /*===========================*/
 	trx_t*	trx);	/*!< in/out: transaction */
 /*********************************************************************//**
 Creates a commit command node struct.
-@return	own: commit node struct */
-UNIV_INTERN
+@return own: commit node struct */
 commit_node_t*
 trx_commit_node_create(
 /*===================*/
 	mem_heap_t*	heap);	/*!< in: mem heap where created */
 /***********************************************************//**
 Performs an execution step for a commit type node in a query graph.
-@return	query thread to run next, or NULL */
-UNIV_INTERN
+@return query thread to run next, or NULL */
 que_thr_t*
 trx_commit_step(
 /*============*/
@@ -301,7 +362,6 @@ trx_commit_step(
 /**********************************************************************//**
 Prints info about a transaction.
 Caller must hold trx_sys->mutex. */
-UNIV_INTERN
 void
 trx_print_low(
 /*==========*/
@@ -316,46 +376,41 @@ trx_print_low(
 			/*!< in: lock_number_of_rows_locked(&trx->lock) */
 	ulint		n_trx_locks,
 			/*!< in: length of trx->lock.trx_locks */
-	ulint		heap_size)
+	ulint		heap_size);
 			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
-	MY_ATTRIBUTE((nonnull));
 
 /**********************************************************************//**
 Prints info about a transaction.
 The caller must hold lock_sys->mutex and trx_sys->mutex.
 When possible, use trx_print() instead. */
-UNIV_INTERN
 void
 trx_print_latched(
 /*==============*/
 	FILE*		f,		/*!< in: output stream */
 	const trx_t*	trx,		/*!< in: transaction */
-	ulint		max_query_len)	/*!< in: max query length to print,
+	ulint		max_query_len);	/*!< in: max query length to print,
 					or 0 to use the default max length */
-	MY_ATTRIBUTE((nonnull));
 
 /**********************************************************************//**
 Prints info about a transaction.
 Acquires and releases lock_sys->mutex and trx_sys->mutex. */
-UNIV_INTERN
 void
 trx_print(
 /*======*/
 	FILE*		f,		/*!< in: output stream */
 	const trx_t*	trx,		/*!< in: transaction */
-	ulint		max_query_len)	/*!< in: max query length to print,
+	ulint		max_query_len);	/*!< in: max query length to print,
 					or 0 to use the default max length */
-	MY_ATTRIBUTE((nonnull));
 
 /**********************************************************************//**
 Determine if a transaction is a dictionary operation.
-@return	dictionary operation mode */
+@return dictionary operation mode */
 UNIV_INLINE
 enum trx_dict_op_t
 trx_get_dict_operation(
 /*===================*/
 	const trx_t*	trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /**********************************************************************//**
 Flag a transaction a dictionary operation. */
 UNIV_INLINE
@@ -371,12 +426,10 @@ trx_set_dict_operation(
 Determines if a transaction is in the given state.
 The caller must hold trx_sys->mutex, or it must be the thread
 that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return	TRUE if trx->state == state */
+A running RW transaction must be in trx_sys->rw_trx_list.
+@return TRUE if trx->state == state */
 UNIV_INLINE
-ibool
+bool
 trx_state_eq(
 /*=========*/
 	const trx_t*	trx,	/*!< in: transaction */
@@ -394,26 +447,23 @@ trx_state_eq(
 Asserts that a transaction has been started.
 The caller must hold trx_sys->mutex.
 @return TRUE if started */
-UNIV_INTERN
 ibool
 trx_assert_started(
 /*===============*/
 	const trx_t*	trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 # endif /* UNIV_DEBUG */
 
 /**********************************************************************//**
 Determines if the currently running transaction has been interrupted.
-@return	TRUE if interrupted */
-UNIV_INTERN
+@return TRUE if interrupted */
 ibool
 trx_is_interrupted(
 /*===============*/
 	const trx_t*	trx);	/*!< in: transaction */
 /**********************************************************************//**
 Determines if the currently running transaction is in strict mode.
-@return	TRUE if strict */
-UNIV_INTERN
+@return TRUE if strict */
 ibool
 trx_is_strict(
 /*==========*/
@@ -425,22 +475,20 @@ trx_is_strict(
 /*******************************************************************//**
 Calculates the "weight" of a transaction. The weight of one transaction
 is estimated as the number of altered rows + the number of locked rows.
-@param t	transaction
-@return		transaction weight */
+@param t transaction
+@return transaction weight */
 #define TRX_WEIGHT(t)	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
 
 /*******************************************************************//**
 Compares the "weight" (or size) of two transactions. Transactions that
 have edited non-transactional tables are considered heavier than ones
 that have not.
-@return	TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
+@return true if weight(a) >= weight(b) */
+bool
 trx_weight_ge(
 /*==========*/
-	const trx_t*	a,	/*!< in: the first transaction to be compared */
-	const trx_t*	b);	/*!< in: the second transaction to be compared */
-
+	const trx_t*	a,	/*!< in: the transaction to be compared */
+	const trx_t*	b);	/*!< in: the transaction to be compared */
 /* Maximum length of a string that can be returned by
 trx_get_que_state_str(). */
 #define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
@@ -448,64 +496,145 @@ trx_get_que_state_str(). */
 /*******************************************************************//**
 Retrieves transaction's que state in a human readable string. The string
 should not be free()'d or modified.
-@return	string in the data segment */
+@return string in the data segment */
 UNIV_INLINE
 const char*
 trx_get_que_state_str(
 /*==================*/
 	const trx_t*	trx);	/*!< in: transaction */
 
+/** Retrieves the transaction ID.
+At any given point in time it is guaranteed that IDs of the running
+transactions are unique. The values returned by this function for read-only
+transactions may be reused, so a subsequent RO transaction may get the same ID
+as a RO transaction that existed in the past. The values returned by this
+function should be used for printing purposes only.
+@param[in]	trx	transaction whose id to retrieve
+@return transaction id */
+UNIV_INLINE
+trx_id_t
+trx_get_id_for_print(
+	const trx_t*	trx);
+
 /****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table. */
-UNIV_INTERN
+Assign a transaction temp-tablespace bound rollback-segment. */
 void
 trx_assign_rseg(
 /*============*/
-	trx_t*		trx);		/*!< A read-only transaction that
-					needs to be assigned a RBS. */
-/*******************************************************************//**
+	trx_t*		trx);		/*!< transaction that involves write
+					to temp-table. */
+
+/** Create the trx_t pool */
+void
+trx_pool_init();
+
+/** Destroy the trx_t pool */
+void
+trx_pool_close();
+
+/**
+Set the transaction as a read-write transaction if it is not already
+tagged as such.
+@param[in,out] trx	Transaction that needs to be "upgraded" to RW from RO */
+void
+trx_set_rw_mode(
+	trx_t*		trx);
+
+/**
+Increase the reference count. If the transaction is in state
+TRX_STATE_COMMITTED_IN_MEMORY then the transaction is considered
+committed and the reference count is not incremented.
+@param trx Transaction that is being referenced
+@param do_ref_count Increment the reference iff this is true
+@return transaction instance if it is not committed */
+UNIV_INLINE
+trx_t*
+trx_reference(
+	trx_t*		trx,
+	bool		do_ref_count);
+
+/**
+Release the transaction. Decrease the reference count.
+@param trx Transaction that is being released */
+UNIV_INLINE
+void
+trx_release_reference(
+	trx_t*		trx);
+
+/**
+Check if the transaction is being referenced. */
+#define trx_is_referenced(t)	((t)->n_ref > 0)
+
+/** Arbitrate between a lock requestor and a lock holder.
+@param[in] requestor	Transaction requesting the lock
+@param[in] holder	Transaction holding the lock
+@return the transaction that will be rolled back, or NULL if no preference */
+
+UNIV_INLINE
+const trx_t*
+trx_arbitrate(const trx_t* requestor, const trx_t* holder);
+
+/**
+@param[in] trx		Transaction to check
+@return true if the transaction is a high priority transaction.*/
+UNIV_INLINE
+bool
+trx_is_high_priority(const trx_t* trx);
+
+/**
+Kill all transactions that are blocking this transaction from acquiring locks.
+@param[in,out] trx	High priority transaction */
+
+void
+trx_kill_blocking(trx_t* trx);
+
+/**
+Check if redo/noredo rseg is modified for insert/update.
+@param[in] trx		Transaction to check */
+UNIV_INLINE
+bool
+trx_is_rseg_updated(const trx_t* trx);
+
+/**
 Transactions that aren't started by the MySQL server don't set
 the trx_t::mysql_thd field. For such transactions we set the lock
 wait timeout to 0 instead of the user configured value that comes
 from innodb_lock_wait_timeout via trx_t::mysql_thd.
-@param trx	transaction
-@return		lock wait timeout in seconds */
-#define trx_lock_wait_timeout_get(trx)					\
-	((trx)->mysql_thd != NULL					\
-	 ? thd_lock_wait_timeout((trx)->mysql_thd)			\
+@param trx transaction
+@return lock wait timeout in seconds */
+#define trx_lock_wait_timeout_get(t)					\
+	((t)->mysql_thd != NULL						\
+	 ? thd_lock_wait_timeout((t)->mysql_thd)			\
 	 : 0)
 
-/*******************************************************************//**
+/**
 Determine if the transaction is a non-locking autocommit select
 (implied read-only).
-@param t	transaction
-@return true	if non-locking autocommit select transaction. */
+@param t transaction
+@return true if non-locking autocommit select transaction. */
 #define trx_is_autocommit_non_locking(t)				\
 ((t)->auto_commit && (t)->will_lock == 0)
 
-/*******************************************************************//**
+/**
 Determine if the transaction is a non-locking autocommit select
 with an explicit check for the read-only status.
-@param t	transaction
-@return true	if non-locking autocommit read-only transaction. */
+@param t transaction
+@return true if non-locking autocommit read-only transaction. */
 #define trx_is_ac_nl_ro(t)						\
 ((t)->read_only && trx_is_autocommit_non_locking((t)))
 
-/*******************************************************************//**
+/**
 Assert that the transaction is in the trx_sys_t::rw_trx_list */
 #define assert_trx_in_rw_list(t) do {					\
 	ut_ad(!(t)->read_only);						\
-	assert_trx_in_list(t);						\
+	ut_ad((t)->in_rw_trx_list					\
+	      == !((t)->read_only || !(t)->rsegs.m_redo.rseg));		\
+	check_trx_state(t);						\
 } while (0)
 
-/*******************************************************************//**
-Assert that the transaction is either in trx_sys->ro_trx_list or
-trx_sys->rw_trx_list but not both and it cannot be an autocommit
-non-locking select */
-#define assert_trx_in_list(t) do {					\
-	ut_ad((t)->in_ro_trx_list == (t)->read_only);			\
-	ut_ad((t)->in_rw_trx_list == !(t)->read_only);			\
+/**
+Check transaction state */
+#define check_trx_state(t) do {						\
 	ut_ad(!trx_is_autocommit_non_locking((t)));			\
 	switch ((t)->state) {						\
 	case TRX_STATE_PREPARED:					\
@@ -514,15 +643,36 @@ non-locking select */
 	case TRX_STATE_COMMITTED_IN_MEMORY:				\
 		continue;						\
 	case TRX_STATE_NOT_STARTED:					\
+	case TRX_STATE_FORCED_ROLLBACK:					\
 		break;							\
 	}								\
 	ut_error;							\
 } while (0)
 
+/** Assert (via ut_ad) that a transaction is free and can be re-initialized.
+@param t transaction handle; expanded repeatedly, so pass a plain variable */
+#define	assert_trx_is_free(t)	do {					\
+	ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED)			\
+	      || trx_state_eq((t), TRX_STATE_FORCED_ROLLBACK));		\
+	ut_ad(!trx_is_rseg_updated((t)));				\
+	ut_ad(!MVCC::is_view_active((t)->read_view));			\
+	ut_ad((t)->lock.wait_thr == NULL);				\
+	ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0);		\
+	ut_ad((t)->dict_operation == TRX_DICT_OP_NONE);			\
+} while(0)
+
+/** Check if transaction is inactive so that it can be freed and put back to
+the transaction pool; it must be free and must not hold dict_operation_lock.
+@param t transaction handle */
+#define assert_trx_is_inactive(t) do {					\
+	assert_trx_is_free((t));					\
+	ut_ad((t)->dict_operation_lock_mode == 0);			\
+} while(0)
+
 #ifdef UNIV_DEBUG
 /*******************************************************************//**
 Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+rw_trx_list and that it is a read-only transaction.
 The tranasction must be in the mysql_trx_list. */
 # define assert_trx_nonlocking_or_in_list(t)				\
 	do {								\
@@ -530,23 +680,25 @@ The tranasction must be in the mysql_trx_list. */
 			trx_state_t	t_state = (t)->state;		\
 			ut_ad((t)->read_only);				\
 			ut_ad(!(t)->is_recovered);			\
-			ut_ad(!(t)->in_ro_trx_list);			\
 			ut_ad(!(t)->in_rw_trx_list);			\
 			ut_ad((t)->in_mysql_trx_list);			\
 			ut_ad(t_state == TRX_STATE_NOT_STARTED		\
+			      || t_state == TRX_STATE_FORCED_ROLLBACK	\
 			      || t_state == TRX_STATE_ACTIVE);		\
 		} else {						\
-			assert_trx_in_list(t);				\
+			check_trx_state(t);				\
 		}							\
 	} while (0)
 #else /* UNIV_DEBUG */
 /*******************************************************************//**
 Assert that an autocommit non-locking slect cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+rw_trx_list and that it is a read-only transaction.
 The tranasction must be in the mysql_trx_list. */
 # define assert_trx_nonlocking_or_in_list(trx) ((void)0)
 #endif /* UNIV_DEBUG */
 
+typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> >	lock_pool_t;
+
 /*******************************************************************//**
 Latching protocol for trx_lock_t::que_state.  trx_lock_t::que_state
 captures the state of the query thread during the execution of a query.
@@ -589,12 +741,12 @@ struct trx_lock_t {
 	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
 					to and checked against lock_mark_counter
 					by lock_deadlock_recursive(). */
-	ibool		was_chosen_as_deadlock_victim;
+	bool		was_chosen_as_deadlock_victim;
 					/*!< when the transaction decides to
-					wait for a lock, it sets this to FALSE;
+					wait for a lock, it sets this to false;
 					if another transaction chooses this
 					transaction as a victim in deadlock
-					resolution, it sets this to TRUE.
+					resolution, it sets this to true.
 					Protected by trx->mutex. */
 	time_t		wait_started;	/*!< lock wait started at this time,
 					protected only by lock_sys->mutex */
@@ -607,20 +759,26 @@ struct trx_lock_t {
 					only be modified by the thread that is
 					serving the running transaction. */
 
+	lock_pool_t	rec_pool;	/*!< Pre-allocated record locks */
+
+	lock_pool_t	table_pool;	/*!< Pre-allocated table locks */
+
+	ulint		rec_cached;	/*!< Next free rec lock in pool */
+
+	ulint		table_cached;	/*!< Next free table lock in pool */
+
 	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
 					protected by lock_sys->mutex */
 
-	UT_LIST_BASE_NODE_T(lock_t)
-			trx_locks;	/*!< locks requested
-					by the transaction;
+	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
 					insertions are protected by trx->mutex
 					and lock_sys->mutex; removals are
 					protected by lock_sys->mutex */
 
-	ib_vector_t*	table_locks;	/*!< All table locks requested by this
+	lock_pool_t	table_locks;	/*!< All table locks requested by this
 					transaction, including AUTOINC locks */
 
-	ibool		cancel;		/*!< TRUE if the transaction is being
+	bool		cancel;		/*!< true if the transaction is being
 					rolled back either via deadlock
 					detection or due to lock timeout. The
 					caller has to acquire the trx_t::mutex
@@ -631,9 +789,21 @@ struct trx_lock_t {
 					mutex to prevent recursive deadlocks.
 					Protected by both the lock sys mutex
 					and the trx_t::mutex. */
+	ulint		n_rec_locks;	/*!< number of rec locks in this trx */
+
+	/** The transaction called ha_innobase::start_stmt() to
+	lock a table. Most likely a temporary table. */
+	bool		start_stmt;
 };
 
-#define TRX_MAGIC_N	91118598
+/** Type used to store the list of tables that are modified by a given
+transaction. We store pointers to the table objects in memory because
+we know that a table object will not be destroyed while a transaction
+that modified it is running. */
+typedef std::set<
+	dict_table_t*,
+	std::less<dict_table_t*>,
+	ut_allocator<dict_table_t*> >	trx_mod_tables_t;
 
 /** The transaction handle
 
@@ -680,17 +850,90 @@ lock_sys->mutex and sometimes by trx->mutex. */
 
 typedef enum {
 	TRX_SERVER_ABORT = 0,
-	TRX_WSREP_ABORT  = 1,
-	TRX_REPLICATION_ABORT = 2
+	TRX_WSREP_ABORT  = 1
 } trx_abort_t;
 
-struct trx_t{
-	ulint		magic_n;
 
-	ib_mutex_t	mutex;		/*!< Mutex protecting the fields
-					state and lock
-					(except some fields of lock, which
-					are protected by lock_sys->mutex) */
+/** Represents an instance of rollback segment along with its state variables.*/
+struct trx_undo_ptr_t {
+	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
+					transaction, or NULL if not assigned
+					yet */
+	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
+					NULL if no inserts performed yet */
+	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
+					NULL if no update performed yet */
+};
+
+/** Rollback segments assigned to a transaction for undo logging. */
+struct trx_rsegs_t {
+	/** Undo log pointer referencing a rollback segment that resides in
+	the system/undo tablespace; used for undo logging of tables that need
+	to be recovered on crash. */
+	trx_undo_ptr_t	m_redo;
+
+	/** Undo log pointer referencing a rollback segment that resides in
+	the temp tablespace; used for undo logging of tables that do not need
+	to be recovered on crash. */
+	trx_undo_ptr_t	m_noredo;
+};
+
+enum trx_rseg_type_t {
+	TRX_RSEG_TYPE_NONE = 0,		/*!< void rollback segment type. */
+	TRX_RSEG_TYPE_REDO,		/*!< redo rollback segment. */
+	TRX_RSEG_TYPE_NOREDO		/*!< non-redo rollback segment. */
+};
+
+struct TrxVersion {
+	TrxVersion(trx_t* trx);
+
+	/**
+	@return true if the trx_t instance is the same */
+	bool operator==(const TrxVersion& rhs) const
+	{
+		return(rhs.m_trx == m_trx);
+	}
+
+	trx_t*		m_trx;
+	ulint		m_version;
+};
+
+typedef std::list<TrxVersion, ut_allocator<TrxVersion> > hit_list_t;
+
+struct trx_t {
+	TrxMutex	mutex;		/*!< Mutex protecting the fields
+					state and lock (except some fields
+					of lock, which are protected by
+					lock_sys->mutex) */
+
+	/* Note: in_depth was split from in_innodb for fixing a RO
+	performance issue. Acquiring the trx_t::mutex for each row
+	costs ~3% in performance. It is not required for correctness.
+	Therefore we increment/decrement in_depth without holding any
+	mutex. The assumption is that the Server will only ever call
+	the handler from one thread. This is not true for kill_connection.
+	Therefore in innobase_kill_connection. We don't increment this
+	counter via TrxInInnoDB. */
+
+	ib_uint32_t	in_depth;	/*!< Track nested TrxInInnoDB
+					count */
+
+	ib_uint32_t	in_innodb;	/*!< if the thread is executing
+					in the InnoDB context count > 0. */
+
+	bool		abort;		/*!< if this flag is set then
+					this transaction must abort when
+					it can */
+
+	trx_id_t	id;		/*!< transaction id */
+
+	trx_id_t	no;		/*!< transaction serialization number:
+					max trx id shortly before the
+					transaction is moved to
+					COMMITTED_IN_MEMORY state.
+					Protected by trx_sys_t::mutex
+					when trx->in_rw_trx_list. Initially
+					set to TRX_ID_MAX. */
 
 	/** State of the trx from the point of view of concurrency control
 	and the valid state transitions.
@@ -698,6 +941,7 @@ struct trx_t{
 	Possible states:
 
 	TRX_STATE_NOT_STARTED
+	TRX_STATE_FORCED_ROLLBACK
 	TRX_STATE_ACTIVE
 	TRX_STATE_PREPARED
 	TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
@@ -716,55 +960,90 @@ struct trx_t{
 	Recovered XA:
 	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
 
-	XA (2PC) (shutdown before ROLLBACK or COMMIT):
+	XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
 	* NOT_STARTED -> PREPARED -> (freed)
 
+	Disconnected XA can become recovered:
+	* ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
+	Disconnected means from mysql e.g due to the mysql client disconnection.
 	Latching and various transaction lists membership rules:
 
 	XA (2PC) transactions are always treated as non-autocommit.
 
 	Transitions to ACTIVE or NOT_STARTED occur when
-	!in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
+	!in_rw_trx_list (no trx_sys->mutex needed).
 
 	Autocommit non-locking read-only transactions move between states
-	without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
+	without holding any mutex. They are !in_rw_trx_list.
+
+	All transactions, unless they are determined to be ac-nl-ro,
+	explicitly tagged as read-only or read-write, will first be put
+	on the read-only transaction list. Only when a !read-only transaction
+	in the read-only list tries to acquire an X or IX lock on a table
+	do we remove it from the read-only list and put it on the read-write
+	list. During this switch we assign it a rollback segment.
 
 	When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
-	it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
+	it is a user transaction. It cannot be in rw_trx_list.
 
 	ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
 	The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
 
 	ACTIVE->COMMITTED is possible when the transaction is in
-	ro_trx_list or rw_trx_list.
+	rw_trx_list.
 
 	Transitions to COMMITTED are protected by both lock_sys->mutex
 	and trx->mutex.
 
 	NOTE: Some of these state change constraints are an overkill,
 	currently only required for a consistent view for printing stats.
-	This unnecessarily adds a huge cost for the general case.
+	This unnecessarily adds a huge cost for the general case. */
 
-	NOTE: In the future we should add read only transactions to the
-	ro_trx_list the first time they try to acquire a lock ie. by default
-	we treat all read-only transactions as non-locking.  */
 	trx_state_t	state;
 
+	ReadView*	read_view;	/*!< consistent read view used in the
+					transaction, or NULL if not yet set */
+
+	UT_LIST_NODE_T(trx_t)
+			trx_list;	/*!< list of transactions;
+					protected by trx_sys->mutex. */
+	UT_LIST_NODE_T(trx_t)
+			no_list;	/*!< Required during view creation
+					to check for the view limit for
+					transactions that are committing */
+
 	trx_lock_t	lock;		/*!< Information about the transaction
 					locks and state. Protected by
 					trx->mutex or lock_sys->mutex
 					or both */
-	ulint		is_recovered;	/*!< 0=normal transaction,
+	bool		is_recovered;	/*!< 0=normal transaction,
 					1=recovered, must be rolled back,
 					protected by trx_sys->mutex when
 					trx->in_rw_trx_list holds */
 
+	hit_list_t	hit_list;	/*!< List of transactions to kill,
+					when a high priority transaction
+					is blocked on a lock wait. */
+
+	os_thread_id_t	killed_by;	/*!< The thread ID that wants to
+					kill this transaction asynchronously.
+					This is required because we recursively
+					enter the handlerton methods and need
+					to distinguish between the kill thread
+					and the transaction thread.
+
+					Note: We need to be careful w.r.t the
+					Thread Pool. The thread doing the kill
+					should not leave InnoDB between the
+					mark and the actual async kill because
+					the running thread can change. */
+
 	/* These fields are not protected by any mutex. */
 	const char*	op_info;	/*!< English text describing the
 					current operation, or an empty
 					string */
 	ulint		isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
-	ulint		check_foreigns;	/*!< normally TRUE, but if the user
+	bool		check_foreigns;	/*!< normally TRUE, but if the user
 					wants to suppress foreign key checks,
 					(in table imports, for example) we
 					set this FALSE */
@@ -773,88 +1052,60 @@ struct trx_t{
 	commit between multiple storage engines and the binary log. When
 	an engine participates in a transaction, it's responsible for
 	registering itself using the trans_register_ha() API. */
-	unsigned	is_registered:1;/* This flag is set to 1 after the
+	bool		is_registered;	/* This flag is set to true after the
 					transaction has been registered with
 					the coordinator using the XA API, and
-					is set to 0 after commit or rollback. */
-	unsigned	active_commit_ordered:1;/* 1 if owns prepare mutex, if
-					this is set to 1 then registered should
-					also be set to 1. This is used in the
-					XA code */
+					is set to false after commit or
+					rollback. */
+	unsigned	active_commit_ordered:1;/* 1 if owns prepare mutex */
 	/*------------------------------*/
-	ulint		check_unique_secondary;
+	bool		check_unique_secondary;
 					/*!< normally TRUE, but if the user
 					wants to speed up inserts by
 					suppressing unique key checks
 					for secondary indexes when we decide
 					if we can use the insert buffer for
 					them, we set this FALSE */
-	ulint		support_xa;	/*!< normally we do the XA two-phase
-					commit steps, but by setting this to
-					FALSE, one can save CPU time and about
-					150 bytes in the undo log size as then
-					we skip XA steps */
-	ulint		flush_log_later;/* In 2PC, we hold the
+	bool		support_xa;	/*!< normally do XA two-phase commit */
+	bool		flush_log_later;/* In 2PC, we hold the
 					prepare_commit mutex across
 					both phases. In that case, we
 					defer flush of the logs to disk
 					until after we release the
 					mutex. */
-	ulint		must_flush_log_later;/*!< this flag is set to TRUE in
+	bool		must_flush_log_later;/*!< this flag is set to TRUE in
 					trx_commit() if flush_log_later was
 					TRUE, and there were modifications by
 					the transaction; in that case we must
 					flush the log in
 					trx_commit_complete_for_mysql() */
 	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
-	ulint		has_search_latch;
+	bool		has_search_latch;
 					/*!< TRUE if this trx has latched the
 					search system latch in S-mode */
-	ulint		search_latch_timeout;
-					/*!< If we notice that someone is
-					waiting for our S-lock on the search
-					latch to be released, we wait in
-					row0sel.cc for BTR_SEA_TIMEOUT new
-					searches until we try to keep
-					the search latch again over
-					calls from MySQL; this is intended
-					to reduce contention on the search
-					latch */
-	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op */
+	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op_t */
 
 	/* Fields protected by the srv_conc_mutex. */
-	ulint		declared_to_be_inside_innodb;
+	bool		declared_to_be_inside_innodb;
 					/*!< this is TRUE if we have declared
 					this transaction in
 					srv_conc_enter_innodb to be inside the
 					InnoDB engine */
-	ulint		n_tickets_to_enter_innodb;
+	ib_uint32_t	n_tickets_to_enter_innodb;
 					/*!< this can be > 0 only when
 					declared_to_... is TRUE; when we come
 					to srv_conc_innodb_enter, if the value
 					here is > 0, we decrement this by 1 */
-	ulint		dict_operation_lock_mode;
+	ib_uint32_t	dict_operation_lock_mode;
 					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
 					the latch mode trx currently holds
 					on dict_operation_lock. Protected
 					by dict_operation_lock. */
 
-	trx_id_t	no;		/*!< transaction serialization number:
-					max trx id shortly before the
-					transaction is moved to
-					COMMITTED_IN_MEMORY state.
-					Protected by trx_sys_t::mutex
-					when trx->in_rw_trx_list. Initially
-					set to TRX_ID_MAX. */
-
-	time_t		start_time;	/*!< time the trx state last time became
+	time_t		start_time;	/*!< time the state last time became
 					TRX_STATE_ACTIVE */
-	clock_t		start_time_micro;	/*!< start time of transaction in
-					microseconds */
-	trx_id_t	id;		/*!< transaction id */
-	XID		xid;		/*!< X/Open XA transaction
-					identification to identify a
-					transaction branch */
+	clock_t		start_time_micro; /*!< start time of the transaction
+					in microseconds. */
 	lsn_t		commit_lsn;	/*!< lsn at the time of the commit */
 	table_id_t	table_id;	/*!< Table to drop iff dict_operation
 					== TRX_DICT_OP_TABLE, or 0. */
@@ -868,39 +1119,32 @@ struct trx_t{
 					contains a pointer to the latest file
 					name; this is NULL if binlog is not
 					used */
-	ib_int64_t	mysql_log_offset;
+	int64_t		mysql_log_offset;
 					/*!< if MySQL binlog is used, this
 					field contains the end offset of the
 					binlog entry */
 	/*------------------------------*/
-	ulint		n_mysql_tables_in_use; /*!< number of Innobase tables
+	ib_uint32_t	n_mysql_tables_in_use; /*!< number of Innobase tables
 					used in the processing of the current
 					SQL statement in MySQL */
-	ulint		mysql_n_tables_locked;
+	ib_uint32_t	mysql_n_tables_locked;
 					/*!< how many tables the current SQL
 					statement uses, except those
 					in consistent read */
 	/*------------------------------*/
-	UT_LIST_NODE_T(trx_t)
-			trx_list;	/*!< list of transactions;
-					protected by trx_sys->mutex.
-					The same node is used for both
-					trx_sys_t::ro_trx_list and
-					trx_sys_t::rw_trx_list */
 #ifdef UNIV_DEBUG
 	/** The following two fields are mutually exclusive. */
 	/* @{ */
 
-	ibool		in_ro_trx_list;	/*!< TRUE if in trx_sys->ro_trx_list */
-	ibool		in_rw_trx_list;	/*!< TRUE if in trx_sys->rw_trx_list */
+	bool		in_rw_trx_list;	/*!< true if in trx_sys->rw_trx_list */
 	/* @} */
 #endif /* UNIV_DEBUG */
 	UT_LIST_NODE_T(trx_t)
 			mysql_trx_list;	/*!< list of transactions created for
 					MySQL; protected by trx_sys->mutex */
 #ifdef UNIV_DEBUG
-	ibool		in_mysql_trx_list;
-					/*!< TRUE if in
+	bool		in_mysql_trx_list;
+					/*!< true if in
 					trx_sys->mysql_trx_list */
 #endif /* UNIV_DEBUG */
 	/*------------------------------*/
@@ -922,24 +1166,12 @@ struct trx_t{
 					survive over a transaction commit, if
 					it is a stored procedure with a COMMIT
 					WORK statement, for instance */
-	mem_heap_t*	global_read_view_heap;
-					/*!< memory heap for the global read
-					view */
-	read_view_t*	global_read_view;
-					/*!< consistent read view associated
-					to a transaction or NULL */
-	read_view_t*	read_view;	/*!< consistent read view used in the
-					transaction or NULL, this read view
-					if defined can be normal read view
-					associated to a transaction (i.e.
-					same as global_read_view) or read view
-					associated to a cursor */
 	/*------------------------------*/
 	UT_LIST_BASE_NODE_T(trx_named_savept_t)
 			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
 					oldest first */
 	/*------------------------------*/
-	ib_mutex_t	undo_mutex;	/*!< mutex protecting the fields in this
+	UndoMutex	undo_mutex;	/*!< mutex protecting the fields in this
 					section (down to undo_no_arr), EXCEPT
 					last_sql_stat_start, which can be
 					accessed only when we know that there
@@ -952,25 +1184,23 @@ struct trx_t{
 					with no gaps; thus it represents
 					the number of modified/inserted
 					rows in a transaction */
+	ulint		undo_rseg_space;
+					/*!< space id where last undo record
+					was written */
 	trx_savept_t	last_sql_stat_start;
 					/*!< undo_no when the last sql statement
 					was started: in case of an error, trx
 					is rolled back down to this undo
 					number; see note at undo_mutex! */
-	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
-					transaction, or NULL if not assigned
-					yet */
-	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
-					NULL if no inserts performed yet */
-	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
-					NULL if no update performed yet */
+	trx_rsegs_t	rsegs;		/* rollback segments for undo logging */
 	undo_no_t	roll_limit;	/*!< least undo number to undo during
-					a rollback */
+					a partial rollback; 0 otherwise */
+#ifdef UNIV_DEBUG
+	bool		in_rollback;	/*!< true when the transaction is
+					executing a partial or full rollback */
+#endif /* UNIV_DEBUG */
 	ulint		pages_undone;	/*!< number of undo log pages undone
 					since the last undo log truncation */
-	trx_undo_arr_t*	undo_no_arr;	/*!< array of undo numbers of undo log
-					records which are currently processed
-					by a rollback operation */
 	/*------------------------------*/
 	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
 					an SQL statement. This is useful for
@@ -982,43 +1212,76 @@ struct trx_t{
 					when the trx instance is destroyed.
 					Protected by lock_sys->mutex. */
 	/*------------------------------*/
-	ibool		read_only;	/*!< TRUE if transaction is flagged
+	bool		read_only;	/*!< true if transaction is flagged
 					as a READ-ONLY transaction.
-					if !auto_commit || will_lock > 0
-					then it will added to the list
-					trx_sys_t::ro_trx_list. A read only
+					if auto_commit && will_lock == 0
+					then it will be handled as an
+					AC-NL-RO-SELECT (Auto Commit Non-Locking
+					Read Only Select). A read only
 					transaction will not be assigned an
-					UNDO log. Non-locking auto-commit
-					read-only transaction will not be on
-					either list. */
-	ibool		auto_commit;	/*!< TRUE if it is an autocommit */
-	ulint		will_lock;	/*!< Will acquire some locks. Increment
+					UNDO log. */
+	bool		auto_commit;	/*!< true if it is an autocommit */
+	ib_uint32_t	will_lock;	/*!< Will acquire some locks. Increment
 					each time we determine that a lock will
 					be acquired by the MySQL layer. */
-	bool		ddl;		/*!< true if it is a transaction that
-					is being started for a DDL operation */
 	/*------------------------------*/
 	fts_trx_t*	fts_trx;	/*!< FTS information, or NULL if
 					transaction hasn't modified tables
 					with FTS indexes (yet). */
 	doc_id_t	fts_next_doc_id;/* The document id used for updates */
 	/*------------------------------*/
-	ulint		flush_tables;	/*!< if "covering" the FLUSH TABLES",
+	ib_uint32_t	flush_tables;	/*!< if "covering" the FLUSH TABLES",
 					count of tables being flushed. */
 
 	/*------------------------------*/
+	bool		ddl;		/*!< true if it is an internal
+					transaction for DDL */
+	bool		internal;	/*!< true if it is a system/internal
+					transaction background task. This
+					includes DDL transactions too.  Such
+					transactions are always treated as
+					read-write. */
+	/*------------------------------*/
 #ifdef UNIV_DEBUG
 	ulint		start_line;	/*!< Track where it was started from */
 	const char*	start_file;	/*!< Filename where it was started */
 #endif /* UNIV_DEBUG */
-	/*------------------------------*/
+
+	lint		n_ref;		/*!< Count of references, protected
+					by trx_t::mutex. We can't release the
+					locks nor commit the transaction until
+					this reference is 0.  We can change
+					the state to COMMITTED_IN_MEMORY to
+					signify that it is no longer
+					"active". */
+
+	/** Version of this instance. It is incremented each time the
+	instance is re-used in trx_start_low(). It is used to track
+	whether a transaction has been restarted since it was tagged
+	for asynchronous rollback. */
+	ulint		version;
+
+	XID*		xid;		/*!< X/Open XA transaction
+					identification to identify a
+					transaction branch */
+	trx_mod_tables_t mod_tables;	/*!< List of tables that were modified
+					by this transaction */
+        /*------------------------------*/
 	bool		api_trx;	/*!< trx started by InnoDB API */
 	bool		api_auto_commit;/*!< automatic commit */
 	bool		read_write;	/*!< if read and write operation */
 
 	/*------------------------------*/
-	char detailed_error[256];	/*!< detailed error message for last
+	char*		detailed_error;	/*!< detailed error message for last
 					error, or empty. */
+	FlushObserver*	flush_observer;	/*!< flush observer */
+
+#ifdef UNIV_DEBUG
+	bool		is_dd_trx;	/*!< True if the transaction is used for
+					doing Non-locking Read-only Read
+					Committed on DD tables */
+#endif /* UNIV_DEBUG */
+
 	/* Lock wait statistics */
 	ulint		n_rec_lock_waits;
 					/*!< Number of record lock waits,
@@ -1036,8 +1299,23 @@ struct trx_t{
 #ifdef WITH_WSREP
 	os_event_t	wsrep_event;	/* event waited for in srv_conc_slot */
 #endif /* WITH_WSREP */
+
+	ulint		magic_n;
 };
 
+/**
+Check if transaction is started.
+@param[in] trx		Transaction whose state we need to check
+@return true if transaction is in state started */
+inline
+bool
+trx_is_started(
+	const trx_t*	trx)
+{
+	return(trx->state != TRX_STATE_NOT_STARTED
+	       && trx->state != TRX_STATE_FORCED_ROLLBACK);
+}
+
 /* Transaction isolation levels (trx->isolation_level) */
 #define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
 						SELECTs are performed so that
@@ -1079,20 +1357,6 @@ Multiple flags can be combined with bitwise OR. */
 #define TRX_DUP_REPLACE	2	/* duplicate rows are to be replaced */
 
 
-/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL		0
-#define TRX_SIG_TOTAL_ROLLBACK		1
-#define TRX_SIG_ROLLBACK_TO_SAVEPT	2
-#define TRX_SIG_COMMIT			3
-#define TRX_SIG_BREAK_EXECUTION		5
-
-/* Sender types of a signal */
-#define TRX_SIG_SELF		0	/* sent by the session itself, or
-					by an error occurring within this
-					session */
-#define TRX_SIG_OTHER_SESS	1	/* sent by another session (which
-					must hold rights to this) */
-
 /** Commit node states */
 enum commit_node_state {
 	COMMIT_NODE_SEND = 1,	/*!< about to send a commit signal to
@@ -1122,23 +1386,229 @@ struct commit_node_t{
 	mutex_exit(&t->mutex);			\
 } while (0)
 
-/** @brief The latch protecting the adaptive search system
+/** Track if a transaction is executing inside InnoDB code. It acts
+like a gate between the Server and InnoDB.  */
+class TrxInInnoDB {
+public:
+	/**
+	@param[in,out] trx	Transaction entering InnoDB via the handler
+	@param[in] disable	true if called from COMMIT/ROLLBACK method */
+	TrxInInnoDB(trx_t* trx, bool disable = false)
+		:
+		m_trx(trx)
+	{
+		enter(trx, disable);
+	}
+
+	/**
+	Destructor */
+	~TrxInInnoDB()
+	{
+		exit(m_trx);
+	}
+
+	/**
+	@return true if the transaction has been marked for asynchronous
+		rollback */
+	bool is_aborted() const
+	{
+		return(is_aborted(m_trx));
+	}
+
+	/**
+	@return true if the transaction can't be rolled back asynchronously */
+	bool is_rollback_disabled() const
+	{
+		return((m_trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) > 0);
+	}
+
+	/**
+	@return true if the transaction has been marked for asynchronous
+		rollback */
+	static bool is_aborted(const trx_t* trx)
+	{
+		if (trx->state == TRX_STATE_NOT_STARTED) {
+			return(false);
+		}
+
+		ut_ad(srv_read_only_mode || trx->in_depth > 0);
+		ut_ad(srv_read_only_mode || trx->in_innodb > 0);
+
+		return(trx->abort
+		       || trx->state == TRX_STATE_FORCED_ROLLBACK);
+	}
+
+	/**
+	Start statement requested for transaction.
+	@param[in, out] trx	Transaction at the start of a SQL statement */
+	static void begin_stmt(trx_t* trx)
+	{
+		enter(trx, false);
+	}
+
+	/**
+	Note an end statement for transaction
+	@param[in, out] trx	Transaction at end of a SQL statement */
+	static void end_stmt(trx_t* trx)
+	{
+		exit(trx);
+	}
+
+	/**
+	@return true if the rollback is being initiated by the thread that
+		marked the transaction for asynchronous rollback */
+	static bool is_async_rollback(const trx_t* trx)
+	{
+		return(trx->killed_by == os_thread_get_curr_id());
+	}
+
+private:
+	/**
+	Note that we have crossed into InnoDB code.
+	@param[in] disable	true if called from COMMIT/ROLLBACK method */
+	static void enter(trx_t* trx, bool disable)
+	{
+		if (srv_read_only_mode) {
+
+			return;
+		}
+
+		/* Avoid excessive mutex acquire/release */
+
+		ut_ad(!is_async_rollback(trx));
+
+		++trx->in_depth;
+
+		/* If trx->in_depth is greater than 1 then
+		transaction is already in InnoDB. */
+		if (trx->in_depth > 1) {
+
+			return;
+		}
+
+		/* Only the owning thread should release the latch. */
+
+		trx_search_latch_release_if_reserved(trx);
+
+		trx_mutex_enter(trx);
+
+		wait(trx);
+
+		ut_ad((trx->in_innodb & TRX_FORCE_ROLLBACK_MASK)
+		      < (TRX_FORCE_ROLLBACK_MASK - 1));
+
+		/* If it hasn't already been marked for async rollback
+		and it will be committed/rolled back. */
 
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
+		if (!is_forced_rollback(trx)
+		    && disable
+		    && is_started(trx)
+		    && !trx_is_autocommit_non_locking(trx)) {
 
-but does NOT protect:
+			ut_ad(trx->killed_by == 0);
 
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
+			/* This transaction has crossed the point of no
+			return and cannot be rolled back asynchronously
+			now. It must commit or rollback synchronously. */
 
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern rw_lock_t*	btr_search_latch_temp;
+			trx->in_innodb |= TRX_FORCE_ROLLBACK_DISABLE;
+		}
 
-/** The latch protecting the adaptive search system */
-#define btr_search_latch	(*btr_search_latch_temp)
+		++trx->in_innodb;
+
+		trx_mutex_exit(trx);
+	}
+
+	/**
+	Note that we are exiting InnoDB code */
+	static void exit(trx_t* trx)
+	{
+		if (srv_read_only_mode) {
+
+			return;
+		}
+
+		/* Avoid excessive mutex acquire/release */
+
+		ut_ad(trx->in_depth > 0);
+
+		--trx->in_depth;
+
+		if (trx->in_depth > 0) {
+
+			return;
+		}
+
+		/* Only the owning thread should release the latch. */
+
+		trx_search_latch_release_if_reserved(trx);
+
+		trx_mutex_enter(trx);
+
+		ut_ad((trx->in_innodb & TRX_FORCE_ROLLBACK_MASK) > 0);
+
+		--trx->in_innodb;
+
+		trx_mutex_exit(trx);
+	}
+
+	/**
+	@return true if it is a forced rollback, asynchronously */
+	static bool is_forced_rollback(const trx_t* trx)
+	{
+		ut_ad(trx_mutex_own(trx));
+
+		return((trx->in_innodb & TRX_FORCE_ROLLBACK)) > 0;
+	}
+
+	/**
+	Wait for the asynchronous rollback to complete, if it is in progress */
+	static void wait(trx_t* trx)
+	{
+		ut_ad(trx_mutex_own(trx));
+
+		ulint	loop_count = 0;
+		/* start with optimistic sleep time - 20 micro seconds. */
+		ulint	sleep_time = 20;
+
+		while (is_forced_rollback(trx)) {
+
+			/* Wait for the async rollback to complete */
+
+			trx_mutex_exit(trx);
+
+			loop_count++;
+			/* If the wait is long, don't hog the cpu. */
+			if (loop_count < 100) {
+				/* 20 microseconds */
+				sleep_time = 20;
+			} else if (loop_count < 1000) {
+				/* 1 millisecond */
+				sleep_time = 1000;
+			} else {
+				/* 100 milliseconds */
+				sleep_time = 100000;
+			}
+
+			os_thread_sleep(sleep_time);
+
+			trx_mutex_enter(trx);
+		}
+	}
+
+	/**
+	@return true if transaction is started */
+	static bool is_started(const trx_t* trx)
+	{
+		ut_ad(trx_mutex_own(trx));
+
+		return(trx_is_started(trx));
+	}
+private:
+	/**
+	Transaction instance crossing the handler boundary from the Server. */
+	trx_t*			m_trx;
+};
 
 #ifndef UNIV_NONINL
 #include "trx0trx.ic"
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index fa30e6229a3..f0ed9b61c5a 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2016, MariaDB Corporation. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -24,16 +24,16 @@ The transaction
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
+#include "read0read.h"
+
 /**********************************************************************//**
 Determines if a transaction is in the given state.
 The caller must hold trx_sys->mutex, or it must be the thread
 that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return	TRUE if trx->state == state */
+A running RW transaction must be in trx_sys->rw_trx_list.
+@return TRUE if trx->state == state */
 UNIV_INLINE
-ibool
+bool
 trx_state_eq(
 /*=========*/
 	const trx_t*	trx,	/*!< in: transaction */
@@ -49,25 +49,32 @@ trx_state_eq(
 #ifdef UNIV_DEBUG
 	switch (trx->state) {
 	case TRX_STATE_PREPARED:
+
 		ut_ad(!trx_is_autocommit_non_locking(trx));
 		return(trx->state == state);
 
 	case TRX_STATE_ACTIVE:
+
 		assert_trx_nonlocking_or_in_list(trx);
 		return(state == trx->state);
 
 	case TRX_STATE_COMMITTED_IN_MEMORY:
-		assert_trx_in_list(trx);
+
+		check_trx_state(trx);
 		return(state == trx->state);
 
 	case TRX_STATE_NOT_STARTED:
-		/* This state is not allowed for running transactions. */
+	case TRX_STATE_FORCED_ROLLBACK:
+
+		/* These states are not allowed for running transactions. */
 		ut_a(state == TRX_STATE_NOT_STARTED
+		     || state == TRX_STATE_FORCED_ROLLBACK
 		     || (relaxed
 			 && thd_get_error_number(trx->mysql_thd)));
+
 		ut_ad(!trx->in_rw_trx_list);
-		ut_ad(!trx->in_ro_trx_list);
-		return(state == trx->state);
+
+		return(true);
 	}
 	ut_error;
 #endif /* UNIV_DEBUG */
@@ -76,7 +83,7 @@ trx_state_eq(
 
 /****************************************************************//**
 Retrieves the error_info field from a trx.
-@return	the error info */
+@return the error info */
 UNIV_INLINE
 const dict_index_t*
 trx_get_error_info(
@@ -89,7 +96,7 @@ trx_get_error_info(
 /*******************************************************************//**
 Retrieves transaction's que state in a human readable string. The string
 should not be free()'d or modified.
-@return	string in the data segment */
+@return string in the data segment */
 UNIV_INLINE
 const char*
 trx_get_que_state_str(
@@ -111,9 +118,45 @@ trx_get_que_state_str(
 	}
 }
 
+/** Retrieves the transaction ID.
+In a given point in time it is guaranteed that IDs of the running
+transactions are unique. The values returned by this function for readonly
+transactions may be reused, so a subsequent RO transaction may get the same ID
+as a RO transaction that existed in the past. The values returned by this
+function should be used for printing purposes only.
+@param[in]	trx	transaction whose id to retrieve
+@return transaction id */
+UNIV_INLINE
+trx_id_t
+trx_get_id_for_print(
+	const trx_t*	trx)
+{
+	/* Readonly and transactions whose intentions are unknown (whether
+	they will eventually do a WRITE) don't have trx_t::id assigned (it is
+	0 for those transactions). Transaction IDs in
+	innodb_trx.trx_id,
+	innodb_locks.lock_id,
+	innodb_locks.lock_trx_id,
+	innodb_lock_waits.requesting_trx_id,
+	innodb_lock_waits.blocking_trx_id should match because those tables
+	could be used in an SQL JOIN on those columns. Also trx_t::id is
+	printed by SHOW ENGINE INNODB STATUS, and in logs, so we must have the
+	same value printed everywhere consistently. */
+
+	/* DATA_TRX_ID_LEN is the storage size in bytes. */
+	static const trx_id_t	max_trx_id
+		= (1ULL << (DATA_TRX_ID_LEN * CHAR_BIT)) - 1;
+
+	ut_ad(trx->id <= max_trx_id);
+
+	return(trx->id != 0
+	       ? trx->id
+	       : reinterpret_cast<trx_id_t>(trx) | (max_trx_id + 1));
+}
+
 /**********************************************************************//**
 Determine if a transaction is a dictionary operation.
-@return	dictionary operation mode */
+@return dictionary operation mode */
 UNIV_INLINE
 enum trx_dict_op_t
 trx_get_dict_operation(
@@ -170,18 +213,192 @@ ok:
 	trx->dict_operation = op;
 }
 
+/**
+Releases the search latch if trx has reserved it.
+@param[in,out] trx		Transaction that may own the AHI latch */
+UNIV_INLINE
+void
+trx_search_latch_release_if_reserved(trx_t* trx)
+{
+	ut_a(!trx->has_search_latch);
+}
+
 /********************************************************************//**
-Releases the search latch if trx has reserved it. */
+Check if redo rseg is modified for insert/update. */
+UNIV_INLINE
+bool
+trx_is_redo_rseg_updated(
+/*=====================*/
+	const trx_t*	   trx) /*!< in: transaction */
+{
+	return(trx->rsegs.m_redo.insert_undo != 0
+	       || trx->rsegs.m_redo.update_undo != 0);
+}
+
+/********************************************************************//**
+Check if noredo rseg is modified for insert/update. */
+UNIV_INLINE
+bool
+trx_is_noredo_rseg_updated(
+/*=======================*/
+	const trx_t*	   trx) /*!< in: transaction */
+{
+	return(trx->rsegs.m_noredo.insert_undo != 0
+	       || trx->rsegs.m_noredo.update_undo != 0);
+}
+
+/********************************************************************//**
+Check if redo/noredo rseg is modified for insert/update. */
+UNIV_INLINE
+bool
+trx_is_rseg_updated(
+/*================*/
+	const trx_t*	   trx) /*!< in: transaction */
+{
+	return(trx_is_redo_rseg_updated(trx)
+	       || trx_is_noredo_rseg_updated(trx));
+}
+
+/********************************************************************//**
+Check if redo/nonredo rseg is valid. */
+UNIV_INLINE
+bool
+trx_is_rseg_assigned(
+/*=================*/
+	const trx_t*	   trx) /*!< in: transaction */
+{
+	return(trx->rsegs.m_redo.rseg != NULL
+	       || trx->rsegs.m_noredo.rseg != NULL);
+}
+
+/**
+Increase the reference count. If the transaction is in state
+TRX_STATE_COMMITTED_IN_MEMORY then the transaction is considered
+committed and the reference count is not incremented.
+@param trx Transaction that is being referenced
+@param do_ref_count Increment the reference iff this is true
+@return transaction instance if it is not committed */
+UNIV_INLINE
+trx_t*
+trx_reference(
+	trx_t*		trx,
+	bool		do_ref_count)
+{
+	trx_mutex_enter(trx);
+
+	if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+		trx_mutex_exit(trx);
+		trx = NULL;
+	} else if (do_ref_count) {
+		ut_ad(trx->n_ref >= 0);
+		++trx->n_ref;
+		trx_mutex_exit(trx);
+	} else {
+		trx_mutex_exit(trx);
+	}
+
+	return(trx);
+}
+
+/**
+Release the transaction. Decrease the reference count.
+@param trx Transaction that is being released */
 UNIV_INLINE
 void
-trx_search_latch_release_if_reserved(
-/*=================================*/
-	trx_t*	   trx) /*!< in: transaction */
+trx_release_reference(
+	trx_t*		trx)
+{
+	trx_mutex_enter(trx);
+
+	ut_ad(trx->n_ref > 0);
+	--trx->n_ref;
+
+	trx_mutex_exit(trx);
+}
+
+
+/**
+@param trx		Get the active view for this transaction, if one exists
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+ReadView*
+trx_get_read_view(
+	trx_t*		trx)
+{
+	return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
+}
+
+/**
+@param trx		Get the active view for this transaction, if one exists
+@return the transaction's read view or NULL if one not assigned. */
+UNIV_INLINE
+const ReadView*
+trx_get_read_view(
+	const trx_t*	trx)
 {
-	if (trx->has_search_latch) {
-		rw_lock_s_unlock(&btr_search_latch);
+	return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
+}
 
-		trx->has_search_latch = FALSE;
+/**
+@param[in] trx		Transaction to check
+@return true if the transaction is a high priority transaction.*/
+UNIV_INLINE
+bool
+trx_is_high_priority(const trx_t* trx)
+{
+	if (trx->mysql_thd == NULL) {
+		return(false);
 	}
+
+	return(thd_trx_priority(trx->mysql_thd) > 0);
 }
 
+/**
+@param[in] requestor	Transaction requesting the lock
+@param[in] holder	Transaction holding the lock
+@return the transaction to roll back, or NULL if there is no preference */
+UNIV_INLINE
+const trx_t*
+trx_arbitrate(const trx_t* requestor, const trx_t* holder)
+{
+	ut_ad(!trx_is_autocommit_non_locking(holder));
+	ut_ad(!trx_is_autocommit_non_locking(requestor));
+
+	/* Note: Background stats collection transactions also acquire
+	locks on user tables. They don't have an associated MySQL session
+	instance. */
+
+	if (requestor->mysql_thd == NULL) {
+
+		ut_ad(!trx_is_high_priority(requestor));
+
+		if (trx_is_high_priority(holder)) {
+			return(requestor);
+		} else {
+			return(NULL);
+		}
+
+	} else if (holder->mysql_thd == NULL) {
+
+		ut_ad(!trx_is_high_priority(holder));
+
+		if (trx_is_high_priority(requestor)) {
+			return(holder);
+		}
+
+		return(NULL);
+	}
+
+	const THD*	victim = thd_trx_arbitrate(
+		requestor->mysql_thd, holder->mysql_thd);
+
+	ut_ad(victim == NULL
+	      || victim == requestor->mysql_thd
+	      || victim == holder->mysql_thd);
+
+	if (victim != NULL) {
+		return(victim == requestor->mysql_thd ? requestor : holder);
+	}
+
+	return(NULL);
+}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 7ca95131328..37a53f900eb 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,13 +27,45 @@ Created 3/26/1996 Heikki Tuuri
 #define trx0types_h
 
 #include "ut0byte.h"
+#include "ut0mutex.h"
+#include "ut0new.h"
+
+#include <set>
+#include <queue>
+#include <vector>
+
+//#include <unordered_set>
 
 /** printf(3) format used for printing DB_TRX_ID and other system fields */
-#define TRX_ID_FMT		IB_ID_FMT
+#define TRX_ID_FMT	IB_ID_FMT
 
 /** maximum length that a formatted trx_t::id could take, not including
 the terminating NUL character. */
-#define TRX_ID_MAX_LEN		17
+static const ulint TRX_ID_MAX_LEN = 17;
+
+/** Space id of the transaction system page (the system tablespace) */
+static const ulint TRX_SYS_SPACE = 0;
+
+/** Page number of the transaction system page */
+#define TRX_SYS_PAGE_NO		FSP_TRX_SYS_PAGE_NO
+
+/** Random value to check for corruption of trx_t */
+static const ulint TRX_MAGIC_N = 91118598;
+
+/** If this flag is set then the transaction cannot be rolled back
+asynchronously. */
+static const ib_uint32_t TRX_FORCE_ROLLBACK_DISABLE = 1 << 29;
+
+/** Was the transaction rolled back asynchronously or by the
+owning thread. This flag is relevant only if TRX_FORCE_ROLLBACK
+is set.  */
+static const ib_uint32_t TRX_FORCE_ROLLBACK_ASYNC = 1 << 30;
+
+/** Mark the transaction for forced rollback (1U: 1 << 31 overflows int) */
+static const ib_uint32_t TRX_FORCE_ROLLBACK = 1U << 31;
+
+/** For masking out the above three flags */
+static const ib_uint32_t TRX_FORCE_ROLLBACK_MASK = 0x1FFFFFFF;
 
 /** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
 enum trx_que_t {
@@ -46,9 +78,18 @@ enum trx_que_t {
 
 /** Transaction states (trx_t::state) */
 enum trx_state_t {
+
 	TRX_STATE_NOT_STARTED,
+
+	/** Same as not started but with additional semantics that it
+	was rolled back asynchronously the last time it was active. */
+	TRX_STATE_FORCED_ROLLBACK,
+
 	TRX_STATE_ACTIVE,
-	TRX_STATE_PREPARED,			/* Support for 2PC/XA */
+
+	/** Support for 2PC/XA */
+	TRX_STATE_PREPARED,
+
 	TRX_STATE_COMMITTED_IN_MEMORY
 };
 
@@ -81,10 +122,6 @@ struct trx_sig_t;
 struct trx_rseg_t;
 /** Transaction undo log */
 struct trx_undo_t;
-/** Array of undo numbers of undo records being rolled back or purged */
-struct trx_undo_arr_t;
-/** A cell of trx_undo_arr_t */
-struct trx_undo_inf_t;
 /** The control structure used in the purge operation */
 struct trx_purge_t;
 /** Rollback command node in a query graph */
@@ -95,21 +132,6 @@ struct commit_node_t;
 struct trx_named_savept_t;
 /* @} */
 
-/** Rollback contexts */
-enum trx_rb_ctx {
-	RB_NONE = 0,	/*!< no rollback */
-	RB_NORMAL,	/*!< normal rollback */
-	RB_RECOVERY_PURGE_REC,
-			/*!< rolling back an incomplete transaction,
-			in crash recovery, rolling back an
-			INSERT that was performed by updating a
-			delete-marked record; if the delete-marked record
-			no longer exists in an active read view, it will
-			be purged */
-	RB_RECOVERY	/*!< rolling back an incomplete transaction,
-			in crash recovery */
-};
-
 /** Row identifier (DB_ROW_ID, DATA_ROW_ID) */
 typedef ib_id_t	row_id_t;
 /** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
@@ -142,6 +164,159 @@ typedef byte	trx_upagef_t;
 
 /** Undo log record */
 typedef	byte	trx_undo_rec_t;
+
 /* @} */
 
-#endif
+typedef ib_mutex_t RsegMutex;
+typedef ib_mutex_t TrxMutex;
+typedef ib_mutex_t UndoMutex;
+typedef ib_mutex_t PQMutex;
+typedef ib_mutex_t TrxSysMutex;
+
+/** Rollback segments from a given transaction with trx-no
+scheduled for purge. */
+class TrxUndoRsegs {
+private:
+	typedef std::vector<trx_rseg_t*, ut_allocator<trx_rseg_t*> >
+		trx_rsegs_t;
+public:
+	typedef trx_rsegs_t::iterator iterator;
+
+	/** Default constructor */
+	TrxUndoRsegs() : m_trx_no() { }
+
+	explicit TrxUndoRsegs(trx_id_t trx_no)
+		:
+		m_trx_no(trx_no)
+	{
+		// Do nothing
+	}
+
+	/** Get transaction number
+	@return trx_id_t - get transaction number. */
+	trx_id_t get_trx_no() const
+	{
+		return(m_trx_no);
+	}
+
+	/** Add rollback segment.
+	@param rseg rollback segment to add. */
+	void push_back(trx_rseg_t* rseg)
+	{
+		m_rsegs.push_back(rseg);
+	}
+
+	/** Erase the element pointed by given iterator.
+	@param[in]	it	iterator to the element to erase */
+	void erase(iterator& it)
+	{
+		m_rsegs.erase(it);
+	}
+
+	/** Number of registered rsegs.
+	@return size of rseg list. */
+	ulint size() const
+	{
+		return(m_rsegs.size());
+	}
+
+	/**
+	@return an iterator to the first element */
+	iterator begin()
+	{
+		return(m_rsegs.begin());
+	}
+
+	/**
+	@return an iterator to the end */
+	iterator end()
+	{
+		return(m_rsegs.end());
+	}
+
+	/** Append rollback segments from referred instance to current
+	instance. */
+	void append(const TrxUndoRsegs& append_from)
+	{
+		ut_ad(get_trx_no() == append_from.get_trx_no());
+
+		m_rsegs.insert(m_rsegs.end(),
+			       append_from.m_rsegs.begin(),
+			       append_from.m_rsegs.end());
+	}
+
+	/** Compare two TrxUndoRsegs based on trx_no.
+	@param lhs first element to compare
+	@param rhs second element to compare
+	@return true if lhs.m_trx_no > rhs.m_trx_no, else false. */
+	bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
+	{
+		return(lhs.m_trx_no > rhs.m_trx_no);
+	}
+
+	/** Compiler defined copy-constructor/assignment operator
+	should be fine given that there is no reference to a memory
+	object outside scope of class object.*/
+
+private:
+	/** The rollback segments transaction number. */
+	trx_id_t		m_trx_no;
+
+	/** Rollback segments of a transaction, scheduled for purge. */
+	trx_rsegs_t		m_rsegs;
+};
+
+typedef std::priority_queue<
+	TrxUndoRsegs,
+	std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
+	TrxUndoRsegs>	purge_pq_t;
+
+typedef std::vector<trx_id_t, ut_allocator<trx_id_t> >	trx_ids_t;
+
+/** Mapping read-write transactions from id to transaction instance, for
+creating read views and during trx id lookup for MVCC and locking. */
+struct TrxTrack {
+	explicit TrxTrack(trx_id_t id, trx_t* trx = NULL)
+		:
+		m_id(id),
+		m_trx(trx)
+	{
+		// Do nothing
+	}
+
+	trx_id_t	m_id;
+	trx_t*		m_trx;
+};
+
+struct TrxTrackHash {
+	size_t operator()(const TrxTrack& key) const
+	{
+		return(size_t(key.m_id));
+	}
+};
+
+/**
+Equality comparator (by id) for TrxTrack, for a hashed TrxIdSet */
+struct TrxTrackHashCmp {
+
+	bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
+	{
+		return(lhs.m_id == rhs.m_id);
+	}
+};
+
+/**
+Ordering comparator (by id) for TrxTrack, used by TrxIdSet */
+struct TrxTrackCmp {
+
+	bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
+	{
+		return(lhs.m_id < rhs.m_id);
+	}
+};
+
+//typedef std::unordered_set<TrxTrack, TrxTrackHash, TrxTrackHashCmp> TrxIdSet;
+typedef std::set<TrxTrack, TrxTrackCmp, ut_allocator<TrxTrack> >
+	TrxIdSet;
+
+#endif /* trx0types_h */
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 42ac62916e0..60fbb9d2304 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2016, MariaDB Corporation
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +27,6 @@ Created 3/26/1996 Heikki Tuuri
 #define trx0undo_h
 
 #ifndef UNIV_INNOCHECKSUM
-
 #include "univ.i"
 #include "trx0types.h"
 #include "mtr0mtr.h"
@@ -39,7 +37,7 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Builds a roll pointer.
-@return	roll pointer */
+@return roll pointer */
 UNIV_INLINE
 roll_ptr_t
 trx_undo_build_roll_ptr(
@@ -62,7 +60,7 @@ trx_undo_decode_roll_ptr(
 					entry within page */
 /***********************************************************************//**
 Returns TRUE if the roll pointer is of the insert type.
-@return	TRUE if insert undo log */
+@return TRUE if insert undo log */
 UNIV_INLINE
 ibool
 trx_undo_roll_ptr_is_insert(
@@ -70,13 +68,13 @@ trx_undo_roll_ptr_is_insert(
 	roll_ptr_t	roll_ptr);	/*!< in: roll pointer */
 /***********************************************************************//**
 Returns true if the record is of the insert type.
-@return	true if the record was freshly inserted (not updated). */
+@return true if the record was freshly inserted (not updated). */
 UNIV_INLINE
 bool
 trx_undo_trx_id_is_insert(
 /*======================*/
 	const byte*	trx_id)	/*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
-	MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
+	MY_ATTRIBUTE((warn_unused_result));
 #endif /* !UNIV_HOTBACKUP */
 /*****************************************************************//**
 Writes a roll ptr to an index page. In case that the size changes in
@@ -93,41 +91,42 @@ trx_write_roll_ptr(
 Reads a roll ptr from an index page. In case that the roll ptr size
 changes in some future version, this function should be used instead of
 mach_read_...
-@return	roll ptr */
+@return roll ptr */
 UNIV_INLINE
 roll_ptr_t
 trx_read_roll_ptr(
 /*==============*/
 	const byte*	ptr);	/*!< in: pointer to memory from where to read */
 #ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return	pointer to page x-latched */
+
+/** Gets an undo log page and x-latches it.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return pointer to page x-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get(
-/*==============*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number */
-	mtr_t*	mtr);		/*!< in: mtr */
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return	pointer to page s-latched */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr);
+
+/** Gets an undo log page and s-latches it.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return pointer to page s-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get_s_latched(
-/*========================*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number */
-	mtr_t*	mtr);		/*!< in: mtr */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr);
+
 /******************************************************************//**
 Returns the previous undo record on the page in the specified log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_prev_rec(
@@ -138,7 +137,7 @@ trx_undo_page_get_prev_rec(
 /******************************************************************//**
 Returns the next undo log record on the page in the specified log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_next_rec(
@@ -149,7 +148,7 @@ trx_undo_page_get_next_rec(
 /******************************************************************//**
 Returns the last undo record on the page in the specified undo log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_last_rec(
@@ -160,7 +159,7 @@ trx_undo_page_get_last_rec(
 /******************************************************************//**
 Returns the first undo record on the page in the specified undo log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_first_rec(
@@ -170,8 +169,7 @@ trx_undo_page_get_first_rec(
 	ulint	offset);/*!< in: undo log header offset on page */
 /***********************************************************************//**
 Gets the previous record in an undo log.
-@return	undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
 trx_undo_rec_t*
 trx_undo_get_prev_rec(
 /*==================*/
@@ -182,8 +180,7 @@ trx_undo_get_prev_rec(
 	mtr_t*		mtr);	/*!< in: mtr */
 /***********************************************************************//**
 Gets the next record in an undo log.
-@return	undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
+@return undo log record, the page s-latched, NULL if none */
 trx_undo_rec_t*
 trx_undo_get_next_rec(
 /*==================*/
@@ -191,37 +188,42 @@ trx_undo_get_next_rec(
 	ulint		page_no,/*!< in: undo log header page number */
 	ulint		offset,	/*!< in: undo log header offset on page */
 	mtr_t*		mtr);	/*!< in: mtr */
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return	undo log record, the page latched, NULL if none */
-UNIV_INTERN
+
+/** Gets the first record in an undo log.
+@param[in]	space		undo log header space
+@param[in]	page_size	page size
+@param[in]	page_no		undo log header page number
+@param[in]	offset		undo log header offset on page
+@param[in]	mode		latching mode: RW_S_LATCH or RW_X_LATCH
+@param[in,out]	mtr		mini-transaction
+@return undo log record, the page latched, NULL if none */
 trx_undo_rec_t*
 trx_undo_get_first_rec(
-/*===================*/
-	ulint	space,	/*!< in: undo log header space */
-	ulint	zip_size,/*!< in: compressed page size in bytes
-			or 0 for uncompressed pages */
-	ulint	page_no,/*!< in: undo log header page number */
-	ulint	offset,	/*!< in: undo log header offset on page */
-	ulint	mode,	/*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr);	/*!< in: mtr */
+	ulint			space,
+	const page_size_t&	page_size,
+	ulint			page_no,
+	ulint			offset,
+	ulint			mode,
+	mtr_t*			mtr);
+
 /********************************************************************//**
 Tries to add a page to the undo log segment where the undo log is placed.
-@return	X-latched block if success, else NULL */
-UNIV_INTERN
+@return X-latched block if success, else NULL */
 buf_block_t*
 trx_undo_add_page(
 /*==============*/
-	trx_t*		trx,	/*!< in: transaction */
-	trx_undo_t*	undo,	/*!< in: undo log memory object */
-	mtr_t*		mtr)	/*!< in: mtr which does not have a latch to any
-				undo log page; the caller must have reserved
-				the rollback segment mutex */
+	trx_t*		trx,		/*!< in: transaction */
+	trx_undo_t*	undo,		/*!< in: undo log memory object */
+	trx_undo_ptr_t*	undo_ptr,	/*!< in: assign undo log from
+					referred rollback segment. */
+	mtr_t*		mtr)		/*!< in: mtr which does not have
+					a latch to any undo log page;
+					the caller must have reserved
+					the rollback segment mutex */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /********************************************************************//**
 Frees the last undo log page.
 The caller must hold the rollback segment mutex. */
-UNIV_INTERN
 void
 trx_undo_free_last_page_func(
 /*==========================*/
@@ -244,40 +246,37 @@ trx_undo_free_last_page_func(
 /***********************************************************************//**
 Truncates an undo log from the end. This function is used during a rollback
 to free space from an undo log. */
-UNIV_INTERN
 void
-trx_undo_truncate_end(
+trx_undo_truncate_end_func(
 /*=======================*/
 	trx_t*		trx,	/*!< in: transaction whose undo log it is */
 	trx_undo_t*	undo,	/*!< in/out: undo log */
 	undo_no_t	limit)	/*!< in: all undo records with undo number
 				>= this value should be truncated */
-	MY_ATTRIBUTE((nonnull));
+	MY_ATTRIBUTE((nonnull(1,2)));
 
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
+#define trx_undo_truncate_end(trx, undo, limit) \
+	trx_undo_truncate_end_func(trx, undo, limit)
+
+/** Truncate the head of an undo log.
+NOTE that only whole pages are freed; the header page is not
+freed, but emptied, if all the records there are below the limit.
+@param[in,out]	rseg		rollback segment
+@param[in]	hdr_page_no	header page number
+@param[in]	hdr_offset	header offset on the page
+@param[in]	limit		first undo number to preserve
+(everything below the limit will be truncated) */
 void
 trx_undo_truncate_start(
-/*====================*/
-	trx_rseg_t*	rseg,		/*!< in: rollback segment */
-	ulint		space,		/*!< in: space id of the log */
-	ulint		hdr_page_no,	/*!< in: header page number */
-	ulint		hdr_offset,	/*!< in: header offset on the page */
-	undo_no_t	limit);		/*!< in: all undo pages with
-					undo numbers < this value
-					should be truncated; NOTE that
-					the function only frees whole
-					pages; the header page is not
-					freed, but emptied, if all the
-					records there are < limit */
+	trx_rseg_t*	rseg,
+	ulint		hdr_page_no,
+	ulint		hdr_offset,
+	undo_no_t	limit);
 /********************************************************************//**
 Initializes the undo log lists for a rollback segment memory copy.
 This function is only called when the database is started or a new
 rollback segment created.
-@return	the combined size of undo log segments in pages */
-UNIV_INTERN
+@return the combined size of undo log segments in pages */
 ulint
 trx_undo_lists_init(
 /*================*/
@@ -288,91 +287,113 @@ undo log reused.
 @return DB_SUCCESS if undo log assign successful, possible error codes
 are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
 DB_OUT_OF_MEMORY */
-UNIV_INTERN
 dberr_t
 trx_undo_assign_undo(
 /*=================*/
-	trx_t*		trx,	/*!< in: transaction */
-	ulint		type)	/*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+	trx_t*		trx,		/*!< in: transaction */
+	trx_undo_ptr_t*	undo_ptr,	/*!< in: assign undo log from
+					referred rollback segment. */
+	ulint		type)		/*!< in: TRX_UNDO_INSERT or
+					TRX_UNDO_UPDATE */
 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 /******************************************************************//**
 Sets the state of the undo log segment at a transaction finish.
-@return	undo log segment header page, x-latched */
-UNIV_INTERN
+@return undo log segment header page, x-latched */
 page_t*
 trx_undo_set_state_at_finish(
 /*=========================*/
 	trx_undo_t*	undo,	/*!< in: undo log memory copy */
 	mtr_t*		mtr);	/*!< in: mtr */
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return	undo log segment header page, x-latched */
-UNIV_INTERN
+
+/** Set the state of the undo log segment at an XA PREPARE or XA ROLLBACK.
+@param[in,out]	trx		transaction
+@param[in,out]	undo		insert_undo or update_undo log
+@param[in]	rollback	false=XA PREPARE, true=XA ROLLBACK
+@param[in,out]	mtr		mini-transaction
+@return undo log segment header page, x-latched */
 page_t*
 trx_undo_set_state_at_prepare(
-/*==========================*/
-	trx_t*		trx,	/*!< in: transaction */
-	trx_undo_t*	undo,	/*!< in: undo log memory copy */
-	mtr_t*		mtr);	/*!< in: mtr */
+	trx_t*		trx,
+	trx_undo_t*	undo,
+	bool		rollback,
+	mtr_t*		mtr);
 
 /**********************************************************************//**
 Adds the update undo log header as the first in the history list, and
 frees the memory object, or puts it to the list of cached update undo log
 segments. */
-UNIV_INTERN
 void
 trx_undo_update_cleanup(
 /*====================*/
-	trx_t*	trx,		/*!< in: trx owning the update undo log */
-	page_t*	undo_page,	/*!< in: update undo log header page,
-				x-latched */
-	mtr_t*	mtr);		/*!< in: mtr */
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
+	trx_t*		trx,		/*!< in: trx owning the update
+					undo log */
+	trx_undo_ptr_t*	undo_ptr,	/*!< in: update undo log. */
+	page_t*		undo_page,	/*!< in: update undo log header page,
+					x-latched */
+	bool		update_rseg_history_len,
+					/*!< in: if true: update rseg history
+					len else skip updating it. */
+	ulint		n_added_logs,	/*!< in: number of logs added */
+	mtr_t*		mtr);		/*!< in: mtr */
+
+/** Frees an insert undo log after a transaction commit or rollback.
 Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
+the data can be discarded.
+@param[in,out]	undo_ptr	undo log to clean up
+@param[in]	noredo		true=the undo tablespace is not redo logged */
 void
 trx_undo_insert_cleanup(
-/*====================*/
-	trx_t*	trx);	/*!< in: transaction handle */
+	trx_undo_ptr_t*	undo_ptr,
+	bool		noredo);
 
 /********************************************************************//**
 At shutdown, frees the undo logs of a PREPARED transaction. */
-UNIV_INTERN
 void
 trx_undo_free_prepared(
 /*===================*/
 	trx_t*	trx)	/*!< in/out: PREPARED transaction */
 	UNIV_COLD MY_ATTRIBUTE((nonnull));
+
+/* Forward declaration. */
+namespace undo {
+	class Truncate;
+};
+
+/** Truncate UNDO tablespace, reinitialize header and rseg.
+@param[in]	undo_trunc	UNDO tablespace handler
+@return true if success else false. */
+bool
+trx_undo_truncate_tablespace(
+	undo::Truncate*	undo_trunc);
+
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
 Parses the redo log entry of an undo log page initialization.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 trx_undo_parse_page_init(
 /*=====================*/
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	page_t*	page,	/*!< in: page or NULL */
-	mtr_t*	mtr);	/*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return	end of log record or NULL */
-UNIV_INTERN
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in: page or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/** Parse the redo log entry of an undo log page header create or reuse.
+@param[in]	type	MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+@param[in]	ptr	redo log record
+@param[in]	end_ptr	end of log buffer
+@param[in,out]	page	page frame or NULL
+@param[in,out]	mtr	mini-transaction or NULL
+@return end of log record or NULL */
 byte*
 trx_undo_parse_page_header(
-/*=======================*/
-	ulint	type,	/*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	page_t*	page,	/*!< in: page or NULL */
-	mtr_t*	mtr);	/*!< in: mtr or NULL */
+	mlog_id_t	type,
+	const byte*	ptr,
+	const byte*	end_ptr,
+	page_t*		page,
+	mtr_t*		mtr);
 /***********************************************************//**
 Parses the redo log entry of an undo log page header discard.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 trx_undo_parse_discard_latest(
 /*==========================*/
@@ -382,7 +403,6 @@ trx_undo_parse_discard_latest(
 	mtr_t*	mtr);	/*!< in: mtr or NULL */
 /************************************************************************
 Frees an undo log memory copy. */
-UNIV_INTERN
 void
 trx_undo_mem_free(
 /*==============*/
@@ -407,12 +427,12 @@ trx_undo_mem_free(
 #define	TRX_UNDO_PREPARED	5	/* contains an undo log of an
 					prepared transaction */
 
-#ifndef UNIV_HOTBACKUP
-#ifndef UNIV_INNOCHECKSUM
+#if !defined UNIV_HOTBACKUP && !defined UNIV_INNOCHECKSUM
+
 /** Transaction undo log memory object; this is protected by the undo_mutex
 in the corresponding transaction object */
 
-struct trx_undo_t{
+struct trx_undo_t {
 	/*-----------------------------*/
 	ulint		id;		/*!< undo log slot number within the
 					rollback segment */
@@ -439,8 +459,7 @@ struct trx_undo_t{
 	/*-----------------------------*/
 	ulint		space;		/*!< space id where the undo log
 					placed */
-	ulint		zip_size;	/*!< compressed page size of space
-					in bytes, or 0 for uncompressed */
+	page_size_t	page_size;
 	ulint		hdr_page_no;	/*!< page number of the header page in
 					the undo log */
 	ulint		hdr_offset;	/*!< header offset of the undo log on
@@ -462,13 +481,14 @@ struct trx_undo_t{
 	undo_no_t	top_undo_no;	/*!< undo number of the latest record */
 	buf_block_t*	guess_block;	/*!< guess for the buffer block where
 					the top page might reside */
+	ulint		withdraw_clock;	/*!< the withdraw clock value of the
+					buffer pool when guess_block was stored */
 	/*-----------------------------*/
 	UT_LIST_NODE_T(trx_undo_t) undo_list;
 					/*!< undo log objects in the rollback
 					segment are chained into lists */
 };
-#endif /* !UNIV_INNOCHECKSUM */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */
 
 /** The offset of the undo log page header on pages of the undo log */
 #define	TRX_UNDO_PAGE_HDR	FSEG_PAGE_DATA
@@ -516,6 +536,9 @@ log segment */
 /* @{ */
 /*-------------------------------------------------------------*/
 #define	TRX_UNDO_STATE		0	/*!< TRX_UNDO_ACTIVE, ... */
+
+#ifndef UNIV_INNOCHECKSUM
+
 #define	TRX_UNDO_LAST_LOG	2	/*!< Offset of the last undo log header
 					on the segment header page, 0 if
 					none */
@@ -595,7 +618,6 @@ quite a large overhead. */
 					with the XA XID */
 /* @} */
 
-#ifndef UNIV_INNOCHECKSUM
 #ifndef UNIV_NONINL
 #include "trx0undo.ic"
 #endif
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 577759d6c3d..f8e74d0fb03 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 3/26/1996 Heikki Tuuri
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************//**
 Builds a roll pointer.
-@return	roll pointer */
+@return roll pointer */
 UNIV_INLINE
 roll_ptr_t
 trx_undo_build_roll_ptr(
@@ -85,7 +85,7 @@ trx_undo_decode_roll_ptr(
 
 /***********************************************************************//**
 Returns TRUE if the roll pointer is of the insert type.
-@return	TRUE if insert undo log */
+@return TRUE if insert undo log */
 UNIV_INLINE
 ibool
 trx_undo_roll_ptr_is_insert(
@@ -104,7 +104,7 @@ trx_undo_roll_ptr_is_insert(
 
 /***********************************************************************//**
 Returns true if the record is of the insert type.
-@return	true if the record was freshly inserted (not updated). */
+@return true if the record was freshly inserted (not updated). */
 UNIV_INLINE
 bool
 trx_undo_trx_id_is_insert(
@@ -140,7 +140,7 @@ trx_write_roll_ptr(
 Reads a roll ptr from an index page. In case that the roll ptr size
 changes in some future version, this function should be used instead of
 mach_read_...
-@return	roll ptr */
+@return roll ptr */
 UNIV_INLINE
 roll_ptr_t
 trx_read_roll_ptr(
@@ -154,41 +154,42 @@ trx_read_roll_ptr(
 }
 
 #ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return	pointer to page x-latched */
+
+/** Gets an undo log page and x-latches it.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return pointer to page x-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get(
-/*==============*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number */
-	mtr_t*	mtr)		/*!< in: mtr */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
 {
-	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+	buf_block_t*	block = buf_page_get(page_id, page_size,
 					     RW_X_LATCH, mtr);
+
 	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
 
 	return(buf_block_get_frame(block));
 }
 
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return	pointer to page s-latched */
+/** Gets an undo log page and s-latches it.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	mtr		mini-transaction
+@return pointer to page s-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get_s_latched(
-/*========================*/
-	ulint	space,		/*!< in: space where placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number */
-	mtr_t*	mtr)		/*!< in: mtr */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
 {
-	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+	buf_block_t*	block = buf_page_get(page_id, page_size,
 					     RW_S_LATCH, mtr);
+
 	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
 
 	return(buf_block_get_frame(block));
@@ -197,7 +198,7 @@ trx_undo_page_get_s_latched(
 /******************************************************************//**
 Returns the start offset of the undo log records of the specified undo
 log on the page.
-@return	start offset */
+@return start offset */
 UNIV_INLINE
 ulint
 trx_undo_page_get_start(
@@ -222,7 +223,7 @@ trx_undo_page_get_start(
 /******************************************************************//**
 Returns the end offset of the undo log records of the specified undo
 log on the page.
-@return	end offset */
+@return end offset */
 UNIV_INLINE
 ulint
 trx_undo_page_get_end(
@@ -255,7 +256,7 @@ trx_undo_page_get_end(
 /******************************************************************//**
 Returns the previous undo record on the page in the specified log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_prev_rec(
@@ -282,7 +283,7 @@ trx_undo_page_get_prev_rec(
 /******************************************************************//**
 Returns the next undo log record on the page in the specified log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_next_rec(
@@ -312,7 +313,7 @@ trx_undo_page_get_next_rec(
 /******************************************************************//**
 Returns the last undo record on the page in the specified undo log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_last_rec(
@@ -338,7 +339,7 @@ trx_undo_page_get_last_rec(
 /******************************************************************//**
 Returns the first undo record on the page in the specified undo log, or
 NULL if none exists.
-@return	pointer to record, NULL if none */
+@return pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_first_rec(
diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h
index 7caddfb7ba4..b333f32cd73 100644
--- a/storage/innobase/include/trx0xa.h
+++ b/storage/innobase/include/trx0xa.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -24,6 +24,13 @@ this program; if not, write to the Free Software Foundation, Inc.,
 #ifndef	XA_H
 #define	XA_H
 
+/* Missing MySQL 5.7 header */
+#ifdef HAVE_XA_H
+#include "xa.h"
+#else
+#include "handler.h"
+#endif
+
 /*
  * Transaction branch identification: XID and NULLXID:
  */
@@ -35,17 +42,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
 #define	MAXGTRIDSIZE	 64		/*!< maximum size in bytes of gtrid */
 #define	MAXBQUALSIZE	 64		/*!< maximum size in bytes of bqual */
 
-/** X/Open XA distributed transaction identifier */
-struct xid_t {
-	long formatID;			/*!< format identifier; -1
-					means that the XID is null */
-	long gtrid_length;		/*!< value from 1 through 64 */
-	long bqual_length;		/*!< value from 1 through 64 */
-	char data[XIDDATASIZE];		/*!< distributed transaction
-					identifier */
-};
-/** X/Open XA distributed transaction identifier */
-typedef	struct xid_t XID;
 #endif
 /** X/Open XA distributed transaction status codes */
 /* @{ */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 81190c3ad2e..f885bd2191f 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -2,7 +2,7 @@
 
 Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2015, MariaDB Corporation.
+Copyright (c) 2013, 2016, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -44,8 +44,8 @@ Created 1/20/1994 Heikki Tuuri
 #define IB_TO_STR(s)	_IB_TO_STR(s)
 
 #define INNODB_VERSION_MAJOR	5
-#define INNODB_VERSION_MINOR	6
-#define INNODB_VERSION_BUGFIX	33
+#define INNODB_VERSION_MINOR	7
+#define INNODB_VERSION_BUGFIX	14
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -75,63 +75,58 @@ the virtual method table (vtable) in GCC 3. */
 # define ha_innobase ha_innodb
 #endif /* MYSQL_DYNAMIC_PLUGIN */
 
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
-# undef __WIN__
-# define __WIN__
-
+#if defined(_WIN32)
 # include <windows.h>
+#endif /* _WIN32 */
 
-# ifdef _NT_
-#  define __NT__
-# endif
-
-#else
 /* The defines used with MySQL */
 
-/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. These headers also define __WIN__
-if we are compiling on Windows. */
-
 #ifndef UNIV_HOTBACKUP
-# include <my_global.h>
-# include <my_pthread.h>
-#endif /* UNIV_HOTBACKUP */
-
-/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
-# include <sys/stat.h>
-# if !defined(__WIN__)
-#  include <sys/mman.h> /* mmap() for os0proc.cc */
-# endif
 
-/* Include the header file generated by GNU autoconf */
-# ifndef __WIN__
-#  ifndef UNIV_HOTBACKUP
-#   include "my_config.h"
-#  endif /* UNIV_HOTBACKUP */
-# endif
+/* Include a minimum number of SQL header files so that few changes
+made in SQL code cause a complete InnoDB rebuild.  These headers are
+used throughout InnoDB but do not include too much themselves.  They
+support cross-platform development and expose commonly used SQL names. */
 
-# ifdef HAVE_SCHED_H
-#  include <sched.h>
-# endif
+# include <my_global.h>
 
-/* We only try to do explicit inlining of functions with gcc and
-Sun Studio */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_MY_THREAD_H
+//# include <my_thread.h>
+#endif
 
-# ifdef HAVE_PREAD
-#  define HAVE_PWRITE
-# endif
+# ifndef UNIV_INNOCHECKSUM
+#  include <m_string.h>
+#  include <mysqld_error.h>
+# endif /* !UNIV_INNOCHECKSUM */
+#endif /* !UNIV_HOTBACKUP  */
 
-#endif /* #if (defined(WIN32) || ... */
+/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
+#include <sys/stat.h>
+
+#ifndef _WIN32
+# include <sys/mman.h> /* mmap() for os0proc.cc */
+# include <sched.h>
+#endif /* !_WIN32 */
+
+/* Include the header file generated by CMake */
+#ifndef _WIN32
+# ifndef UNIV_HOTBACKUP
+#  include "my_config.h"
+# endif /* UNIV_HOTBACKUP */
+#endif
 
-#ifndef __WIN__
-#define __STDC_FORMAT_MACROS    /* Enable C99 printf format macros */
+#include <stdint.h>
 #include <inttypes.h>
-#endif /* !__WIN__ */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 
+#include "my_pthread.h"
 /* Following defines are to enable performance schema
-instrumentation in each of four InnoDB modules if
+instrumentation in each of five InnoDB modules if
 HAVE_PSI_INTERFACE is defined. */
-#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP
+#if defined(HAVE_PSI_INTERFACE) && !defined(UNIV_HOTBACKUP)
 # define UNIV_PFS_MUTEX
 # define UNIV_PFS_RWLOCK
 /* For I/O instrumentation, performance schema rely
@@ -139,11 +134,17 @@ on a native descriptor to identify the file, this
 descriptor could conflict with our OS level descriptor.
 Disable IO instrumentation on Windows until this is
 resolved */
-# ifndef __WIN__
+# ifndef _WIN32
 #  define UNIV_PFS_IO
 # endif
 # define UNIV_PFS_THREAD
 
+// JAN: TODO: MySQL 5.7 PSI
+// # include "mysql/psi/psi.h" /* HAVE_PSI_MEMORY_INTERFACE */
+# ifdef HAVE_PSI_MEMORY_INTERFACE
+#  define UNIV_PFS_MEMORY
+# endif /* HAVE_PSI_MEMORY_INTERFACE */
+
 /* There are mutexes/rwlocks that we want to exclude from
 instrumentation even if their corresponding performance schema
 define is set. And this PFS_NOT_INSTRUMENTED is used
@@ -153,26 +154,52 @@ be excluded from instrumentation. */
 
 # define PFS_IS_INSTRUMENTED(key)	((key) != PFS_NOT_INSTRUMENTED)
 
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_PFS_THREAD_PROVIDER_H
+/* For PSI_MUTEX_CALL() and similar. */
+#include "pfs_thread_provider.h"
+#endif
+
+#include "mysql/psi/mysql_thread.h"
+/* For PSI_FILE_CALL(). */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_PFS_FILE_PROVIDER_H
+#include "pfs_file_provider.h"
+#endif
+
+#include "mysql/psi/mysql_file.h"
+
 #endif /* HAVE_PSI_INTERFACE */
 
-#ifdef __WIN__
+#ifdef _WIN32
 # define YY_NO_UNISTD_H 1
-#endif /* __WIN__ */
+/* VC++ tries to optimise for size by default, from V8+. The size of
+the pointer to member depends on whether the type is defined before the
+compiler sees the type in the translation unit. This default behaviour
+can cause the pointer to be a different size in different translation
+units, depending on the above rule. We force the full-generality
+representation for all cases. This is used by ut0lst.h related code. */
+# pragma pointers_to_members(full_generality, multiple_inheritance)
+#endif /* _WIN32 */
 
 /*			DEBUG VERSION CONTROL
 			===================== */
 
 /* When this macro is defined then additional test functions will be
 compiled. These functions live at the end of each relevant source file
-and have "test_" prefix. These functions are not called from anywhere in
-the code, they can be called from gdb after
+and have "test_" prefix. These functions can be called from the end of
+innobase_init() or they can be called from gdb after
 innobase_start_or_create_for_mysql() has executed using the call
-command. Not tested on Windows. */
+command. */
 /*
 #define UNIV_COMPILE_TEST_FUNCS
+#define UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
+#define UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
+#define UNIV_ENABLE_UNIT_TEST_DICT_STATS
+#define UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
 */
 
-#if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H)
+#if defined HAVE_VALGRIND
 # define UNIV_DEBUG_VALGRIND
 #endif /* HAVE_VALGRIND */
 #if 0
@@ -191,29 +218,18 @@ command. Not tested on Windows. */
 #define UNIV_DEBUG_LOCK_VALIDATE		/* Enable
 						ut_ad(lock_rec_validate_page())
 						assertions. */
-#define UNIV_DEBUG_FILE_ACCESSES		/* Enable freed block access
-						debugging without UNIV_DEBUG */
 #define UNIV_LRU_DEBUG				/* debug the buffer pool LRU */
 #define UNIV_HASH_DEBUG				/* debug HASH_ macros */
-#define UNIV_LIST_DEBUG				/* debug UT_LIST_ macros */
 #define UNIV_LOG_LSN_DEBUG			/* write LSN to the redo log;
 this will break redo log file compatibility, but it may be useful when
 debugging redo log application problems. */
-#define UNIV_MEM_DEBUG				/* detect memory leaks etc */
 #define UNIV_IBUF_DEBUG				/* debug the insert buffer */
-#define UNIV_BLOB_DEBUG				/* track BLOB ownership;
-assumes that no BLOBs survive server restart */
 #define UNIV_IBUF_COUNT_DEBUG			/* debug the insert buffer;
 this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
 and the insert buffer must be empty when the database is started */
 #define UNIV_PERF_DEBUG                         /* debug flag that enables
                                                 light weight performance
                                                 related stuff. */
-#define UNIV_SYNC_DEBUG				/* debug mutex and latch
-operations (very slow); also UNIV_DEBUG must be defined */
-#define UNIV_SEARCH_DEBUG			/* debug B-tree comparisons */
-#define UNIV_SYNC_PERF_STAT			/* operation counts for
-						rw-locks and mutexes */
 #define UNIV_SEARCH_PERF_STAT			/* statistics for the
 						adaptive hash index */
 #define UNIV_SRV_PRINT_LATCH_WAITS		/* enable diagnostic output
@@ -237,27 +253,30 @@ operations (very slow); also UNIV_DEBUG must be defined */
 #define UNIV_BTR_DEBUG				/* check B-tree links */
 #define UNIV_LIGHT_MEM_DEBUG			/* light memory debugging */
 
-/*
-#define UNIV_SQL_DEBUG
-#define UNIV_LOG_DEBUG
-*/
-			/* the above option prevents forcing of log to disk
-			at a buffer page write: it should be tested with this
-			option off; also some ibuf tests are suppressed */
+// #define UNIV_SQL_DEBUG
 
 /* Linkage specifier for non-static InnoDB symbols (variables and functions)
 that are only referenced from within InnoDB, not from MySQL. We disable the
 GCC visibility directive on all Sun operating systems because there is no
 easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
-#define MY_ATTRIBUTE __attribute__
 #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
-# define UNIV_INTERN MY_ATTRIBUTE((visibility ("hidden")))
+# define UNIV_INTERN __attribute__((visibility ("hidden")))
 #else
 # define UNIV_INTERN
 #endif
-#if defined(INNODB_COMPILER_HINTS)      \
+
+#ifndef MY_ATTRIBUTE
+#if defined(__GNUC__)
+#  define MY_ATTRIBUTE(A) __attribute__(A)
+#else
+#  define MY_ATTRIBUTE(A)
+#endif
+#endif
+
+#if defined(COMPILER_HINTS)      \
     && defined __GNUC__                 \
     && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+
 /** Starting with GCC 4.3, the "cold" attribute is used to inform the
 compiler that a function is unlikely executed.  The function is
 optimized for size rather than speed and on many targets it is placed
@@ -281,26 +300,25 @@ rarely invoked function for size instead for speed. */
 definitions: */
 
 #define UNIV_NONINL
-#define UNIV_INLINE	UNIV_INTERN
+#define UNIV_INLINE
 
 #endif /* !UNIV_MUST_NOT_INLINE */
 
 #ifdef _WIN32
-#define UNIV_WORD_SIZE		4
-#elif defined(_WIN64)
-#define UNIV_WORD_SIZE		8
-#else
-/** MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
+# ifdef _WIN64
+#  define UNIV_WORD_SIZE	8
+# else
+#  define UNIV_WORD_SIZE	4
+# endif
+#else	 /* !_WIN32 */
+/** MySQL config.h generated by CMake will define SIZEOF_LONG in Posix */
 #define UNIV_WORD_SIZE		SIZEOF_LONG
-#endif
+#endif	 /* _WIN32 */
 
 /** The following alignment is used in memory allocations in memory heap
 management to ensure correct alignment for doubles etc. */
 #define UNIV_MEM_ALIGNMENT	8
 
-/** The following alignment is used in aligning lints etc. */
-#define UNIV_WORD_ALIGNMENT	UNIV_WORD_SIZE
-
 /*
 			DATABASE VERSION CONTROL
 			========================
@@ -387,6 +405,8 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
 #define UNIV_PAGE_SIZE_SHIFT_DEF	14
 /** Original 16k InnoDB Page Size Shift, in case the default changes */
 #define UNIV_PAGE_SIZE_SHIFT_ORIG	14
+/** Original 16k InnoDB Page Size as an ssize (log2 - 9) */
+#define UNIV_PAGE_SSIZE_ORIG		(UNIV_PAGE_SIZE_SHIFT_ORIG - 9)
 
 /** Minimum page size InnoDB currently supports. */
 #define UNIV_PAGE_SIZE_MIN	(1 << UNIV_PAGE_SIZE_SHIFT_MIN)
@@ -403,11 +423,16 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
 /** Largest compressed page size */
 #define UNIV_ZIP_SIZE_MAX	(1 << UNIV_ZIP_SIZE_SHIFT_MAX)
 
-/** Number of supported page sizes (The convention 'ssize' is used
-for 'log2 minus 9' or the number of shifts starting with 512.)
-This number varies depending on UNIV_PAGE_SIZE. */
-#define UNIV_PAGE_SSIZE_MAX					\
-	(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+/** Largest possible ssize for an uncompressed page.
+(The convention 'ssize' is used for 'log2 minus 9' or the number of
+shifts starting with 512.)
+This max number varies depending on UNIV_PAGE_SIZE. */
+#define UNIV_PAGE_SSIZE_MAX	\
+	static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+
+/** Smallest possible ssize for an uncompressed page. */
+#define UNIV_PAGE_SSIZE_MIN	\
+	static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
 
 /** Maximum number of parallel threads in a parallelized operation */
 #define UNIV_MAX_PARALLELISM	32
@@ -435,6 +460,10 @@ database name and table name. In addition, 14 bytes is added for:
 #define MAX_FULL_NAME_LEN				\
 	(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
 
+/** Maximum length of the compression algorithm string. Currently we support
+only (NONE | ZLIB | LZ4). */
+#define MAX_COMPRESSION_LEN     4
+
 /** The maximum length in bytes that a database name can occupy when stored in
 UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
 mysql_com.h if you are to use this macro. */
@@ -458,33 +487,28 @@ the word size of the machine, that is on a 32-bit platform 32 bits, and on a
 64-bit platform 64 bits. We also give the printf format for the type as a
 macro ULINTPF. */
 
-
-#ifdef __WIN__
+#ifdef _WIN32
 /* Use the integer types and formatting strings defined in Visual Studio. */
-# define UINT32PF	"%I32u"
-# define INT64PF	"%I64d"
-# define UINT64PF	"%I64u"
-# define UINT64PFx	"%016I64x"
-# define DBUG_LSN_PF    "%llu"
-typedef __int64 ib_int64_t;
+# define UINT32PF	"%u"
+# define UINT64PF	"%llu"
+# define UINT64PFx	"%016llx"
+# define UINT64scan     "llu"
 typedef unsigned __int64 ib_uint64_t;
 typedef unsigned __int32 ib_uint32_t;
 #else
-/* Use the integer types and formatting strings defined in the C99 standard. */
-# define UINT32PF	"%" PRIu32
-# define INT64PF	"%" PRId64
-# define UINT64PF	"%" PRIu64
-# define UINT64PFx	"%016" PRIx64
-# define DBUG_LSN_PF    UINT64PF
-typedef int64_t ib_int64_t;
+# define UINT32PF	"%u"
+#if SIZEOF_LONG == 8
+# define UINT64PF	"%lu"
+# define UINT64PFx	"%016lx"
+# define UINT64scan     "lu"
+#else
+# define UINT64PF	"%llu"
+# define UINT64PFx	"%016llx"
+# define UINT64scan     "llu"
+#endif
 typedef uint64_t ib_uint64_t;
 typedef uint32_t ib_uint32_t;
-# endif /* __WIN__ */
-
-# define IB_ID_FMT	UINT64PF
-
-/* Type used for all log sequence number storage and arithmetics */
-typedef	ib_uint64_t		lsn_t;
+#endif /* _WIN32 */
 
 #ifdef _WIN64
 typedef unsigned __int64	ulint;
@@ -496,11 +520,7 @@ typedef long int		lint;
 # define ULINTPF		"%lu"
 #endif /* _WIN64 */
 
-#ifndef UNIV_HOTBACKUP
-typedef unsigned long long int	ullint;
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef __WIN__
+#ifndef _WIN32
 #if SIZEOF_LONG != SIZEOF_VOIDP
 #error "Error: InnoDB's ulint must be of the same size as void*"
 #endif
@@ -526,12 +546,13 @@ typedef unsigned long long int	ullint;
 #define IB_UINT64_MAX		((ib_uint64_t) (~0ULL))
 
 /** The generic InnoDB system object identifier data type */
-typedef ib_uint64_t		ib_id_t;
-#define IB_ID_MAX		IB_UINT64_MAX
-
-/** The 'undefined' value for a ullint */
-#define ULLINT_UNDEFINED        ((ullint)(-1))
+typedef ib_uint64_t	        ib_id_t;
+#define IB_ID_MAX               (~(ib_id_t) 0)
+#define IB_ID_FMT               UINT64PF
 
+#ifndef UINTMAX_MAX
+#define UINTMAX_MAX		IB_UINT64_MAX
+#endif
 /** This 'ibool' type is used within Innobase. Remember that different included
 headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
 #define ibool			ulint
@@ -560,8 +581,7 @@ contains the sum of the following flag and the locally stored len. */
 
 #define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF)
 
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-#define HAVE_GCC_GT_2
+#if defined(__GNUC__)
 /* Tell the compiler that variable/function is unused. */
 # define UNIV_UNUSED    MY_ATTRIBUTE ((unused))
 #else
@@ -569,7 +589,7 @@ contains the sum of the following flag and the locally stored len. */
 #endif /* CHECK FOR GCC VER_GT_2 */
 
 /* Some macros to improve branch prediction and reduce cache misses */
-#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2)
+#if defined(COMPILER_HINTS) && defined(__GNUC__)
 /* Tell the compiler that 'expr' probably evaluates to 'constant'. */
 # define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
 /* Tell the compiler that a pointer is likely to be NULL */
@@ -582,28 +602,30 @@ it is read or written. */
 # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
 
 /* Sun Studio includes sun_prefetch.h as of version 5.9 */
-#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
-       || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
+#elif (defined(__SUNPRO_C) || defined(__SUNPRO_CC))
 
 # include <sun_prefetch.h>
 
-#if __SUNPRO_C >= 0x550
-# undef UNIV_INTERN
-# define UNIV_INTERN __hidden
-#endif /* __SUNPRO_C >= 0x550 */
-
 # define UNIV_EXPECT(expr,value) (expr)
 # define UNIV_LIKELY_NULL(expr) (expr)
 
-# if defined(INNODB_COMPILER_HINTS)
+# if defined(COMPILER_HINTS)
 //# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
 #  define UNIV_PREFETCH_R(addr) ((void) 0)
 #  define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
 # else
 #  define UNIV_PREFETCH_R(addr) ((void) 0)
 #  define UNIV_PREFETCH_RW(addr) ((void) 0)
-# endif /* INNODB_COMPILER_HINTS */
+# endif /* COMPILER_HINTS */
 
+#elif defined _WIN32 && defined COMPILER_HINTS
+# include <xmmintrin.h>
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+// _MM_HINT_T0 (temporal data): prefetch into all levels of the cache
+// hierarchy. 'addr' is parenthesized so the cast applies to the whole argument.
+# define UNIV_PREFETCH_R(addr) _mm_prefetch((char *) (addr), _MM_HINT_T0)
+# define UNIV_PREFETCH_RW(addr) _mm_prefetch((char *) (addr), _MM_HINT_T0)
 #else
 /* Dummy versions of the macros */
 # define UNIV_EXPECT(expr,value) (expr)
@@ -624,18 +646,25 @@ it is read or written. */
 Windows, so define a typedef for it and a macro to use at the end of such
 functions. */
 
-#ifdef __WIN__
+#ifdef _WIN32
 typedef ulint os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(0)
+# define OS_THREAD_DUMMY_RETURN		return(0)
+# define OS_PATH_SEPARATOR		'\\'
+# define OS_PATH_SEPARATOR_ALT		'/'
 #else
 typedef void* os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(NULL)
+# define OS_THREAD_DUMMY_RETURN		return(NULL)
+# define OS_PATH_SEPARATOR		'/'
+# define OS_PATH_SEPARATOR_ALT		'\\'
 #endif
 
 #include <stdio.h>
+#include "db0err.h"
 #include "ut0dbg.h"
+#include "ut0lst.h"
 #include "ut0ut.h"
-#include "db0err.h"
+#include "sync0types.h"
+
 #ifdef UNIV_DEBUG_VALGRIND
 # include <valgrind/memcheck.h>
 # define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
@@ -699,4 +728,10 @@ typedef void* os_thread_ret_t;
 extern ulong	srv_page_size_shift;
 extern ulong	srv_page_size;
 
+static const size_t UNIV_SECTOR_SIZE = 512;
+
+/* Number of dimensions of the spatial objects we support so far. It has its
+roots in myisam/sp_defs.h. We only support 2-dimensional data. */
+#define SPDIMS          2
+
 #endif
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
index b5c80b97b43..21c32986259 100644
--- a/storage/innobase/include/usr0sess.h
+++ b/storage/innobase/include/usr0sess.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,14 +38,12 @@ Created 6/25/1996 Heikki Tuuri
 
 /*********************************************************************//**
 Opens a session.
-@return	own: session object */
-UNIV_INTERN
+@return own: session object */
 sess_t*
 sess_open(void);
 /*============*/
 /*********************************************************************//**
 Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
 void
 sess_close(
 /*=======*/
@@ -60,9 +58,6 @@ struct sess_t{
 					transaction instance designated by the
 					trx id changes, but the memory
 					structure is preserved */
-	UT_LIST_BASE_NODE_T(que_t)
-			graphs;		/*!< query graphs belonging to this
-					session */
 };
 
 /* Session states */
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
deleted file mode 100644
index 1085736c7ab..00000000000
--- a/storage/innobase/include/ut0bh.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle Corpn. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.h
-Binary min-heap interface.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_UT0BH_H
-#define INNOBASE_UT0BH_H
-
-#include "univ.i"
-
-/** Comparison function for objects in the binary heap. */
-typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-
-struct ib_bh_t;
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in: instance */
-	ulint		i);			/*!< in: index */
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
-	ib_bh_t*	ib_bh,			/*!< in/out: instance */
-	ulint		i,			/*!< in: index */
-	const void*	elem);			/*!< in: element to add */
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
-	ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
-	ib_bh_t*	ib_bh);			/*!< in/out: instance */
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
-	ib_bh_cmp_t	compare,		/*!< in: comparator */
-	ulint		sizeof_elem,		/*!< in: size of one element */
-	ulint		max_elems);		/*!< in: max elements allowed */
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
-	ib_bh_t*	ib_bh);			/*!< in,own: instance */
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in/out: instance */
-	const void*	elem);			/*!< in: element to add */
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
-	ib_bh_t*	ib_bh);			/*!< in/out: instance */
-
-/** Binary heap data structure */
-struct ib_bh_t {
-	ulint		max_elems;		/*!< max elements allowed */
-	ulint		n_elems;		/*!< current size */
-	ulint		sizeof_elem;		/*!< sizeof element */
-	ib_bh_cmp_t	compare;		/*!< comparator */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0bh.ic"
-#endif
-
-#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic
deleted file mode 100644
index b11de5b8b3e..00000000000
--- a/storage/innobase/include/ut0bh.ic
+++ /dev/null
@@ -1,125 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.ic
-Binary min-heap implementation.
-
-Created 2011-01-15 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h"	/* For ut_memcpy() */
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh->n_elems);
-}
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh_size(ib_bh) == 0);
-}
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
-}
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in: instance */
-	ulint		i)			/*!< in: index */
-{
-	byte*		ptr = (byte*) (ib_bh + 1);
-
-	ut_a(i < ib_bh_size(ib_bh));
-
-	return(ptr + (ib_bh->sizeof_elem * i));
-}
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
-	ib_bh_t*	ib_bh,			/*!< in/out: instance */
-	ulint		i,			/*!< in: index */
-	const void*	elem)			/*!< in: element to add */
-{
-	void*		ptr = ib_bh_get(ib_bh, i);
-
-	ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
-
-	return(ptr);
-}
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
-	ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
-}
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
-	ib_bh_t*	ib_bh)			/*!< in/out: instance */
-{
-	return(ib_bh_is_empty(ib_bh)
-		? NULL
-		: ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
-}
-
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
index 4893ab9f9af..bcf1b8e4a2d 100644
--- a/storage/innobase/include/ut0byte.h
+++ b/storage/innobase/include/ut0byte.h
@@ -32,7 +32,7 @@ Created 1/20/1994 Heikki Tuuri
 
 /*******************************************************//**
 Creates a 64-bit integer out of two 32-bit integers.
-@return	created integer */
+@return created integer */
 UNIV_INLINE
 ib_uint64_t
 ut_ull_create(
@@ -43,7 +43,7 @@ ut_ull_create(
 
 /********************************************************//**
 Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return	rounded value */
+@return rounded value */
 UNIV_INLINE
 ib_uint64_t
 ut_uint64_align_down(
@@ -53,7 +53,7 @@ ut_uint64_align_down(
 					which must be a power of 2 */
 /********************************************************//**
 Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return	rounded value */
+@return rounded value */
 UNIV_INLINE
 ib_uint64_t
 ut_uint64_align_up(
@@ -63,7 +63,7 @@ ut_uint64_align_up(
 					which must be a power of 2 */
 /*********************************************************//**
 The following function rounds up a pointer to the nearest aligned address.
-@return	aligned pointer */
+@return aligned pointer */
 UNIV_INLINE
 void*
 ut_align(
@@ -73,7 +73,7 @@ ut_align(
 /*********************************************************//**
 The following function rounds down a pointer to the nearest
 aligned address.
-@return	aligned pointer */
+@return aligned pointer */
 UNIV_INLINE
 void*
 ut_align_down(
@@ -84,7 +84,7 @@ ut_align_down(
 /*********************************************************//**
 The following function computes the offset of a pointer from the nearest
 aligned address.
-@return	distance from aligned pointer */
+@return distance from aligned pointer */
 UNIV_INLINE
 ulint
 ut_align_offset(
@@ -94,7 +94,7 @@ ut_align_offset(
 			MY_ATTRIBUTE((const));
 /*****************************************************************//**
 Gets the nth bit of a ulint.
-@return	TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
 UNIV_INLINE
 ibool
 ut_bit_get_nth(
@@ -103,7 +103,7 @@ ut_bit_get_nth(
 	ulint	n);	/*!< in: nth bit requested */
 /*****************************************************************//**
 Sets the nth bit of a ulint.
-@return	the ulint with the bit set as requested */
+@return the ulint with the bit set as requested */
 UNIV_INLINE
 ulint
 ut_bit_set_nth(
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 1a7af5ae33d..9c0cd6ee3c3 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 5/30/1994 Heikki Tuuri
 
 /*******************************************************//**
 Creates a 64-bit integer out of two 32-bit integers.
-@return	created integer */
+@return created integer */
 UNIV_INLINE
 ib_uint64_t
 ut_ull_create(
@@ -40,7 +40,7 @@ ut_ull_create(
 
 /********************************************************//**
 Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return	rounded value */
+@return rounded value */
 UNIV_INLINE
 ib_uint64_t
 ut_uint64_align_down(
@@ -57,7 +57,7 @@ ut_uint64_align_down(
 
 /********************************************************//**
 Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return	rounded value */
+@return rounded value */
 UNIV_INLINE
 ib_uint64_t
 ut_uint64_align_up(
@@ -76,7 +76,7 @@ ut_uint64_align_up(
 
 /*********************************************************//**
 The following function rounds up a pointer to the nearest aligned address.
-@return	aligned pointer */
+@return aligned pointer */
 UNIV_INLINE
 void*
 ut_align(
@@ -96,7 +96,7 @@ ut_align(
 /*********************************************************//**
 The following function rounds down a pointer to the nearest
 aligned address.
-@return	aligned pointer */
+@return aligned pointer */
 UNIV_INLINE
 void*
 ut_align_down(
@@ -116,7 +116,7 @@ ut_align_down(
 /*********************************************************//**
 The following function computes the offset of a pointer from the nearest
 aligned address.
-@return	distance from aligned pointer */
+@return distance from aligned pointer */
 UNIV_INLINE
 ulint
 ut_align_offset(
@@ -135,7 +135,7 @@ ut_align_offset(
 
 /*****************************************************************//**
 Gets the nth bit of a ulint.
-@return	TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
 UNIV_INLINE
 ibool
 ut_bit_get_nth(
@@ -152,7 +152,7 @@ ut_bit_get_nth(
 
 /*****************************************************************//**
 Sets the nth bit of a ulint.
-@return	the ulint with the bit set as requested */
+@return the ulint with the bit set as requested */
 UNIV_INLINE
 ulint
 ut_bit_set_nth(
diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h
index 63a133a175d..175427df333 100644
--- a/storage/innobase/include/ut0counter.h
+++ b/storage/innobase/include/ut0counter.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -24,19 +24,27 @@ Counter utility class
 Created 2012/04/12 by Sunny Bains
 *******************************************************/
 
-#ifndef UT0COUNTER_H
-#define UT0COUNTER_H
+#ifndef ut0counter_h
+#define ut0counter_h
 
+#include <my_rdtsc.h>
 #include "univ.i"
-#include <string.h>
 #include "os0thread.h"
 
 /** CPU cache line size */
-#ifdef __powerpc__
+#ifndef UNIV_HOTBACKUP
+# ifdef CPU_LEVEL1_DCACHE_LINESIZE
+#  define CACHE_LINE_SIZE	CPU_LEVEL1_DCACHE_LINESIZE
+# else
+#  error CPU_LEVEL1_DCACHE_LINESIZE is undefined
+# endif /* CPU_LEVEL1_DCACHE_LINESIZE */
+#else /* UNIV_HOTBACKUP is defined: fall back to fixed sizes */
+#ifdef __powerpc__ /* reserved-name macro, as in the closing #endif comment */
 #define CACHE_LINE_SIZE		128
 #else
-#define CACHE_LINE_SIZE		64
-#endif
+# define CACHE_LINE_SIZE	64
+#endif /* __powerpc__ */
+#endif /* UNIV_HOTBACKUP */
 
 /** Default number of slots to use in ib_counter_t */
 #define IB_N_SLOTS		64
@@ -47,63 +55,67 @@ struct generic_indexer_t {
 	/** Default constructor/destructor should be OK. */
 
         /** @return offset within m_counter */
-        size_t offset(size_t index) const UNIV_NOTHROW {
+        static size_t offset(size_t index) UNIV_NOTHROW
+	{
                 return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
         }
 };
 
-#ifdef HAVE_SCHED_GETCPU
-#include <utmpx.h>
-/** Use the cpu id to index into the counter array. If it fails then
-use the thread id. */
-template <typename Type, int N>
-struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
-	/** Default constructor/destructor should be OK. */
-
-	/* @return result from sched_getcpu(), the thread id if it fails. */
-	size_t get_rnd_index() const UNIV_NOTHROW {
+/** Use the result of my_timer_cycles(), which mainly uses RDTSC for cycles,
+to index into the counter array. See the comments for my_timer_cycles() */
+template <typename Type=ulint, int N=1>
+struct counter_indexer_t : public generic_indexer_t<Type, N> {
 
-		size_t	cpu = sched_getcpu();
-		if (cpu == -1) {
-			cpu = (lint) os_thread_get_curr_id();
-		}
+	/** Default constructor/destructor should be OK. */
 
-		return(cpu);
-	}
-};
-#endif /* HAVE_SCHED_GETCPU */
+	enum { fast = 1 };
 
-/** Use the thread id to index into the counter array. */
-template <typename Type, int N>
-struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
-	/** Default constructor/destructor should are OK. */
+	/** @return result from RDTSC or similar functions. */
+	static size_t get_rnd_index() UNIV_NOTHROW
+	{
+		size_t	c = static_cast<size_t>(my_timer_cycles());
+
+		if (c != 0) {
+			return(c);
+		} else {
+			/* We may get here if my_timer_cycles() returns 0,
+			so we need a plan B for the counter index. */
+#if !defined(_WIN32)
+			return(size_t(os_thread_get_curr_id()));
+#else
+			LARGE_INTEGER cnt;
+			QueryPerformanceCounter(&cnt);
 
-	/* @return a random number, currently we use the thread id. Where
-	thread id is represented as a pointer, it may not work as
-	effectively. */
-	size_t get_rnd_index() const UNIV_NOTHROW {
-		return((lint) os_thread_get_curr_id());
+			return(static_cast<size_t>(cnt.QuadPart));
+#endif /* !_WIN32 */
+		}
 	}
 };
 
-/** For counters wher N=1 */
-template <typename Type, int N=1>
+/** For counters where N=1 */
+template <typename Type=ulint, int N=1>
 struct single_indexer_t {
 	/** Default constructor/destructor should are OK. */
 
+	enum { fast = 0 };
+
         /** @return offset within m_counter */
-        size_t offset(size_t index) const UNIV_NOTHROW {
+        static size_t offset(size_t index) UNIV_NOTHROW
+	{
 		ut_ad(N == 1);
                 return((CACHE_LINE_SIZE / sizeof(Type)));
         }
 
-	/* @return 1 */
-	size_t get_rnd_index() const UNIV_NOTHROW {
+	/** @return 1 */
+	static size_t get_rnd_index() UNIV_NOTHROW
+	{
 		ut_ad(N == 1);
 		return(1);
 	}
 };
 
+#define	default_indexer_t	counter_indexer_t
+
 /** Class for using fuzzy counters. The counter is not protected by any
 mutex and the results are not guaranteed to be 100% accurate but close
 enough. Creates an array of counters and separates each element by the
@@ -111,7 +123,7 @@ CACHE_LINE_SIZE bytes */
 template <
 	typename Type,
 	int N = IB_N_SLOTS,
-	template<typename, int> class Indexer = thread_id_indexer_t>
+	template<typename, int> class Indexer = default_indexer_t>
 class ib_counter_t {
 public:
 	ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
@@ -121,6 +133,8 @@ public:
 		ut_ad(validate());
 	}
 
+	static bool is_fast() { return(Indexer<Type, N>::fast); }
+
 	bool validate() UNIV_NOTHROW {
 #ifdef UNIV_DEBUG
 		size_t	n = (CACHE_LINE_SIZE / sizeof(Type));
@@ -139,7 +153,7 @@ public:
 	void inc() UNIV_NOTHROW { add(1); }
 
 	/** If you can't use a good index id.
-	* @param n  - is the amount to increment */
+	@param n is the amount to increment */
 	void add(Type n) UNIV_NOTHROW {
 		size_t	i = m_policy.offset(m_policy.get_rnd_index());
 
@@ -148,10 +162,10 @@ public:
 		m_counter[i] += n;
 	}
 
-	/** Use this if you can use a unique indentifier, saves a
+	/** Use this if you can use a unique identifier, saves a
 	call to get_rnd_index().
-	@param i - index into a slot
-	@param n - amount to increment */
+	@param i index into a slot
+	@param n amount to increment */
 	void add(size_t index, Type n) UNIV_NOTHROW {
 		size_t	i = m_policy.offset(index);
 
@@ -164,7 +178,7 @@ public:
 	void dec() UNIV_NOTHROW { sub(1); }
 
 	/** If you can't use a good index id.
-	* @param - n is the amount to decrement */
+	@param n the amount to decrement */
 	void sub(Type n) UNIV_NOTHROW {
 		size_t	i = m_policy.offset(m_policy.get_rnd_index());
 
@@ -173,10 +187,10 @@ public:
 		m_counter[i] -= n;
 	}
 
-	/** Use this if you can use a unique indentifier, saves a
+	/** Use this if you can use a unique identifier, saves a
 	call to get_rnd_index().
-	@param i - index into a slot
-	@param n - amount to decrement */
+	@param i index into a slot
+	@param n amount to decrement */
 	void sub(size_t index, Type n) UNIV_NOTHROW {
 		size_t	i = m_policy.offset(index);
 
@@ -204,4 +218,4 @@ private:
 	Type		m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
 };
 
-#endif /* UT0COUNTER_H */
+#endif /* ut0counter_h */
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index af6f0bc74e7..36b389b5bd2 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -29,24 +30,31 @@ Created Aug 10, 2011 Vasil Dimov
 #include "univ.i"
 
 /********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
+Initializes the data structures used by ut_crc32*(). Does not do any
 allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
 void
 ut_crc32_init();
 /*===========*/
 
 /********************************************************************//**
 Calculates CRC32.
-@param ptr	- data over which to calculate CRC32.
-@param len	- data length in bytes.
+@param ptr - data over which to calculate CRC32.
+@param len - data length in bytes.
 @return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41,
 or 0x1EDC6F41 without the high-order bit) */
-typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
+typedef uint32_t	(*ut_crc32_func_t)(const byte* ptr, ulint len);
 
-extern ib_ut_crc32_t	ut_crc32;
+/** Pointer to CRC32 calculation function. */
+extern ut_crc32_func_t	ut_crc32;
 
-extern bool	ut_crc32_sse2_enabled;
-extern bool	ut_crc32_power8_enabled;
+/** Pointer to CRC32 calculation function, which uses big-endian byte order
+when converting byte strings to integers internally. */
+extern ut_crc32_func_t	ut_crc32_legacy_big_endian;
+
+/** Pointer to CRC32-byte-by-byte calculation function (byte order agnostic,
+but very slow). */
+extern ut_crc32_func_t	ut_crc32_byte_by_byte;
+
+extern const char*	ut_crc32_implementation;
 
 #endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index 3f5baef0a3c..1a61ed84a38 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -32,59 +32,38 @@ Created 1/30/1994 Heikki Tuuri
 #define ut_error	assert(0)
 #else /* !UNIV_INNOCHECKSUM */
 
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
+/* Do not include univ.i because univ.i includes this. */
 
-#if defined(__GNUC__) && (__GNUC__ > 2)
-/** Test if an assertion fails.
-@param EXPR	assertion expression
-@return		nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-/** This is used to eliminate compiler warnings */
-extern ulint	ut_dbg_zero;
-/** Test if an assertion fails.
-@param EXPR	assertion expression
-@return		nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
+#include "os0thread.h"
 
 /*************************************************************//**
 Report a failed assertion. */
-UNIV_INTERN
 void
 ut_dbg_assertion_failed(
 /*====================*/
 	const char*	expr,	/*!< in: the failed assertion */
 	const char*	file,	/*!< in: source file containing the assertion */
 	ulint		line)	/*!< in: line number of the assertion */
-	UNIV_COLD MY_ATTRIBUTE((nonnull(2)));
-
-/** Abort the execution. */
-# define UT_DBG_PANIC abort()
+	UNIV_COLD MY_ATTRIBUTE((nonnull(2), noreturn));
 
 /** Abort execution if EXPR does not evaluate to nonzero.
-@param EXPR	assertion expression that should hold */
+@param EXPR assertion expression that should hold */
 #define ut_a(EXPR) do {						\
-	if (UT_DBG_FAIL(EXPR)) {				\
+	if (UNIV_UNLIKELY(!(ulint) (EXPR))) {			\
 		ut_dbg_assertion_failed(#EXPR,			\
 				__FILE__, (ulint) __LINE__);	\
-		UT_DBG_PANIC;					\
 	}							\
 } while (0)
 
 /** Abort execution. */
-#define ut_error do {						\
-	ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__);	\
-	UT_DBG_PANIC;						\
-} while (0)
+#define ut_error						\
+	ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__)
 
 #ifdef UNIV_DEBUG
 /** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
 #define ut_ad(EXPR)	ut_a(EXPR)
 /** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_d(EXPR)	do {EXPR;} while (0)
+#define ut_d(EXPR)	EXPR
 #else
 /** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
 #define ut_ad(EXPR)
@@ -93,39 +72,114 @@ ut_dbg_assertion_failed(
 #endif
 
 /** Silence warnings about an unused variable by doing a null assignment.
-@param A	the unused variable */
+@param A the unused variable */
 #define UT_NOT_USED(A)	A = A
 
-#ifdef UNIV_COMPILE_TEST_FUNCS
+#if defined(HAVE_SYS_TIME_H) && defined(HAVE_SYS_RESOURCE_H)
+
+#define HAVE_UT_CHRONO_T
 
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 
-/** structure used for recording usage statistics */
-struct speedo_t {
-	struct rusage	ru;	/*!< getrusage() result */
-	struct timeval	tv;	/*!< gettimeofday() result */
+/** A "chronometer" used to clock snippets of code.
+Example usage:
+	ut_chrono_t	ch("this loop");
+	for (;;) { ... }
+	ch.show();
+would print the timings of the for() loop, prefixed with "this loop:" */
+class ut_chrono_t {
+public:
+	/** Constructor.
+	@param[in]	name	chrono's name, used when showing the values */
+	ut_chrono_t(
+		const char*	name)
+		:
+		m_name(name),
+		m_show_from_destructor(true)
+	{
+		reset();
+	}
+
+	/** Resets the chrono (records the current time in it). */
+	void
+	reset()
+	{
+		gettimeofday(&m_tv, NULL);
+
+		getrusage(RUSAGE_SELF, &m_ru);
+	}
+
+	/** Shows the time elapsed and usage statistics since the last reset. */
+	void
+	show()
+	{
+		struct rusage	ru_now;
+		struct timeval	tv_now;
+		struct timeval	tv_diff;
+
+		getrusage(RUSAGE_SELF, &ru_now);
+
+		gettimeofday(&tv_now, NULL);
+
+#ifndef timersub
+#define timersub(a, b, r)						\
+		do {							\
+			(r)->tv_sec = (a)->tv_sec - (b)->tv_sec;	\
+			(r)->tv_usec = (a)->tv_usec - (b)->tv_usec;	\
+			if ((r)->tv_usec < 0) {				\
+				(r)->tv_sec--;				\
+				(r)->tv_usec += 1000000;		\
+			}						\
+		} while (0)
+#endif /* timersub */
+
+#define CHRONO_PRINT(type, tvp)						\
+		fprintf(stderr, "%s: %s% 5ld.%06ld sec\n",		\
+			m_name, type,					\
+			static_cast<long>((tvp)->tv_sec),		\
+			static_cast<long>((tvp)->tv_usec))
+
+		timersub(&tv_now, &m_tv, &tv_diff);
+		CHRONO_PRINT("real", &tv_diff);
+
+		timersub(&ru_now.ru_utime, &m_ru.ru_utime, &tv_diff);
+		CHRONO_PRINT("user", &tv_diff);
+
+		timersub(&ru_now.ru_stime, &m_ru.ru_stime, &tv_diff);
+		CHRONO_PRINT("sys ", &tv_diff);
+	}
+
+	/** Cause the timings not to be printed from the destructor. */
+	void end()
+	{
+		m_show_from_destructor = false;
+	}
+
+	/** Destructor. */
+	~ut_chrono_t()
+	{
+		if (m_show_from_destructor) {
+			show();
+		}
+	}
+
+private:
+	/** Name of this chronometer. */
+	const char*	m_name;
+
+	/** True if the current timings should be printed by the destructor. */
+	bool		m_show_from_destructor;
+
+	/** getrusage() result as of the last reset(). */
+	struct rusage	m_ru;
+
+	/** gettimeofday() result as of the last reset(). */
+	struct timeval	m_tv;
 };
 
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
-	speedo_t*	speedo);	/*!< out: speedo */
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
-	const speedo_t*	speedo);	/*!< in: speedo */
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* HAVE_SYS_TIME_H && HAVE_SYS_RESOURCE_H */
 
 #endif /* !UNIV_INNOCHECKSUM */
 
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index 796a272db59..3b91384bbff 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -54,8 +54,7 @@ struct ib_list_node_t;
 /****************************************************************//**
 Create a new list using mem_alloc. Lists created with this function must be
 freed with ib_list_free.
-@return	list */
-UNIV_INTERN
+@return list */
 ib_list_t*
 ib_list_create(void);
 /*=================*/
@@ -64,8 +63,7 @@ ib_list_create(void);
 /****************************************************************//**
 Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
 lists created with this function.
-@return	list */
-UNIV_INTERN
+@return list */
 ib_list_t*
 ib_list_create_heap(
 /*================*/
@@ -73,7 +71,6 @@ ib_list_create_heap(
 
 /****************************************************************//**
 Free a list. */
-UNIV_INTERN
 void
 ib_list_free(
 /*=========*/
@@ -81,8 +78,7 @@ ib_list_free(
 
 /****************************************************************//**
 Add the data to the start of the list.
-@return	new list node */
-UNIV_INTERN
+@return new list node */
 ib_list_node_t*
 ib_list_add_first(
 /*==============*/
@@ -92,8 +88,7 @@ ib_list_add_first(
 
 /****************************************************************//**
 Add the data to the end of the list.
-@return	new list node */
-UNIV_INTERN
+@return new list node */
 ib_list_node_t*
 ib_list_add_last(
 /*=============*/
@@ -103,8 +98,7 @@ ib_list_add_last(
 
 /****************************************************************//**
 Add the data after the indicated node.
-@return	new list node */
-UNIV_INTERN
+@return new list node */
 ib_list_node_t*
 ib_list_add_after(
 /*==============*/
@@ -116,7 +110,6 @@ ib_list_add_after(
 
 /****************************************************************//**
 Remove the node from the list. */
-UNIV_INTERN
 void
 ib_list_remove(
 /*===========*/
@@ -125,7 +118,7 @@ ib_list_remove(
 
 /****************************************************************//**
 Get the first node in the list.
-@return	first node, or NULL */
+@return first node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_first(
@@ -134,7 +127,7 @@ ib_list_get_first(
 
 /****************************************************************//**
 Get the last node in the list.
-@return	last node, or NULL */
+@return last node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_last(
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
index 7a7f53adb2f..dd5389a68e1 100644
--- a/storage/innobase/include/ut0list.ic
+++ b/storage/innobase/include/ut0list.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,7 +25,7 @@ Created 4/26/2006 Osku Salerma
 
 /****************************************************************//**
 Get the first node in the list.
-@return	first node, or NULL */
+@return first node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_first(
@@ -37,7 +37,7 @@ ib_list_get_first(
 
 /****************************************************************//**
 Get the last node in the list.
-@return	last node, or NULL */
+@return last node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_last(
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index b53e7ade4c1..09733da20a0 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -21,122 +21,150 @@ this program; if not, write to the Free Software Foundation, Inc.,
 List utilities
 
 Created 9/10/1995 Heikki Tuuri
+Rewritten by Sunny Bains Dec 2011.
 ***********************************************************************/
 
 #ifndef ut0lst_h
 #define ut0lst_h
 
-#include "univ.i"
+/* Do not include univ.i because univ.i includes this. */
+
+#include "ut0dbg.h"
+
+/* This module implements the two-way linear list. Note that a single
+list node may belong to two or more lists, but is only on one list
+at a time. */
 
 /*******************************************************************//**
-Return offset of F in POD T.
-@param T	- POD pointer
-@param F	- Field in T */
-#define IB_OFFSETOF(T, F)						\
-	(reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T))
+The two-way list node.
+@param Type the list node type name */
+template <typename Type>
+struct ut_list_node {
+	Type*		prev;			/*!< pointer to the previous
+						node, NULL if start of list */
+	Type*		next;			/*!< pointer to next node,
+						NULL if end of list */
+
+	void reverse()
+	{
+		Type*	tmp = prev;
+		prev = next;
+		next = tmp;
+	}
+};
 
-/* This module implements the two-way linear list which should be used
-if a list is used in the database. Note that a single struct may belong
-to two or more lists, provided that the list are given different names.
-An example of the usage of the lists can be found in fil0fil.cc. */
+/** Macro used for legacy reasons */
+#define UT_LIST_NODE_T(t)		ut_list_node<t>
 
 /*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which acts
-as the two-way list base node. The base node contains pointers
-to both ends of the list and a count of nodes in the list (excluding
-the base node from the count).
-@param TYPE	the name of the list node data type */
-template <typename TYPE>
+The two-way list base node. The base node contains pointers to both ends
+of the list and a count of nodes in the list (excluding the base node
+from the count). We also store a pointer to the member field so that it
+doesn't have to be specified when doing list operations.
+@param Type the type of the list element
+@param NodePtr field member pointer that points to the list node */
+template <typename Type, typename NodePtr>
 struct ut_list_base {
-	typedef TYPE elem_type;
-
-	ulint	count;	/*!< count of nodes in list */
-	TYPE*	start;	/*!< pointer to list start, NULL if empty */
-	TYPE*	end;	/*!< pointer to list end, NULL if empty */
+	typedef Type elem_type;
+	typedef NodePtr node_ptr;
+	typedef ut_list_node<Type> node_type;
+
+	ulint		count;			/*!< count of nodes in list */
+	elem_type*	start;			/*!< pointer to list start,
+						NULL if empty */
+	elem_type*	end;			/*!< pointer to list end,
+						NULL if empty */
+	node_ptr	node;			/*!< Pointer to member field
+						that is used as a link node */
+#ifdef UNIV_DEBUG
+	ulint		init;			/*!< UT_LIST_INITIALISED if
+						the list was initialised with
+						UT_LIST_INIT() */
+#endif /* UNIV_DEBUG */
+
+	void reverse()
+	{
+		Type*	tmp = start;
+		start = end;
+		end = tmp;
+	}
 };
 
-#define UT_LIST_BASE_NODE_T(TYPE)	ut_list_base<TYPE>
+#define UT_LIST_BASE_NODE_T(t)	ut_list_base<t, ut_list_node<t> t::*>
+
+#ifdef UNIV_DEBUG
+# define UT_LIST_INITIALISED		0xCAFE
+# define UT_LIST_INITIALISE(b)		(b).init = UT_LIST_INITIALISED
+# define UT_LIST_IS_INITIALISED(b)	ut_a(((b).init == UT_LIST_INITIALISED))
+#else
+# define UT_LIST_INITIALISE(b)
+# define UT_LIST_IS_INITIALISED(b)
+#endif /* UNIV_DEBUG */
 
 /*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which
-should be embedded in the nodes of the list, the node type must be a struct.
-This struct contains the pointers to next and previous nodes in the list.
-The name of the field in the node struct should be the name given
-to the list.
-@param TYPE	the list node type name */
-/* Example:
-struct LRU_node_t {
-	UT_LIST_NODE_T(LRU_node_t)	LRU_list;
-	...
+Note: This is really the list constructor. We should be able to use
+placement new here.
+Initializes the base node of a two-way list.
+@param b the list base node
+@param pmf pointer to member field that will be used as the link node */
+#define UT_LIST_INIT(b, pmf)						\
+{									\
+	(b).count = 0;							\
+	(b).start = 0;							\
+	(b).end   = 0;							\
+	(b).node  = pmf;						\
+	UT_LIST_INITIALISE(b);						\
 }
-The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t. */
 
-template <typename TYPE>
-struct ut_list_node {
-	TYPE* 	prev;	/*!< pointer to the previous node,
-			NULL if start of list */
-	TYPE* 	next;	/*!< pointer to next node, NULL if end of list */
-};
+/** Functor for accessing the embedded node within a list element. This is
+required because some lists can have the node embedded inside a nested
+struct/union. See lock0priv.h (table locks) for an example. It provides a
+specialised functor to grant access to the list node. */
+template <typename Type>
+struct GenericGetNode {
 
-#define UT_LIST_NODE_T(TYPE)	ut_list_node<TYPE>
+	typedef ut_list_node<Type> node_type;
 
-/*******************************************************************//**
-Get the list node at offset.
-@param elem	- list element
-@param offset	- offset within element.
-@return reference to list node. */
-template <typename Type>
-ut_list_node<Type>&
-ut_elem_get_node(Type&	elem, size_t offset)
-{
-	ut_a(offset < sizeof(elem));
+	GenericGetNode(node_type Type::* node) : m_node(node) {}
 
-	return(*reinterpret_cast<ut_list_node<Type>*>(
-		reinterpret_cast<byte*>(&elem) + offset));
-}
+	node_type& operator() (Type& elem)
+	{
+		return(elem.*m_node);
+	}
 
-/*******************************************************************//**
-Initializes the base node of a two-way list.
-@param BASE	the list base node
-*/
-#define UT_LIST_INIT(BASE)\
-{\
-	(BASE).count = 0;\
-	(BASE).start = NULL;\
-	(BASE).end   = NULL;\
-}\
+	node_type	Type::*m_node;
+};
 
 /*******************************************************************//**
 Adds the node as the first element in a two-way linked list.
-@param list	the base node (not a pointer to it)
-@param elem	the element to add
-@param offset	offset of list node in elem. */
-template <typename List, typename Type>
+@param list the base node (not a pointer to it)
+@param elem the element to add */
+template <typename List>
 void
 ut_list_prepend(
-	List&		list,
-	Type&		elem,
-	size_t		offset)
+	List&				list,
+	typename List::elem_type*	elem)
 {
-	ut_list_node<Type>&	elem_node = ut_elem_get_node(elem, offset);
+	typename List::node_type&	elem_node = elem->*list.node;
+
+	UT_LIST_IS_INITIALISED(list);
 
- 	elem_node.prev = 0;
- 	elem_node.next = list.start;
+	elem_node.prev = 0;
+	elem_node.next = list.start;
 
 	if (list.start != 0) {
-		ut_list_node<Type>&	base_node =
-			ut_elem_get_node(*list.start, offset);
+		typename List::node_type&	base_node =
+			list.start->*list.node;
 
-		ut_ad(list.start != &elem);
+		ut_ad(list.start != elem);
 
-		base_node.prev = &elem;
+		base_node.prev = elem;
 	}
 
-	list.start = &elem;
+	list.start = elem;
 
 	if (list.end == 0) {
-		list.end = &elem;
+		list.end = elem;
 	}
 
 	++list.count;
@@ -144,42 +172,41 @@ ut_list_prepend(
 
 /*******************************************************************//**
 Adds the node as the first element in a two-way linked list.
-@param NAME	list name
-@param LIST	the base node (not a pointer to it)
-@param ELEM	the element to add */
-#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM)	\
-	ut_list_prepend(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param LIST the base node (not a pointer to it)
+@param ELEM the element to add */
+#define UT_LIST_ADD_FIRST(LIST, ELEM)	ut_list_prepend(LIST, ELEM)
 
 /*******************************************************************//**
 Adds the node as the last element in a two-way linked list.
-@param list	list
-@param elem	the element to add
-@param offset	offset of list node in elem */
-template <typename List, typename Type>
+@param list list
+@param elem the element to add
+@param get_node to get the list node for that element */
+template <typename List, typename Functor>
 void
 ut_list_append(
-	List&		list,
-	Type&		elem,
-	size_t		offset)
+	List&				list,
+	typename List::elem_type*	elem,
+	Functor				get_node)
 {
-	ut_list_node<Type>&	elem_node = ut_elem_get_node(elem, offset);
+	typename List::node_type&	node = get_node(*elem);
+
+	UT_LIST_IS_INITIALISED(list);
 
-	elem_node.next = 0;
-	elem_node.prev = list.end;
+	node.next = 0;
+	node.prev = list.end;
 
 	if (list.end != 0) {
-		ut_list_node<Type>&	base_node =
-			ut_elem_get_node(*list.end, offset);
+		typename List::node_type&	base_node = get_node(*list.end);
 
-		ut_ad(list.end != &elem);
+		ut_ad(list.end != elem);
 
-		base_node.next = &elem;
+		base_node.next = elem;
 	}
 
-	list.end = &elem;
+	list.end = elem;
 
 	if (list.start == 0) {
-		list.start = &elem;
+		list.start = elem;
 	}
 
 	++list.count;
@@ -187,45 +214,57 @@ ut_list_append(
 
 /*******************************************************************//**
 Adds the node as the last element in a two-way linked list.
-@param NAME	list name
-@param LIST	list
-@param ELEM	the element to add */
-#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\
-	ut_list_append(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param list list
+@param elem the element to add */
+template <typename List>
+void
+ut_list_append(
+	List&				list,
+	typename List::elem_type*	elem)
+{
+	ut_list_append(
+		list, elem,
+		GenericGetNode<typename List::elem_type>(list.node));
+}
+
+/*******************************************************************//**
+Adds the node as the last element in a two-way linked list.
+@param LIST list base node (not a pointer to it)
+@param ELEM the element to add */
+#define UT_LIST_ADD_LAST(LIST, ELEM)	ut_list_append(LIST, ELEM)
 
 /*******************************************************************//**
 Inserts a ELEM2 after ELEM1 in a list.
-@param list	the base node
-@param elem1	node after which ELEM2 is inserted
-@param elem2	node being inserted after NODE1
-@param offset	offset of list node in elem1 and elem2 */
-template <typename List, typename Type>
+@param list the base node
+@param elem1 node after which ELEM2 is inserted
+@param elem2 node being inserted after ELEM1 */
+template <typename List>
 void
 ut_list_insert(
-	List&		list,
-	Type&		elem1,
-	Type&		elem2,
-	size_t		offset)
+	List&				list,
+	typename List::elem_type*	elem1,
+	typename List::elem_type*	elem2)
 {
-	ut_ad(&elem1 != &elem2);
+	ut_ad(elem1 != elem2);
+	UT_LIST_IS_INITIALISED(list);
 
-	ut_list_node<Type>&	elem1_node = ut_elem_get_node(elem1, offset);
-	ut_list_node<Type>&	elem2_node = ut_elem_get_node(elem2, offset);
+	typename List::node_type&	elem1_node = elem1->*list.node;
+	typename List::node_type&	elem2_node = elem2->*list.node;
 
-	elem2_node.prev = &elem1;
+	elem2_node.prev = elem1;
 	elem2_node.next = elem1_node.next;
 
 	if (elem1_node.next != NULL) {
-		ut_list_node<Type>&	next_node =
-			ut_elem_get_node(*elem1_node.next, offset);
+		typename List::node_type&	next_node =
+			elem1_node.next->*list.node;
 
-		next_node.prev = &elem2;
+		next_node.prev = elem2;
 	}
 
-	elem1_node.next = &elem2;
+	elem1_node.next = elem2;
 
-	if (list.end == &elem1) {
-		list.end = &elem2;
+	if (list.end == elem1) {
+		list.end = elem2;
 	}
 
 	++list.count;
@@ -233,132 +272,179 @@ ut_list_insert(
 
 /*******************************************************************//**
 Inserts a ELEM2 after ELEM1 in a list.
-@param NAME	list name
-@param LIST	the base node
-@param ELEM1	node after which ELEM2 is inserted
-@param ELEM2	node being inserted after ELEM1 */
-#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\
-	ut_list_insert(LIST, *ELEM1, *ELEM2, IB_OFFSETOF(ELEM1, NAME))
-
-#ifdef UNIV_LIST_DEBUG
-/** Invalidate the pointers in a list node.
-@param NAME	list name
-@param N	pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(N)					\
-	(N).next = (Type*) -1;						\
-	(N).prev = (N).next
-#else
-/** Invalidate the pointers in a list node.
-@param NAME	list name
-@param N	pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(N)
-#endif /* UNIV_LIST_DEBUG */
+@param LIST list base node (not a pointer to it)
+@param ELEM1 node after which ELEM2 is inserted
+@param ELEM2 node being inserted after ELEM1 */
+#define UT_LIST_INSERT_AFTER(LIST, ELEM1, ELEM2)			\
+	ut_list_insert(LIST, ELEM1, ELEM2)
 
 /*******************************************************************//**
+Inserts elem2 after elem1 in a list.
+@param list the base node
+@param elem1 node after which elem2 is inserted
+@param elem2 node being inserted after elem1
+@param get_node to get the list node for that element */
+
+template <typename List, typename Functor>
+void
+ut_list_insert(
+	List&				list,
+	typename List::elem_type*	elem1,
+        typename List::elem_type*	elem2,
+	Functor				get_node)
+{
+	ut_ad(elem1 != elem2);
+	UT_LIST_IS_INITIALISED(list);
+
+	typename List::node_type&	elem1_node = get_node(*elem1);
+	typename List::node_type&	elem2_node = get_node(*elem2);
+
+	elem2_node.prev = elem1;
+	elem2_node.next = elem1_node.next;
+
+	if (elem1_node.next != NULL) {
+		typename List::node_type&	next_node =
+			get_node(*elem1_node.next);
+
+		next_node.prev = elem2;
+	}
+
+	elem1_node.next = elem2;
+
+	if (list.end == elem1) {
+		list.end = elem2;
+	}
+
+	++list.count;
+
+}
+/*******************************************************************//**
 Removes a node from a two-way linked list.
-@param list	the base node (not a pointer to it)
-@param elem	node to be removed from the list
-@param offset	offset of list node within elem */
-template <typename List, typename Type>
+@param list the base node (not a pointer to it)
+@param node member node within list element that is to be removed
+@param get_node functor to get the list node from elem */
+template <typename List, typename Functor>
 void
 ut_list_remove(
-	List&		list,
- 	Type&		elem,
-	size_t		offset)
+	List&				list,
+	typename List::node_type&	node,
+	Functor				get_node)
 {
-	ut_list_node<Type>&	elem_node = ut_elem_get_node(elem, offset);
-
 	ut_a(list.count > 0);
+	UT_LIST_IS_INITIALISED(list);
 
-	if (elem_node.next != NULL) {
-		ut_list_node<Type>&	next_node =
-			ut_elem_get_node(*elem_node.next, offset);
+	if (node.next != NULL) {
+		typename List::node_type&	next_node =
+			get_node(*node.next);
 
-		next_node.prev = elem_node.prev;
+		next_node.prev = node.prev;
 	} else {
-		list.end = elem_node.prev;
+		list.end = node.prev;
 	}
 
-	if (elem_node.prev != NULL) {
-		ut_list_node<Type>&	prev_node =
-			ut_elem_get_node(*elem_node.prev, offset);
+	if (node.prev != NULL) {
+		typename List::node_type&	prev_node =
+			get_node(*node.prev);
 
-		prev_node.next = elem_node.next;
+		prev_node.next = node.next;
 	} else {
-		list.start = elem_node.next;
+		list.start = node.next;
 	}
 
-	UT_LIST_REMOVE_CLEAR(elem_node);
+	node.next = 0;
+	node.prev = 0;
 
 	--list.count;
 }
 
 /*******************************************************************//**
 Removes a node from a two-way linked list.
-  aram NAME	list name
-@param LIST	the base node (not a pointer to it)
-@param ELEM	node to be removed from the list */
-#define UT_LIST_REMOVE(NAME, LIST, ELEM)				\
-	ut_list_remove(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
+@param list the base node (not a pointer to it)
+@param elem element to be removed from the list
+@param get_node functor to get the list node from elem */
+template <typename List, typename Functor>
+void
+ut_list_remove(
+	List&				list,
+	typename List::elem_type*	elem,
+	Functor				get_node)
+{
+	ut_list_remove(list, get_node(*elem), get_node);
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param list the base node (not a pointer to it)
+@param elem element to be removed from the list */
+template <typename List>
+void
+ut_list_remove(
+	List&				list,
+	typename List::elem_type*	elem)
+{
+	ut_list_remove(
+		list, elem->*list.node,
+		GenericGetNode<typename List::elem_type>(list.node));
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param LIST the base node (not a pointer to it)
+@param ELEM node to be removed from the list */
+#define UT_LIST_REMOVE(LIST, ELEM)	ut_list_remove(LIST, ELEM)
 
 /********************************************************************//**
 Gets the next node in a two-way list.
-@param NAME	list name
-@param N	pointer to a node
-@return		the successor of N in NAME, or NULL */
-#define UT_LIST_GET_NEXT(NAME, N)\
-	(((N)->NAME).next)
+@param NAME list name
+@param N pointer to a node
+@return the successor of N in NAME, or NULL */
+#define UT_LIST_GET_NEXT(NAME, N)	(((N)->NAME).next)
 
 /********************************************************************//**
 Gets the previous node in a two-way list.
-@param NAME	list name
-@param N	pointer to a node
-@return		the predecessor of N in NAME, or NULL */
-#define UT_LIST_GET_PREV(NAME, N)\
-	(((N)->NAME).prev)
+@param NAME list name
+@param N pointer to a node
+@return the predecessor of N in NAME, or NULL */
+#define UT_LIST_GET_PREV(NAME, N)	(((N)->NAME).prev)
 
 /********************************************************************//**
 Alternative macro to get the number of nodes in a two-way list, i.e.,
 its length.
-@param BASE	the base node (not a pointer to it).
-@return		the number of nodes in the list */
-#define UT_LIST_GET_LEN(BASE)\
-	(BASE).count
+@param BASE the base node (not a pointer to it).
+@return the number of nodes in the list */
+#define UT_LIST_GET_LEN(BASE)		(BASE).count
 
 /********************************************************************//**
 Gets the first node in a two-way list.
-@param BASE	the base node (not a pointer to it)
-@return		first node, or NULL if the list is empty */
-#define UT_LIST_GET_FIRST(BASE)\
-	(BASE).start
+@param BASE the base node (not a pointer to it)
+@return first node, or NULL if the list is empty */
+#define UT_LIST_GET_FIRST(BASE)		(BASE).start
 
 /********************************************************************//**
 Gets the last node in a two-way list.
-@param BASE	the base node (not a pointer to it)
-@return		last node, or NULL if the list is empty */
-#define UT_LIST_GET_LAST(BASE)\
-	(BASE).end
+@param BASE the base node (not a pointer to it)
+@return last node, or NULL if the list is empty */
+#define UT_LIST_GET_LAST(BASE)		(BASE).end
 
 struct	NullValidate { void operator()(const void* elem) { } };
 
 /********************************************************************//**
 Iterate over all the elements and call the functor for each element.
-@param list	base node (not a pointer to it)
-@param functor	Functor that is called for each element in the list
-@parm  node	pointer to member node within list element */
+@param[in]	list	base node (not a pointer to it)
+@param[in,out]	functor	Functor that is called for each element in the list */
 template <typename List, class Functor>
 void
 ut_list_map(
-	List&		list,
-	ut_list_node<typename List::elem_type>
-			List::elem_type::*node,
-	Functor		functor)
+	const List&	list,
+	Functor&	functor)
 {
 	ulint		count = 0;
 
+	UT_LIST_IS_INITIALISED(list);
+
 	for (typename List::elem_type* elem = list.start;
 	     elem != 0;
-	     elem = (elem->*node).next, ++count) {
+	     elem = (elem->*list.node).next, ++count) {
 
 		functor(elem);
 	}
@@ -366,43 +452,95 @@ ut_list_map(
 	ut_a(count == list.count);
 }
 
+template <typename List>
+void
+ut_list_reverse(List& list)
+{
+	UT_LIST_IS_INITIALISED(list);
+
+	for (typename List::elem_type* elem = list.start;
+	     elem != 0;
+	     elem = (elem->*list.node).prev) {
+		(elem->*list.node).reverse();
+	}
+
+	list.reverse();
+}
+
+#define UT_LIST_REVERSE(LIST)	ut_list_reverse(LIST)
+
 /********************************************************************//**
 Checks the consistency of a two-way list.
-@param list	base node (not a pointer to it)
-@param functor	Functor that is called for each element in the list
-@parm  node	pointer to member node within list element */
+@param[in]	list	base node (not a pointer to it)
+@param[in,out]	functor	Functor that is called for each element in the list */
 template <typename List, class Functor>
 void
 ut_list_validate(
-	List&		list,
-	ut_list_node<typename List::elem_type>
-			List::elem_type::*node,
-	Functor		functor = NullValidate())
+	const List&	list,
+	Functor&	functor)
 {
-	ut_list_map(list, node, functor);
+	ut_list_map(list, functor);
 
+	/* Validate the list backwards. */
 	ulint		count = 0;
 
 	for (typename List::elem_type* elem = list.end;
 	     elem != 0;
-	     elem = (elem->*node).prev, ++count) {
-
-		functor(elem);
+	     elem = (elem->*list.node).prev) {
+		++count;
 	}
 
 	ut_a(count == list.count);
 }
 
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param NAME		the name of the list
-@param TYPE		node type
-@param LIST		base node (not a pointer to it)
-@param FUNCTOR		called for each list element */
-#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR)			\
-	ut_list_validate(LIST, &TYPE::NAME, FUNCTOR)
-
-#define UT_LIST_CHECK(NAME, TYPE, LIST)					\
-	ut_list_validate(LIST, &TYPE::NAME, NullValidate())
+/** Check the consistency of a two-way list.
+@param[in] LIST base node reference */
+#define UT_LIST_CHECK(LIST) do {		\
+	NullValidate nullV;			\
+	ut_list_validate(LIST, nullV);		\
+} while (0)
+
+/** Move the given element to the beginning of the list.
+@param[in,out]	list	the list object
+@param[in]	elem	the element of the list which will be moved
+			to the beginning of the list. */
+template <typename List>
+void
+ut_list_move_to_front(
+	List&				list,
+	typename List::elem_type*	elem)
+{
+	ut_ad(ut_list_exists(list, elem));
+
+	if (UT_LIST_GET_FIRST(list) != elem) {
+		ut_list_remove(list, elem);
+		ut_list_prepend(list, elem);
+	}
+}
+
+#ifdef UNIV_DEBUG
+/** Check if the given element exists in the list.
+@param[in]	list	the list object
+@param[in]	elem	the element of the list which will be checked */
+template <typename List>
+bool
+ut_list_exists(
+	List&				list,
+	typename List::elem_type*	elem)
+{
+	typename List::elem_type*	e1;
+
+	for (e1 = UT_LIST_GET_FIRST(list); e1 != NULL;
+	     e1 = (e1->*list.node).next) {
+		if (elem == e1) {
+			return(true);
+		}
+	}
+	return(false);
+}
+#endif
+
+#define UT_LIST_MOVE_TO_FRONT(LIST, ELEM) \
+   ut_list_move_to_front(LIST, ELEM)
 
 #endif /* ut0lst.h */
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
index 81470358f2f..6d56be4d820 100644
--- a/storage/innobase/include/ut0mem.h
+++ b/storage/innobase/include/ut0mem.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,137 +27,60 @@ Created 5/30/1994 Heikki Tuuri
 #define ut0mem_h
 
 #include "univ.i"
-#include <string.h>
 #ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc().  Does not count malloc()
-if srv_use_sys_malloc is set.  Protected by ut_list_mutex. */
-extern ulint		ut_total_allocated_memory;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-extern os_fast_mutex_t	ut_list_mutex;
+# include "os0event.h"
+# include "ut0mutex.h"
 #endif /* !UNIV_HOTBACKUP */
 
 /** Wrapper for memcpy(3).  Copy memory area when the source and
 target are not overlapping.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @param n	in: number of bytes to copy
-* @return	dest */
+@param[in,out]	dest	copy to
+@param[in]	src	copy from
+@param[in]	n	number of bytes to copy
+@return dest */
 UNIV_INLINE
 void*
-ut_memcpy(void* dest, const void* sour, ulint n);
+ut_memcpy(void* dest, const void* src, ulint n);
 
 /** Wrapper for memmove(3).  Copy memory area when the source and
 target are overlapping.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @param n	in: number of bytes to copy
-* @return	dest */
+@param[in,out]	dest	Move to
+@param[in]	src	Move from
+@param[in]	n	number of bytes to move
+@return dest */
 UNIV_INLINE
 void*
-ut_memmove(void* dest, const void* sour, ulint n);
+ut_memmove(void* dest, const void* src, ulint n);
 
 /** Wrapper for memcmp(3).  Compare memory areas.
-* @param str1	in: first memory block to compare
-* @param str2	in: second memory block to compare
-* @param n	in: number of bytes to compare
-* @return	negative, 0, or positive if str1 is smaller, equal,
+@param[in]	str1	first memory block to compare
+@param[in]	str2	second memory block to compare
+@param[in]	n	number of bytes to compare
+@return negative, 0, or positive if str1 is smaller, equal,
 		or greater than str2, respectively. */
 UNIV_INLINE
 int
 ut_memcmp(const void* str1, const void* str2, ulint n);
 
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void);
-/*=============*/
-
-/**********************************************************************//**
-Allocates memory.
-@return	own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
-	ulint	n,			/*!< in: number of bytes to allocate */
-	ibool	assert_on_error)	/*!< in: if TRUE, we crash mysqld if
-					the memory cannot be allocated */
-	MY_ATTRIBUTE((malloc));
-/**********************************************************************//**
-Allocates memory. */
-#define ut_malloc(n) ut_malloc_low(n, TRUE)
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
-a nop. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
-	void* ptr);  /*!< in, own: memory block, can be NULL */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
-       realloc()  changes the size of the memory block pointed to
-       by ptr to size bytes.  The contents will be  unchanged  to
-       the minimum of the old and new sizes; newly allocated mem�
-       ory will be uninitialized.  If ptr is NULL,  the	 call  is
-       equivalent  to malloc(size); if size is equal to zero, the
-       call is equivalent to free(ptr).	 Unless ptr is	NULL,  it
-       must  have  been	 returned by an earlier call to malloc(),
-       calloc() or realloc().
-
-RETURN VALUE
-       realloc() returns a pointer to the newly allocated memory,
-       which is suitably aligned for any kind of variable and may
-       be different from ptr, or NULL if the  request  fails.  If
-       size  was equal to 0, either NULL or a pointer suitable to
-       be passed to free() is returned.	 If realloc()  fails  the
-       original	 block	is  left  untouched  - it is not freed or
-       moved.
-@return	own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
-	void*	ptr,	/*!< in: pointer to old block or NULL */
-	ulint	size);	/*!< in: desired size */
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void);
-/*=================*/
-#endif /* !UNIV_HOTBACKUP */
-
 /** Wrapper for strcpy(3).  Copy a NUL-terminated string.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @return	dest */
+@param[in,out]	dest	Destination to copy to
+@param[in]	src	Source to copy from
+@return dest */
 UNIV_INLINE
 char*
-ut_strcpy(char* dest, const char* sour);
+ut_strcpy(char* dest, const char* src);
 
 /** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
-* @param str	in: string
-* @return	length of the string in bytes, excluding the terminating NUL */
+@param[in]	str	string
+@return length of the string in bytes, excluding the terminating NUL */
 UNIV_INLINE
 ulint
 ut_strlen(const char* str);
 
 /** Wrapper for strcmp(3).  Compare NUL-terminated strings.
-* @param str1	in: first string to compare
-* @param str2	in: second string to compare
-* @return	negative, 0, or positive if str1 is smaller, equal,
+@param[in]	str1	first string to compare
+@param[in]	str2	second string to compare
+@return negative, 0, or positive if str1 is smaller, equal,
 		or greater than str2, respectively. */
 UNIV_INLINE
 int
@@ -167,8 +90,7 @@ ut_strcmp(const char* str1, const char* str2);
 Copies up to size - 1 characters from the NUL-terminated string src to
 dst, NUL-terminating the result. Returns strlen(src), so truncation
 occurred if the return value >= size.
-@return	strlen(src) */
-UNIV_INTERN
+@return strlen(src) */
 ulint
 ut_strlcpy(
 /*=======*/
@@ -179,8 +101,7 @@ ut_strlcpy(
 /**********************************************************************//**
 Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
 (size - 1) bytes of src, not the first.
-@return	strlen(src) */
-UNIV_INTERN
+@return strlen(src) */
 ulint
 ut_strlcpy_rev(
 /*===========*/
@@ -191,8 +112,7 @@ ut_strlcpy_rev(
 /**********************************************************************//**
 Return the number of times s2 occurs in s1. Overlapping instances of s2
 are only counted once.
-@return	the number of times s2 occurs in s1 */
-UNIV_INTERN
+@return the number of times s2 occurs in s1 */
 ulint
 ut_strcount(
 /*========*/
@@ -202,8 +122,7 @@ ut_strcount(
 /**********************************************************************//**
 Replace every occurrence of s1 in str with s2. Overlapping instances of s1
 are only replaced once.
-@return	own: modified string, must be freed with mem_free() */
-UNIV_INTERN
+@return own: modified string, must be freed with ut_free() */
 char*
 ut_strreplace(
 /*==========*/
@@ -213,12 +132,11 @@ ut_strreplace(
 
 /********************************************************************
 Concatenate 3 strings.*/
-
 char*
 ut_str3cat(
 /*=======*/
 				/* out, own: concatenated string, must be
-				freed with mem_free() */
+				freed with ut_free() */
 	const char*	s1,	/* in: string 1 */
 	const char*	s2,	/* in: string 2 */
 	const char*	s3);	/* in: string 3 */
@@ -228,7 +146,7 @@ Converts a raw binary data to a NUL-terminated hex string. The output is
 truncated if there is not enough space in "hex", make sure "hex_size" is at
 least (2 * raw_size + 1) if you do not want this to happen. Returns the
 actual number of characters written to "hex" (including the NUL).
-@return	number of chars written */
+@return number of chars written */
 UNIV_INLINE
 ulint
 ut_raw_to_hex(
@@ -243,7 +161,7 @@ Adds single quotes to the start and end of string and escapes any quotes
 by doubling them. Returns the number of bytes that were written to "buf"
 (including the terminating NUL). If buf_size is too small then the
 trailing bytes from "str" are discarded.
-@return	number of bytes that were written */
+@return number of bytes that were written */
 UNIV_INLINE
 ulint
 ut_str_sql_format(
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
index 5c9071d52cc..224ff98b0f4 100644
--- a/storage/innobase/include/ut0mem.ic
+++ b/storage/innobase/include/ut0mem.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,35 +28,35 @@ Created 5/30/1994 Heikki Tuuri
 
 /** Wrapper for memcpy(3).  Copy memory area when the source and
 target are not overlapping.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @param n	in: number of bytes to copy
-* @return	dest */
+@param[in,out]	dest	copy to
+@param[in]	src	copy from
+@param[in]	n	number of bytes to copy
+@return dest */
 UNIV_INLINE
 void*
-ut_memcpy(void* dest, const void* sour, ulint n)
+ut_memcpy(void* dest, const void* src, ulint n)
 {
-	return(memcpy(dest, sour, n));
+	return(memcpy(dest, src, n));
 }
 
 /** Wrapper for memmove(3).  Copy memory area when the source and
 target are overlapping.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @param n	in: number of bytes to copy
-* @return	dest */
+@param[in,out]	dest	Move to
+@param[in]	src	Move from
+@param[in]	n	number of bytes to move
+@return dest */
 UNIV_INLINE
 void*
-ut_memmove(void* dest, const void* sour, ulint n)
+ut_memmove(void* dest, const void* src, ulint n)
 {
-	return(memmove(dest, sour, n));
+	return(memmove(dest, src, n));
 }
 
 /** Wrapper for memcmp(3).  Compare memory areas.
-* @param str1	in: first memory block to compare
-* @param str2	in: second memory block to compare
-* @param n	in: number of bytes to compare
-* @return	negative, 0, or positive if str1 is smaller, equal,
+@param[in]	str1	first memory block to compare
+@param[in]	str2	second memory block to compare
+@param[in]	n	number of bytes to compare
+@return negative, 0, or positive if str1 is smaller, equal,
 		or greater than str2, respectively. */
 UNIV_INLINE
 int
@@ -66,19 +66,19 @@ ut_memcmp(const void* str1, const void* str2, ulint n)
 }
 
 /** Wrapper for strcpy(3).  Copy a NUL-terminated string.
-* @param dest	in: copy to
-* @param sour	in: copy from
-* @return	dest */
+@param[in,out]	dest	Destination to copy to
+@param[in]	src	Source to copy from
+@return dest */
 UNIV_INLINE
 char*
-ut_strcpy(char* dest, const char* sour)
+ut_strcpy(char* dest, const char* src)
 {
-	return(strcpy(dest, sour));
+	return(strcpy(dest, src));
 }
 
 /** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
-* @param str	in: string
-* @return	length of the string in bytes, excluding the terminating NUL */
+@param[in]	str	string
+@return length of the string in bytes, excluding the terminating NUL */
 UNIV_INLINE
 ulint
 ut_strlen(const char* str)
@@ -87,9 +87,9 @@ ut_strlen(const char* str)
 }
 
 /** Wrapper for strcmp(3).  Compare NUL-terminated strings.
-* @param str1	in: first string to compare
-* @param str2	in: second string to compare
-* @return	negative, 0, or positive if str1 is smaller, equal,
+@param[in]	str1	first string to compare
+@param[in]	str2	second string to compare
+@return negative, 0, or positive if str1 is smaller, equal,
 		or greater than str2, respectively. */
 UNIV_INLINE
 int
@@ -103,7 +103,7 @@ Converts a raw binary data to a NUL-terminated hex string. The output is
 truncated if there is not enough space in "hex", make sure "hex_size" is at
 least (2 * raw_size + 1) if you do not want this to happen. Returns the
 actual number of characters written to "hex" (including the NUL).
-@return	number of chars written */
+@return number of chars written */
 UNIV_INLINE
 ulint
 ut_raw_to_hex(
@@ -223,7 +223,7 @@ Adds single quotes to the start and end of string and escapes any quotes
 by doubling them. Returns the number of bytes that were written to "buf"
 (including the terminating NUL). If buf_size is too small then the
 trailing bytes from "str" are discarded.
-@return	number of bytes that were written */
+@return number of bytes that were written */
 UNIV_INLINE
 ulint
 ut_str_sql_format(
diff --git a/storage/innobase/include/ut0mutex.h b/storage/innobase/include/ut0mutex.h
new file mode 100644
index 00000000000..e4ab671eece
--- /dev/null
+++ b/storage/innobase/include/ut0mutex.h
@@ -0,0 +1,201 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0mutex.h
+Policy based mutexes.
+
+Created 2012-03-24 Sunny Bains.
+***********************************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+#ifndef ut0mutex_h
+#define ut0mutex_h
+
+extern ulong	srv_spin_wait_delay;
+extern ulong	srv_n_spin_wait_rounds;
+extern ulong	srv_force_recovery_crash;
+
+#include "sync0policy.h"
+#include "ib0mutex.h"
+#include <set>
+
+/** Create a typedef using the MutexType<PolicyType>
+@param[in]	M		Mutex type
+@param[in]	P		Policy type
+@param[in]	T		The resulting typedef alias */
+#define UT_MUTEX_TYPE(M, P, T) typedef PolicyMutex<M<P> > T;
+
+typedef OSMutex EventMutex;
+
+# ifdef HAVE_IB_LINUX_FUTEX
+UT_MUTEX_TYPE(TTASFutexMutex, GenericPolicy, FutexMutex);
+UT_MUTEX_TYPE(TTASFutexMutex, BlockMutexPolicy, BlockFutexMutex);
+# endif /* HAVE_IB_LINUX_FUTEX */
+
+UT_MUTEX_TYPE(TTASMutex, GenericPolicy, SpinMutex);
+UT_MUTEX_TYPE(TTASMutex, BlockMutexPolicy, BlockSpinMutex);
+
+UT_MUTEX_TYPE(OSTrackMutex, GenericPolicy, SysMutex);
+UT_MUTEX_TYPE(OSTrackMutex, BlockMutexPolicy, BlockSysMutex);
+
+UT_MUTEX_TYPE(TTASEventMutex, GenericPolicy, SyncArrayMutex);
+UT_MUTEX_TYPE(TTASEventMutex, BlockMutexPolicy, BlockSyncArrayMutex);
+
+#ifdef MUTEX_FUTEX
+/** The default mutex type. */
+typedef FutexMutex ib_mutex_t;
+typedef BlockFutexMutex ib_bpmutex_t;
+#define MUTEX_TYPE	"Uses futexes"
+#elif defined(MUTEX_SYS)
+typedef SysMutex ib_mutex_t;
+typedef BlockSysMutex ib_bpmutex_t;
+#define MUTEX_TYPE	"Uses system mutexes"
+#elif defined(MUTEX_EVENT)
+typedef SyncArrayMutex ib_mutex_t;
+typedef BlockSyncArrayMutex ib_bpmutex_t;
+#define MUTEX_TYPE	"Uses event mutexes"
+#else
+#error "ib_mutex_t type is unknown"
+#endif /* MUTEX_FUTEX */
+
+extern ulong	srv_spin_wait_delay;
+extern ulong	srv_n_spin_wait_rounds;
+
+#define mutex_create(I, M)		mutex_init((M), (I), __FILE__, __LINE__)
+
+#define mutex_enter(M)			(M)->enter(			\
+					srv_n_spin_wait_rounds,		\
+					srv_spin_wait_delay,		\
+					__FILE__, __LINE__)
+
+#define mutex_enter_nospin(M)		(M)->enter(			\
+					0,				\
+					0,				\
+					__FILE__, __LINE__)
+
+#define mutex_enter_nowait(M)		(M)->trylock(__FILE__, __LINE__)
+
+#define mutex_exit(M)			(M)->exit()
+
+#define mutex_free(M)			mutex_destroy(M)
+
+#ifdef UNIV_DEBUG
+/**
+Checks that the mutex has been initialized. */
+#define mutex_validate(M)		(M)->validate()
+
+/**
+Checks that the current thread owns the mutex. Works only
+in the debug version. */
+#define mutex_own(M)			(M)->is_owned()
+#else
+#define mutex_own(M)			/* No op */
+#define mutex_validate(M)		/* No op */
+#endif /* UNIV_DEBUG */
+
+/** Iterate over the mutex meta data */
+class MutexMonitor {
+public:
+	/** Constructor */
+	MutexMonitor() { }
+
+	/** Destructor */
+	~MutexMonitor() { }
+
+	/** Enable the mutex monitoring */
+	void enable();
+
+	/** Disable the mutex monitoring */
+	void disable();
+
+	/** Reset the mutex monitoring values */
+	void reset();
+
+	/** Invoke the callback for each active mutex collection
+	@param[in,out]	callback	Functor to call
+	@return false as soon as a callback returns false, otherwise true */
+	template<typename Callback>
+	bool iterate(Callback& callback) const
+		UNIV_NOTHROW
+	{
+		LatchMetaData::iterator	end = latch_meta.end();
+
+		for (LatchMetaData::iterator it = latch_meta.begin();
+		     it != end;
+		     ++it) {
+
+			/* Some of the slots will be null in non-debug mode */
+
+			if (*it == NULL) {
+				continue;
+			}
+
+			latch_meta_t*	meta = *it;
+
+			bool	ret = callback(*meta);
+
+			if (!ret) {
+				return(ret);
+			}
+		}
+
+		return(true);
+	}
+};
+
+/** Defined in sync0sync.cc */
+extern MutexMonitor*	mutex_monitor;
+
+/**
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed.
+Add the mutex instance to the global mutex list.
+@param[in,out]	mutex		mutex to initialise
+@param[in]	id		The mutex ID (Latch ID)
+@param[in]	file_name	Filename from where it was called
+@param[in]	line		Line number in filename from where called */
+template <typename Mutex>
+void mutex_init(
+	Mutex*		mutex,
+	latch_id_t	id,
+	const char*	file_name,
+	uint32_t	line)
+{
+	new(mutex) Mutex();
+
+	mutex->init(id, file_name, line);
+}
+
+/**
+Removes a mutex instance from the mutex list. The mutex is checked to
+be in the reset state.
+@param[in,out]	mutex		mutex instance to destroy */
+template <typename Mutex>
+void mutex_destroy(
+	Mutex*		mutex)
+{
+	mutex->destroy();
+}
+
+#endif /* ut0mutex_h */
+
+#endif /* UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
new file mode 100644
index 00000000000..6f3c06cf978
--- /dev/null
+++ b/storage/innobase/include/ut0new.h
@@ -0,0 +1,931 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0new.h
+Instrumented memory allocator.
+
+Created May 26, 2014 Vasil Dimov
+*******************************************************/
+
+/** Dynamic memory allocation within InnoDB guidelines.
+All dynamic (heap) memory allocations (malloc(3), strdup(3), etc, "new",
+various std:: containers that allocate memory internally), that are done
+within InnoDB are instrumented. This means that InnoDB uses a custom set
+of functions for allocating memory, rather than calling e.g. "new" directly.
+
+Here follows a cheat sheet on what InnoDB functions to use whenever a
+standard one would have been used.
+
+Creating new objects with "new":
+--------------------------------
+Standard:
+  new expression
+  or
+  new(std::nothrow) expression
+InnoDB, default instrumentation:
+  UT_NEW_NOKEY(expression)
+InnoDB, custom instrumentation, preferred:
+  UT_NEW(expression, key)
+
+Destroying objects, created with "new":
+---------------------------------------
+Standard:
+  delete ptr
+InnoDB:
+  UT_DELETE(ptr)
+
+Creating new arrays with "new[]":
+---------------------------------
+Standard:
+  new type[num]
+  or
+  new(std::nothrow) type[num]
+InnoDB, default instrumentation:
+  UT_NEW_ARRAY_NOKEY(type, num)
+InnoDB, custom instrumentation, preferred:
+  UT_NEW_ARRAY(type, num, key)
+
+Destroying arrays, created with "new[]":
+----------------------------------------
+Standard:
+  delete[] ptr
+InnoDB:
+  UT_DELETE_ARRAY(ptr)
+
+Declaring a type with a std:: container, e.g. std::vector:
+----------------------------------------------------------
+Standard:
+  std::vector<t>
+InnoDB:
+  std::vector<t, ut_allocator<t> >
+
+Declaring objects of some std:: type:
+-------------------------------------
+Standard:
+  std::vector<t> v
+InnoDB, default instrumentation:
+  std::vector<t, ut_allocator<t> > v
+InnoDB, custom instrumentation, preferred:
+  std::vector<t, ut_allocator<t> > v(ut_allocator<t>(key))
+
+Raw block allocation (as usual in C++, consider whether using "new" would
+not be more appropriate):
+-------------------------------------------------------------------------
+Standard:
+  malloc(num)
+InnoDB, default instrumentation:
+  ut_malloc_nokey(num)
+InnoDB, custom instrumentation, preferred:
+  ut_malloc(num, key)
+
+Raw block resize:
+-----------------
+Standard:
+  realloc(ptr, new_size)
+InnoDB:
+  ut_realloc(ptr, new_size)
+
+Raw block deallocation:
+-----------------------
+Standard:
+  free(ptr)
+InnoDB:
+  ut_free(ptr)
+
+Note: the expression passed to UT_NEW() or UT_NEW_NOKEY() must always end
+with (), thus:
+Standard:
+  new int
+InnoDB:
+  UT_NEW_NOKEY(int())
+*/
+
+#ifndef ut0new_h
+#define ut0new_h
+
+#include <algorithm> /* std::min() */
+#include <limits> /* std::numeric_limits */
+#include <map> /* std::map */
+
+#include <stddef.h>
+#include <stdlib.h> /* malloc() */
+#include <string.h> /* strlen(), strrchr(), strncmp() */
+
+#include "my_global.h" /* needed for headers from mysql/psi/ */
+/* JAN: TODO: missing 5.7 header */
+#ifdef HAVE_MYSQL_MEMORY_H
+#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
+#endif
+
+#include "mysql/psi/psi_memory.h" /* PSI_memory_key, PSI_memory_info */
+
+#include "univ.i"
+
+#include "os0proc.h" /* os_mem_alloc_large() */
+#include "os0thread.h" /* os_thread_sleep() */
+#include "ut0ut.h" /* ut_strcmp_functor, ut_basename_noext() */
+
+#define	OUT_OF_MEMORY_MSG \
+	"Check if you should increase the swap file or ulimits of your" \
+	" operating system. Note that on most 32-bit computers the process" \
+	" memory space is limited to 2 GB or 4 GB."
+
+/** Maximum number of retries to allocate memory. */
+extern const size_t	alloc_max_retries;
+
+/** Keys for registering allocations with performance schema.
+Pointers to these variables are supplied to PFS code via the pfs_info[]
+array and the PFS code initializes them via PSI_MEMORY_CALL(register_memory)().
+mem_key_other and mem_key_std are special in the following way (see also
+ut_allocator::get_mem_key()):
+* If the caller has not provided a key and the file name of the caller is
+  unknown, then mem_key_std will be used. This happens only when called from
+  within std::* containers.
+* If the caller has not provided a key and the file name of the caller is
+  known, but is not amongst the predefined names (see ut_new_boot()) then
+  mem_key_other will be used. Generally this should not happen and if it
+  happens then that means that the list of predefined names must be extended.
+Keep this list alphabetically sorted. */
+extern PSI_memory_key	mem_key_ahi;
+extern PSI_memory_key	mem_key_buf_buf_pool;
+extern PSI_memory_key	mem_key_dict_stats_bg_recalc_pool_t;
+extern PSI_memory_key	mem_key_dict_stats_index_map_t;
+extern PSI_memory_key	mem_key_dict_stats_n_diff_on_level;
+extern PSI_memory_key	mem_key_other;
+extern PSI_memory_key	mem_key_row_log_buf;
+extern PSI_memory_key	mem_key_row_merge_sort;
+extern PSI_memory_key	mem_key_std;
+extern PSI_memory_key	mem_key_trx_sys_t_rw_trx_ids;
+extern PSI_memory_key	mem_key_partitioning;
+
+/** Setup the internal objects needed for UT_NEW() to operate.
+This must be called before the first call to UT_NEW(). */
+void
+ut_new_boot();
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Retrieve a memory key (registered with PFS), given a portion of the file
+name of the caller.
+@param[in]	file	portion of the filename - basename without an extension
+@return registered memory key or PSI_NOT_INSTRUMENTED if not found */
+PSI_memory_key
+ut_new_get_key_by_file(
+	const char*	file);
+
+#endif /* UNIV_PFS_MEMORY */
+
+/** A structure that holds the necessary data for performance schema
+accounting. An object of this type is put in front of each allocated block
+of memory when allocation is done by ut_allocator::allocate(). This is
+because the data is needed even when freeing the memory. Users of
+ut_allocator::allocate_large() are responsible for maintaining this
+themselves. */
+struct ut_new_pfx_t {
+
+#ifdef UNIV_PFS_MEMORY
+
+	/** Performance schema key. Assigned to a name at startup via
+	PSI_MEMORY_CALL(register_memory)() and later used for accounting
+	allocations and deallocations with
+	PSI_MEMORY_CALL(memory_alloc)(key, size, owner) and
+	PSI_MEMORY_CALL(memory_free)(key, size, owner). */
+	PSI_memory_key	m_key;
+
+        /**
+          Thread owner.
+          Instrumented thread that owns the allocated memory.
+          This state is used by the performance schema to maintain
+          per thread statistics,
+          when memory is given from thread A to thread B.
+        */
+        struct PSI_thread *m_owner;
+
+#endif /* UNIV_PFS_MEMORY */
+
+	/** Size of the allocated block in bytes, including this prepended
+	aux structure (for ut_allocator::allocate()). For example if InnoDB
+	code requests to allocate 100 bytes, and sizeof(ut_new_pfx_t) is 16,
+	then 116 bytes are allocated in total and m_size will be 116.
+	ut_allocator::allocate_large() does not prepend this struct to the
+	allocated block and its users are responsible for maintaining it
+	and passing it later to ut_allocator::deallocate_large(). */
+	size_t		m_size;
+#if SIZEOF_VOIDP == 4
+	/** Pad the header size to a multiple of 64 bits on 32-bit systems,
+	so that the payload will be aligned to 64 bits. */
+	size_t		pad;
+#endif
+};
+
+/** Allocator class for allocating memory from inside std::* containers. */
+template <class T>
+class ut_allocator {
+public:
+	typedef T*		pointer;
+	typedef const T*	const_pointer;
+	typedef T&		reference;
+	typedef const T&	const_reference;
+	typedef T		value_type;
+	typedef size_t		size_type;
+	typedef ptrdiff_t	difference_type;
+
+	/** Default constructor. */
+	explicit
+	ut_allocator(
+		PSI_memory_key	key = PSI_NOT_INSTRUMENTED)
+		:
+#ifdef UNIV_PFS_MEMORY
+		m_key(key),
+#endif /* UNIV_PFS_MEMORY */
+		m_oom_fatal(true)
+	{
+	}
+
+	/** Constructor from allocator of another type. */
+	template <class U>
+	ut_allocator(
+		const ut_allocator<U>&	other)
+		: m_oom_fatal(other.is_oom_fatal())
+	{
+#ifdef UNIV_PFS_MEMORY
+		const PSI_memory_key	other_key = other.get_mem_key(NULL);
+
+		m_key = (other_key != mem_key_std)
+			? other_key
+			: PSI_NOT_INSTRUMENTED;
+#endif /* UNIV_PFS_MEMORY */
+	}
+
+	/** When out of memory (OOM) happens, report error and do not
+	make it fatal.
+	@return a reference to the allocator. */
+	ut_allocator&
+	set_oom_not_fatal() {
+		m_oom_fatal = false;
+		return(*this);
+	}
+
+	/** Check if allocation failure is a fatal error.
+	@return true if allocation failure is fatal, false otherwise. */
+	bool is_oom_fatal() const {
+		return(m_oom_fatal);
+	}
+
+	/** Return the maximum number of objects that can be allocated by
+	this allocator. */
+	size_type
+	max_size() const
+	{
+		const size_type	s_max = std::numeric_limits<size_type>::max();
+
+#ifdef UNIV_PFS_MEMORY
+		return((s_max - sizeof(ut_new_pfx_t)) / sizeof(T));
+#else
+		return(s_max / sizeof(T));
+#endif /* UNIV_PFS_MEMORY */
+	}
+
+	/** Allocate a chunk of memory that can hold 'n_elements' objects of
+	type 'T' and trace the allocation.
+	If the allocation fails this method may throw an exception. This
+	is mandated by the standard and if it returns NULL instead, then
+	STL containers that use it (e.g. std::vector) may get confused.
+	After successful allocation the returned pointer must be passed
+	to ut_allocator::deallocate() when no longer needed.
+	@param[in]	n_elements	number of elements
+	@param[in]	hint		pointer to a nearby memory location,
+	unused by this implementation
+	@param[in]	file		file name of the caller
+	@param[in]	set_to_zero	if true, the memory is set to 0x0 bytes
+	@param[in]	throw_on_error	if false, return NULL, do not throw
+	@return pointer to the allocated memory, NULL if n_elements is 0 */
+	pointer
+	allocate(
+		size_type	n_elements,
+		const_pointer	hint = NULL,
+		const char*	file = NULL,
+		bool		set_to_zero = false,
+		bool		throw_on_error = true)
+	{
+		if (n_elements == 0) {
+			return(NULL);
+		}
+
+		if (n_elements > max_size()) {
+			if (throw_on_error) {
+				throw(std::bad_alloc());
+			} else {
+				return(NULL);
+			}
+		}
+
+		void*	ptr;
+		size_t	total_bytes = n_elements * sizeof(T);
+
+#ifdef UNIV_PFS_MEMORY
+		/* The header size must not ruin the 64-bit alignment
+		on 32-bit systems. Some allocated structures use
+		64-bit fields. */
+		ut_ad((sizeof(ut_new_pfx_t) & 7) == 0);
+		total_bytes += sizeof(ut_new_pfx_t);
+#endif /* UNIV_PFS_MEMORY */
+
+		for (size_t retries = 1; ; retries++) {
+
+			if (set_to_zero) {
+				ptr = calloc(1, total_bytes);
+			} else {
+				ptr = malloc(total_bytes);
+			}
+
+			if (ptr != NULL || retries >= alloc_max_retries) {
+				break;
+			}
+
+			os_thread_sleep(1000000 /* 1 second */);
+		}
+
+		if (ptr == NULL) {
+			ib::fatal_or_error(m_oom_fatal)
+				<< "Cannot allocate " << total_bytes
+				<< " bytes of memory after "
+				<< alloc_max_retries << " retries over "
+				<< alloc_max_retries << " seconds. OS error: "
+				<< strerror(errno) << " (" << errno << "). "
+				<< OUT_OF_MEMORY_MSG;
+			if (throw_on_error) {
+				throw(std::bad_alloc());
+			} else {
+				return(NULL);
+			}
+		}
+
+#ifdef UNIV_PFS_MEMORY
+		ut_new_pfx_t*	pfx = static_cast<ut_new_pfx_t*>(ptr);
+
+		allocate_trace(total_bytes, file, pfx);
+
+		return(reinterpret_cast<pointer>(pfx + 1));
+#else
+		return(reinterpret_cast<pointer>(ptr));
+#endif /* UNIV_PFS_MEMORY */
+	}
+
+	/** Free memory obtained from allocate() and trace the deallocation.
+	@param[in,out]	ptr		pointer to free; NULL is a no-op
+	@param[in]	n_elements	number of elements allocated (unused) */
+	void
+	deallocate(
+		pointer		ptr,
+		size_type	n_elements = 0)
+	{
+		if (ptr == NULL) {
+			return;
+		}
+
+#ifdef UNIV_PFS_MEMORY
+		ut_new_pfx_t*	pfx = reinterpret_cast<ut_new_pfx_t*>(ptr) - 1;
+
+		deallocate_trace(pfx);
+
+		free(pfx);
+#else
+		free(ptr);
+#endif /* UNIV_PFS_MEMORY */
+	}
+
+	/** Create an object of type 'T' using the value 'val' over the
+	memory pointed by 'p'. */
+	void
+	construct(
+		pointer		p,
+		const T&	val)
+	{
+		new(p) T(val);
+	}
+
+	/** Destroy an object pointed by 'p'. */
+	void
+	destroy(
+		pointer	p)
+	{
+		p->~T();
+	}
+
+	/** Return the address of an object. */
+	pointer
+	address(
+		reference	x) const
+	{
+		return(&x);
+	}
+
+	/** Return the address of a const object. */
+	const_pointer
+	address(
+		const_reference	x) const
+	{
+		return(&x);
+	}
+
+	template <class U>
+	struct rebind {
+		typedef ut_allocator<U>	other;
+	};
+
+	/* The following are custom methods, not required by the standard. */
+
+#ifdef UNIV_PFS_MEMORY
+
+	/** realloc(3)-like method.
+	The passed in ptr must have been returned by allocate() and the
+	pointer returned by this method must be passed to deallocate() when
+	no longer needed.
+	@param[in,out]	ptr		old pointer to reallocate
+	@param[in]	n_elements	new number of elements to allocate
+	@param[in]	file		file name of the caller
+	@return newly allocated memory */
+	pointer
+	reallocate(
+		void*		ptr,
+		size_type	n_elements,
+		const char*	file)
+	{
+		if (n_elements == 0) {
+			deallocate(static_cast<pointer>(ptr));
+			return(NULL);
+		}
+
+		if (ptr == NULL) {
+			return(allocate(n_elements, NULL, file, false, false));
+		}
+
+		if (n_elements > max_size()) {
+			return(NULL);
+		}
+
+		ut_new_pfx_t*	pfx_old;
+		ut_new_pfx_t*	pfx_new;
+		size_t		total_bytes;
+
+		pfx_old = reinterpret_cast<ut_new_pfx_t*>(ptr) - 1;
+
+		total_bytes = n_elements * sizeof(T) + sizeof(ut_new_pfx_t);
+
+		for (size_t retries = 1; ; retries++) {
+
+			pfx_new = static_cast<ut_new_pfx_t*>(
+				realloc(pfx_old, total_bytes));
+
+			if (pfx_new != NULL || retries >= alloc_max_retries) {
+				break;
+			}
+
+			os_thread_sleep(1000000 /* 1 second */);
+		}
+
+		if (pfx_new == NULL) {
+			ib::fatal_or_error(m_oom_fatal)
+				<< "Cannot reallocate " << total_bytes
+				<< " bytes of memory after "
+				<< alloc_max_retries << " retries over "
+				<< alloc_max_retries << " seconds. OS error: "
+				<< strerror(errno) << " (" << errno << "). "
+				<< OUT_OF_MEMORY_MSG;
+			/* not reached */
+			return(NULL);
+		}
+
+		/* pfx_new still contains the description of the old block
+		that was presumably freed by realloc(). */
+		deallocate_trace(pfx_new);
+
+		/* pfx_new is set here to describe the new block. */
+		allocate_trace(total_bytes, file, pfx_new);
+
+		return(reinterpret_cast<pointer>(pfx_new + 1));
+	}
+
+	/** Allocate, trace the allocation and construct 'n_elements' objects
+	of type 'T'. If the allocation fails, this method returns NULL. If a
+	constructor throws, the objects built so far are destroyed, the memory
+	is deallocated and the exception is rethrown. After successful
+	completion the pointer must be passed to delete_array() when done.
+	@param[in]	n_elements	number of elements to allocate
+	@param[in]	file		file name of the caller
+	@return pointer to the first allocated object or NULL */
+	pointer
+	new_array(
+		size_type	n_elements,
+		const char*	file)
+	{
+		T*	p = allocate(n_elements, NULL, file, false, false);
+
+		if (p == NULL) {
+			return(NULL);
+		}
+
+		T*		first = p;
+		size_type	i;
+
+		try {
+			for (i = 0; i < n_elements; i++) {
+				new(p) T;
+				++p;
+			}
+		} catch (...) {
+			for (size_type j = 0; j < i; j++) {
+				--p;
+				p->~T();
+			}
+
+			deallocate(first);
+
+			throw;
+		}
+
+		return(first);
+	}
+
+	/** Destroy, deallocate and trace the deallocation of an array created
+	by new_array().
+	@param[in,out]	ptr	pointer to the first object in the array */
+	void
+	delete_array(
+		T*	ptr)
+	{
+		if (ptr == NULL) {
+			return;
+		}
+
+		const size_type	n_elements = n_elements_allocated(ptr);
+
+		T*		p = ptr + n_elements - 1;
+
+		for (size_type i = 0; i < n_elements; i++) {
+			p->~T();
+			--p;
+		}
+
+		deallocate(ptr);
+	}
+
+#endif /* UNIV_PFS_MEMORY */
+
+	/** Allocate a large chunk of memory that can hold 'n_elements'
+	objects of type 'T' and trace the allocation.
+	@param[in]	n_elements	number of elements
+	@param[out]	pfx		storage for the description of the
+	allocated memory. The caller must provide space for this one and keep
+	it until the memory is no longer needed and then pass it to
+	deallocate_large().
+	@return pointer to the allocated memory or NULL */
+	pointer
+	allocate_large(
+		size_type	n_elements,
+		ut_new_pfx_t*	pfx)
+	{
+		if (n_elements == 0 || n_elements > max_size()) {
+			return(NULL);
+		}
+
+		ulint	n_bytes = n_elements * sizeof(T);
+
+		pointer	ptr = reinterpret_cast<pointer>(
+			os_mem_alloc_large(&n_bytes));
+
+#ifdef UNIV_PFS_MEMORY
+		if (ptr != NULL) {
+			allocate_trace(n_bytes, NULL, pfx);
+		}
+#else
+		pfx->m_size = n_bytes;
+#endif /* UNIV_PFS_MEMORY */
+
+		return(ptr);
+	}
+
+	/** Free a memory allocated by allocate_large() and trace the
+	deallocation.
+	@param[in,out]	ptr	pointer to memory to free
+	@param[in]	pfx	descriptor of the memory, as returned by
+	allocate_large(). */
+	void
+	deallocate_large(
+		pointer			ptr,
+		const ut_new_pfx_t*	pfx)
+	{
+#ifdef UNIV_PFS_MEMORY
+		deallocate_trace(pfx);
+#endif /* UNIV_PFS_MEMORY */
+
+		os_mem_free_large(ptr, pfx->m_size);
+	}
+
+#ifdef UNIV_PFS_MEMORY
+
+	/** Get the performance schema key to use for tracing allocations.
+	@param[in]	file	file name of the caller or NULL if unknown
+	@return performance schema key */
+	PSI_memory_key
+	get_mem_key(
+		const char*	file) const
+	{
+		if (m_key != PSI_NOT_INSTRUMENTED) {
+			return(m_key);
+		}
+
+		if (file == NULL) {
+			return(mem_key_std);
+		}
+
+		/* e.g. "btr0cur", derived from "/path/to/btr0cur.cc" */
+		char		keyname[FILENAME_MAX];
+		const size_t	len = ut_basename_noext(file, keyname,
+							sizeof(keyname));
+		/* If sizeof(keyname) was not enough then the output would
+		be truncated, assert that this did not happen. */
+		ut_a(len < sizeof(keyname));
+
+		const PSI_memory_key	key = ut_new_get_key_by_file(keyname);
+
+		if (key != PSI_NOT_INSTRUMENTED) {
+			return(key);
+		}
+
+		return(mem_key_other);
+	}
+
+private:
+
+	/** Retrieve the element count of a block allocated by new_array().
+	@param[in]	ptr	pointer returned by new_array().
+	@return number of objects of type 'T' that fit in the user bytes */
+	size_type
+	n_elements_allocated(
+		const_pointer	ptr)
+	{
+		const ut_new_pfx_t*	pfx
+			= reinterpret_cast<const ut_new_pfx_t*>(ptr) - 1;
+
+		const size_type		user_bytes
+			= pfx->m_size - sizeof(ut_new_pfx_t);
+
+		ut_ad(user_bytes % sizeof(T) == 0);
+
+		return(user_bytes / sizeof(T));
+	}
+
+	/** Trace a memory allocation.
+	After the accounting, the data needed for tracing the deallocation
+	later is written into 'pfx'.
+	The PFS event name is picked on the following criteria:
+	1. If key (!= PSI_NOT_INSTRUMENTED) has been specified when constructing
+	   this ut_allocator object, then the name associated with that key will
+	   be used (this is the recommended approach for new code)
+	2. Otherwise, if "file" is NULL, then the name associated with
+	   mem_key_std will be used
+	3. Otherwise, if an entry is found by ut_new_get_key_by_file(), that
+	   corresponds to "file", that will be used (see ut_new_boot())
+	4. Otherwise, the name associated with mem_key_other will be used.
+	@param[in]	size	number of bytes that were allocated
+	@param[in]	file	file name of the caller or NULL if unknown
+	@param[out]	pfx	placeholder to store the info which will be
+	needed when freeing the memory */
+	void
+	allocate_trace(
+		size_t		size,
+		const char*	file,
+		ut_new_pfx_t*	pfx)
+	{
+		const PSI_memory_key	key = get_mem_key(file);
+
+		pfx->m_key = PSI_MEMORY_CALL(memory_alloc)(key, size, & pfx->m_owner);
+		pfx->m_size = size;
+	}
+
+	/** Trace a memory deallocation.
+	@param[in]	pfx	info for the deallocation */
+	void
+	deallocate_trace(
+		const ut_new_pfx_t*	pfx)
+	{
+		PSI_MEMORY_CALL(memory_free)(pfx->m_key, pfx->m_size, pfx->m_owner);
+	}
+
+	/** Performance schema key. */
+	PSI_memory_key	m_key;
+
+#endif /* UNIV_PFS_MEMORY */
+
+private:
+
+	/** Assignment operator, not used, thus disabled (private). */
+	template <class U>
+	void
+	operator=(
+		const ut_allocator<U>&);
+
+	/** A flag to indicate whether out of memory (OOM) error is considered
+	fatal.  If true, it is fatal. */
+	bool	m_oom_fatal;
+};
+
+/** Compare two allocators of the same type.
+As long as the type of A1 and A2 is the same, a memory allocated by A1
+could be freed by A2 even if the pfs mem key is different. */
+template <typename T>
+inline
+bool
+operator==(
+	const ut_allocator<T>&	lhs,
+	const ut_allocator<T>&	rhs)
+{
+	return(true);
+}
+
+/** Compare two allocators of the same type. */
+template <typename T>
+inline
+bool
+operator!=(
+	const ut_allocator<T>&	lhs,
+	const ut_allocator<T>&	rhs)
+{
+	return(!(lhs == rhs));
+}
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Allocate, trace the allocation and construct an object.
+Use this macro instead of 'new' within InnoDB.
+For example: instead of
+	Foo*	f = new Foo(args);
+use:
+	Foo*	f = UT_NEW(Foo(args), mem_key_some);
+Upon failure to allocate the memory, this macro may return NULL. It
+will not throw exceptions. After successful allocation the returned
+pointer must be passed to UT_DELETE() when no longer needed.
+@param[in]	expr	any expression that could follow "new"
+@param[in]	key	performance schema memory tracing key
+@return pointer to the created object or NULL */
+#define UT_NEW(expr, key) \
+	/* Placement new will return NULL and not attempt to construct an
+	object if the passed in pointer is NULL, e.g. if allocate() has
+	failed to allocate memory and has returned NULL. */ \
+	::new(ut_allocator<byte>(key).allocate( \
+		sizeof expr, NULL, __FILE__, false, false)) expr
+
+/** Allocate, trace the allocation and construct an object.
+Use this macro instead of 'new' within InnoDB and instead of UT_NEW()
+when creating a dedicated memory key is not feasible.
+For example: instead of
+	Foo*	f = new Foo(args);
+use:
+	Foo*	f = UT_NEW_NOKEY(Foo(args));
+Upon failure to allocate the memory, this macro may return NULL. It
+will not throw exceptions. After successful allocation the returned
+pointer must be passed to UT_DELETE() when no longer needed.
+@param[in]	expr	any expression that could follow "new"
+@return pointer to the created object or NULL */
+#define UT_NEW_NOKEY(expr)	UT_NEW(expr, PSI_NOT_INSTRUMENTED)
+
+/** Destroy, deallocate and trace the deallocation of an object created by
+UT_NEW() or UT_NEW_NOKEY().
+We can't instantiate ut_allocator without having the type of the object, thus
+we redirect this to a templated function. */
+#define UT_DELETE(ptr)		ut_delete(ptr)
+
+/** Destroy and account object created by UT_NEW() or UT_NEW_NOKEY().
+@param[in,out]	ptr	pointer to the object */
+template <typename T>
+inline
+void
+ut_delete(
+	T*	ptr)
+{
+	if (ptr == NULL) {
+		return;
+	}
+
+	ut_allocator<T>	allocator;
+
+	allocator.destroy(ptr);
+	allocator.deallocate(ptr);
+}
+
+/** Allocate and account 'n_elements' objects of type 'type'.
+Use this macro to allocate memory within InnoDB instead of 'new[]'.
+The returned pointer must be passed to UT_DELETE_ARRAY().
+@param[in]	type		type of objects being created
+@param[in]	n_elements	number of objects to create
+@param[in]	key		performance schema memory tracing key
+@return pointer to the first allocated object or NULL */
+#define UT_NEW_ARRAY(type, n_elements, key) \
+	ut_allocator<type>(key).new_array(n_elements, __FILE__)
+
+/** Allocate and account 'n_elements' objects of type 'type'.
+Use this macro to allocate memory within InnoDB instead of 'new[]' and
+instead of UT_NEW_ARRAY() when it is not feasible to create a dedicated key.
+@param[in]	type		type of objects being created
+@param[in]	n_elements	number of objects to create
+@return pointer to the first allocated object or NULL */
+#define UT_NEW_ARRAY_NOKEY(type, n_elements) \
+	UT_NEW_ARRAY(type, n_elements, PSI_NOT_INSTRUMENTED)
+
+/** Destroy, deallocate and trace the deallocation of an array created by
+UT_NEW_ARRAY() or UT_NEW_ARRAY_NOKEY().
+We can't instantiate ut_allocator without having the type of the object, thus
+we redirect this to a templated function. */
+#define UT_DELETE_ARRAY(ptr)	ut_delete_array(ptr)
+
+/** Destroy and account objects created by UT_NEW_ARRAY() or
+UT_NEW_ARRAY_NOKEY().
+@param[in,out]	ptr	pointer to the first object in the array */
+template <typename T>
+inline
+void
+ut_delete_array(
+	T*	ptr)
+{
+	ut_allocator<T>().delete_array(ptr);
+}
+
+#define ut_malloc(n_bytes, key)		static_cast<void*>( \
+	ut_allocator<byte>(key).allocate( \
+		n_bytes, NULL, __FILE__, false, false))
+
+#define ut_zalloc(n_bytes, key)		static_cast<void*>( \
+	ut_allocator<byte>(key).allocate( \
+		n_bytes, NULL, __FILE__, true, false))
+
+#define ut_malloc_nokey(n_bytes)	static_cast<void*>( \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate( \
+		n_bytes, NULL, __FILE__, false, false))
+
+#define ut_zalloc_nokey(n_bytes)	static_cast<void*>( \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate( \
+		n_bytes, NULL, __FILE__, true, false))
+
+#define ut_zalloc_nokey_nofatal(n_bytes)	static_cast<void*>( \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED). \
+		set_oom_not_fatal(). \
+		allocate(n_bytes, NULL, __FILE__, true, false))
+
+#define ut_realloc(ptr, n_bytes)	static_cast<void*>( \
+	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).reallocate( \
+		ptr, n_bytes, __FILE__))
+
+#define ut_free(ptr)	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
+	reinterpret_cast<byte*>(ptr))
+
+#else /* UNIV_PFS_MEMORY */
+
+/* Fallbacks when memory tracing is disabled at compile time. */
+
+#define UT_NEW(expr, key)		::new(std::nothrow) expr
+#define UT_NEW_NOKEY(expr)		::new(std::nothrow) expr
+#define UT_DELETE(ptr)			::delete ptr
+
+#define UT_NEW_ARRAY(type, n_elements, key) \
+	::new(std::nothrow) type[n_elements]
+
+#define UT_NEW_ARRAY_NOKEY(type, n_elements) \
+	::new(std::nothrow) type[n_elements]
+
+#define UT_DELETE_ARRAY(ptr)		::delete[] ptr
+
+#define ut_malloc(n_bytes, key)		::malloc(n_bytes)
+
+#define ut_zalloc(n_bytes, key)		::calloc(1, n_bytes)
+
+#define ut_malloc_nokey(n_bytes)	::malloc(n_bytes)
+
+#define ut_zalloc_nokey(n_bytes)	::calloc(1, n_bytes)
+
+#define ut_zalloc_nokey_nofatal(n_bytes)	::calloc(1, n_bytes)
+
+#define ut_realloc(ptr, n_bytes)	::realloc(ptr, n_bytes)
+
+#define ut_free(ptr)			::free(ptr)
+
+#endif /* UNIV_PFS_MEMORY */
+
+#endif /* ut0new_h */
diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h
new file mode 100644
index 00000000000..f60608bf6c6
--- /dev/null
+++ b/storage/innobase/include/ut0pool.h
@@ -0,0 +1,366 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0pool.h
+Object pool.
+
+Created 2012-Feb-26 Sunny Bains
+***********************************************************************/
+
+#ifndef ut0pool_h
+#define ut0pool_h
+
+#include <vector>
+#include <queue>
+#include <functional>
+
+#include "ut0new.h"
+
+/** Allocate the memory for the object in blocks. We keep the objects sorted
+on pointer so that they are closer together in case they have to be iterated
+over in a list. */
+template <typename Type, typename Factory, typename LockStrategy>
+struct Pool {
+
+	typedef Type value_type;
+
+	// FIXME: Add an assertion to check that alignment and offset are
+	// as we expect. Also, sizeof(void*) can be 8; can we improve on this?
+	struct Element {
+		Pool*		m_pool;
+		value_type	m_type;
+	};
+
+	/** Constructor
+	@param size size of the memory block */
+	Pool(size_t size)
+		:
+		m_end(),
+		m_start(),
+		m_size(size),
+		m_last()
+	{
+		ut_a(size >= sizeof(Element));
+
+		m_lock_strategy.create();
+
+		ut_a(m_start == 0);
+
+		m_start = reinterpret_cast<Element*>(ut_zalloc_nokey(m_size));
+
+		m_last = m_start;
+
+		m_end = &m_start[m_size / sizeof(*m_start)];
+
+		/* Note: Initialise only a small subset, even though we have
+		allocated all the memory. This is required only because PFS
+		(MTR) results change if we instantiate too many mutexes up
+		front. */
+
+		init(ut_min(size_t(16), size_t(m_end - m_start)));
+
+		ut_ad(m_pqueue.size() <= size_t(m_last - m_start));
+	}
+
+	/** Destructor */
+	~Pool()
+	{
+		m_lock_strategy.destroy();
+
+		for (Element* elem = m_start; elem != m_last; ++elem) {
+
+			ut_ad(elem->m_pool == this);
+			Factory::destroy(&elem->m_type);
+		}
+
+		ut_free(m_start);
+		m_end = m_last = m_start = 0;
+		m_size = 0;
+	}
+
+	/** Get an object from the pool.
+	@return a free instance or NULL if exhausted. */
+	Type*	get()
+	{
+		Element*	elem;
+
+		m_lock_strategy.enter();
+
+		if (!m_pqueue.empty()) {
+
+			elem = m_pqueue.top();
+			m_pqueue.pop();
+
+		} else if (m_last < m_end) {
+
+			/* Initialise the remaining elements. */
+			init(m_end - m_last);
+
+			ut_ad(!m_pqueue.empty());
+
+			elem = m_pqueue.top();
+			m_pqueue.pop();
+		} else {
+			elem = NULL;
+		}
+
+		m_lock_strategy.exit();
+
+		return(elem != NULL ? &elem->m_type : 0);
+	}
+
+	/** Add the object to the pool.
+	@param ptr object to free */
+	static void mem_free(value_type* ptr)
+	{
+		Element*	elem;
+		byte*		p = reinterpret_cast<byte*>(ptr + 1);
+
+		elem = reinterpret_cast<Element*>(p - sizeof(*elem));
+
+		elem->m_pool->put(elem);
+	}
+
+protected:
+	// Disable copying
+	Pool(const Pool&);
+	Pool& operator=(const Pool&);
+
+private:
+
+	/* We only need to compare on pointer address. */
+	typedef std::priority_queue<
+		Element*,
+		std::vector<Element*, ut_allocator<Element*> >,
+		std::greater<Element*> >	pqueue_t;
+
+	/** Release the object to the free pool
+	@param elem element to free */
+	void put(Element* elem)
+	{
+		m_lock_strategy.enter();
+
+		ut_ad(elem >= m_start && elem < m_last);
+
+		ut_ad(Factory::debug(&elem->m_type));
+
+		m_pqueue.push(elem);
+
+		m_lock_strategy.exit();
+	}
+
+	/** Initialise the elements.
+	@param n_elems Number of elements to initialise */
+	void init(size_t n_elems)
+	{
+		ut_ad(size_t(m_end - m_last) >= n_elems);
+
+		for (size_t i = 0; i < n_elems; ++i, ++m_last) {
+
+			m_last->m_pool = this;
+			Factory::init(&m_last->m_type);
+			m_pqueue.push(m_last);
+		}
+
+		ut_ad(m_last <= m_end);
+	}
+
+private:
+	/** Pointer to one past the last element of the block */
+	Element*		m_end;
+
+	/** Pointer to the first element */
+	Element*		m_start;
+
+	/** Size of the block in bytes */
+	size_t			m_size;
+
+	/** Upper limit of used space */
+	Element*		m_last;
+
+	/** Priority queue ordered on the pointer addresses. */
+	pqueue_t		m_pqueue;
+
+	/** Lock strategy to use */
+	LockStrategy		m_lock_strategy;
+};
+
+template <typename Pool, typename LockStrategy>
+struct PoolManager {
+
+	typedef Pool PoolType;
+	typedef typename PoolType::value_type value_type;
+
+	PoolManager(size_t size)
+		:
+		m_size(size)
+	{
+		create();
+	}
+
+	~PoolManager()
+	{
+		destroy();
+
+		ut_a(m_pools.empty());
+	}
+
+	/** Get an element from one of the pools, retrying until one is free.
+	@return instance, never NULL: the loop exits only on success. */
+	value_type* get()
+	{
+		size_t		index = 0;
+		size_t		delay = 1;
+		value_type*	ptr = NULL;
+
+		do {
+			m_lock_strategy.enter();
+
+			ut_ad(!m_pools.empty());
+
+			size_t	n_pools = m_pools.size();
+
+			PoolType*	pool = m_pools[index % n_pools];
+
+			m_lock_strategy.exit();
+
+			ptr = pool->get();
+
+			if (ptr == 0 && (index / n_pools) > 2) {
+
+				if (!add_pool(n_pools)) {
+
+					ib::error() << "Failed to allocate"
+						" memory for a pool of size "
+						<< m_size << " bytes. Will"
+						" wait for " << delay
+						<< " seconds for a thread to"
+						" free a resource";
+
+					/* There is nothing much we can do
+					except crash and burn, however lets
+					be a little optimistic and wait for
+					a resource to be freed. */
+					os_thread_sleep(delay * 1000000);
+
+					if (delay < 32) {
+						delay <<= 1;
+					}
+
+				} else {
+					delay = 1;
+				}
+			}
+
+			++index;
+
+		} while (ptr == NULL);
+
+		return(ptr);
+	}
+
+	static void mem_free(value_type* ptr)
+	{
+		PoolType::mem_free(ptr);
+	}
+
+private:
+	/** Add a new pool
+	@param n_pools Number of pools that existed when the add pool was
+			called.
+	@return true on success */
+	bool add_pool(size_t n_pools)
+	{
+		bool	added = false;
+
+		m_lock_strategy.enter();
+
+		if (n_pools < m_pools.size()) {
+			/* Some other thread already added a pool. */
+			added = true;
+		} else {
+			PoolType*	pool;
+
+			ut_ad(n_pools == m_pools.size());
+
+			pool = UT_NEW_NOKEY(PoolType(m_size));
+
+			if (pool != NULL) {
+
+				ut_ad(n_pools <= m_pools.size());
+
+				m_pools.push_back(pool);
+
+				ib::info() << "Number of pools: "
+					<< m_pools.size();
+
+				added = true;
+			}
+		}
+
+		ut_ad(n_pools < m_pools.size() || !added);
+
+		m_lock_strategy.exit();
+
+		return(added);
+	}
+
+	/** Create the pool manager. */
+	void create()
+	{
+		ut_a(m_size > sizeof(value_type));
+		m_lock_strategy.create();
+
+		add_pool(0);
+	}
+
+	/** Release the resources. */
+	void destroy()
+	{
+		typename Pools::iterator it;
+		typename Pools::iterator end = m_pools.end();
+
+		for (it = m_pools.begin(); it != end; ++it) {
+			PoolType*	pool = *it;
+
+			UT_DELETE(pool);
+		}
+
+		m_pools.clear();
+
+		m_lock_strategy.destroy();
+	}
+private:
+	// Disable copying
+	PoolManager(const PoolManager&);
+	PoolManager& operator=(const PoolManager&);
+
+	typedef std::vector<PoolType*, ut_allocator<PoolType*> >	Pools;
+
+	/** Size of each block */
+	size_t		m_size;
+
+	/** Pools managed by this manager */
+	Pools		m_pools;
+
+	/** Lock strategy to use */
+	LockStrategy		m_lock_strategy;
+};
+
+#endif /* ut0pool_h */
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
index 59e3fc94598..9555fe6137c 100644
--- a/storage/innobase/include/ut0rbt.h
+++ b/storage/innobase/include/ut0rbt.h
@@ -1,6 +1,6 @@
-/***************************************************************************//**
+/*****************************************************************************
 
-Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -111,15 +111,13 @@ struct ib_rbt_bound_t {
 
 /**********************************************************************//**
 Free an instance of  a red black tree */
-UNIV_INTERN
 void
 rbt_free(
 /*=====*/
 	ib_rbt_t*	tree);			/*!< in: rb tree to free */
 /**********************************************************************//**
 Create an instance of a red black tree
-@return	rb tree instance */
-UNIV_INTERN
+@return rb tree instance */
 ib_rbt_t*
 rbt_create(
 /*=======*/
@@ -128,8 +126,7 @@ rbt_create(
 /**********************************************************************//**
 Create an instance of a red black tree, whose comparison function takes
 an argument
-@return	rb tree instance */
-UNIV_INTERN
+@return rb tree instance */
 ib_rbt_t*
 rbt_create_arg_cmp(
 /*===============*/
@@ -139,7 +136,6 @@ rbt_create_arg_cmp(
 	void*	cmp_arg);		/*!< in: compare fn arg */
 /**********************************************************************//**
 Delete a node from the red black tree, identified by key */
-UNIV_INTERN
 ibool
 rbt_delete(
 /*=======*/
@@ -149,8 +145,7 @@ rbt_delete(
 /**********************************************************************//**
 Remove a node from the red black tree, NOTE: This function will not delete
 the node instance, THAT IS THE CALLERS RESPONSIBILITY.
-@return	the deleted node with the const. */
-UNIV_INTERN
+@return the deleted node with the const. */
 ib_rbt_node_t*
 rbt_remove_node(
 /*============*/
@@ -163,8 +158,7 @@ rbt_remove_node(
 /**********************************************************************//**
 Return a node from the red black tree, identified by
 key, NULL if not found
-@return	node if found else return NULL */
-UNIV_INTERN
+@return node if found else return NULL */
 const ib_rbt_node_t*
 rbt_lookup(
 /*=======*/
@@ -172,8 +166,7 @@ rbt_lookup(
 	const void*	key);			/*!< in: key to lookup */
 /**********************************************************************//**
 Add data to the red black tree, identified by key (no dups yet!)
-@return	inserted node */
-UNIV_INTERN
+@return inserted node */
 const ib_rbt_node_t*
 rbt_insert(
 /*=======*/
@@ -183,8 +176,7 @@ rbt_insert(
 						copied to the node.*/
 /**********************************************************************//**
 Add a new node to the tree, useful for data that is pre-sorted.
-@return	appended node */
-UNIV_INTERN
+@return appended node */
 const ib_rbt_node_t*
 rbt_add_node(
 /*=========*/
@@ -194,24 +186,21 @@ rbt_add_node(
 						to the node */
 /**********************************************************************//**
 Return the left most data node in the tree
-@return	left most node */
-UNIV_INTERN
+@return left most node */
 const ib_rbt_node_t*
 rbt_first(
 /*======*/
 	const ib_rbt_t*	tree);			/*!< in: rb tree */
 /**********************************************************************//**
 Return the right most data node in the tree
-@return	right most node */
-UNIV_INTERN
+@return right most node */
 const ib_rbt_node_t*
 rbt_last(
 /*=====*/
 	const ib_rbt_t*	tree);			/*!< in: rb tree */
 /**********************************************************************//**
 Return the next node from current.
-@return	successor node to current that is passed in. */
-UNIV_INTERN
+@return successor node to current that is passed in. */
 const ib_rbt_node_t*
 rbt_next(
 /*=====*/
@@ -220,8 +209,7 @@ rbt_next(
 			current);
 /**********************************************************************//**
 Return the prev node from current.
-@return	precedessor node to current that is passed in */
-UNIV_INTERN
+@return predecessor node to current that is passed in */
 const ib_rbt_node_t*
 rbt_prev(
 /*=====*/
@@ -230,8 +218,7 @@ rbt_prev(
 			current);
 /**********************************************************************//**
 Find the node that has the lowest key that is >= key.
-@return	node that satisfies the lower bound constraint or NULL */
-UNIV_INTERN
+@return node that satisfies the lower bound constraint or NULL */
 const ib_rbt_node_t*
 rbt_lower_bound(
 /*============*/
@@ -239,8 +226,7 @@ rbt_lower_bound(
 	const void*	key);			/*!< in: key to search */
 /**********************************************************************//**
 Find the node that has the greatest key that is <= key.
-@return	node that satisifies the upper bound constraint or NULL */
-UNIV_INTERN
+@return node that satisfies the upper bound constraint or NULL */
 const ib_rbt_node_t*
 rbt_upper_bound(
 /*============*/
@@ -250,8 +236,7 @@ rbt_upper_bound(
 Search for the key, a node will be retuned in parent.last, whether it
 was found or not. If not found then parent.last will contain the
 parent node for the possibly new key otherwise the matching node.
-@return	result of last comparison */
-UNIV_INTERN
+@return result of last comparison */
 int
 rbt_search(
 /*=======*/
@@ -262,8 +247,7 @@ rbt_search(
 Search for the key, a node will be retuned in parent.last, whether it
 was found or not. If not found then parent.last will contain the
 parent node for the possibly new key otherwise the matching node.
-@return	result of last comparison */
-UNIV_INTERN
+@return result of last comparison */
 int
 rbt_search_cmp(
 /*===========*/
@@ -276,15 +260,13 @@ rbt_search_cmp(
 						with argument */
 /**********************************************************************//**
 Clear the tree, deletes (and free's) all the nodes. */
-UNIV_INTERN
 void
 rbt_clear(
 /*======*/
 	ib_rbt_t*	tree);			/*!< in: rb tree */
 /**********************************************************************//**
 Merge the node from dst into src. Return the number of nodes merged.
-@return	no. of recs merged */
-UNIV_INTERN
+@return no. of recs merged */
 ulint
 rbt_merge_uniq(
 /*===========*/
@@ -296,8 +278,7 @@ Delete the nodes from src after copying node to dst. As a side effect
 the duplicates will be left untouched in the src, since we don't support
 duplicates (yet). NOTE: src and dst must be similar, the function doesn't
 check for this condition (yet).
-@return	no. of recs merged */
-UNIV_INTERN
+@return no. of recs merged */
 ulint
 rbt_merge_uniq_destructive(
 /*=======================*/
@@ -306,15 +287,13 @@ rbt_merge_uniq_destructive(
 /**********************************************************************//**
 Verify the integrity of the RB tree. For debugging. 0 failure else height
 of tree (in count of black nodes).
-@return	TRUE if OK FALSE if tree invalid. */
-UNIV_INTERN
+@return TRUE if OK FALSE if tree invalid. */
 ibool
 rbt_validate(
 /*=========*/
 	const ib_rbt_t*	tree);			/*!< in: tree to validate */
 /**********************************************************************//**
 Iterate over the tree in depth first order. */
-UNIV_INTERN
 void
 rbt_print(
 /*======*/
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 6ed3ee3b2e5..aa5b4b6745d 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -45,7 +45,7 @@ ut_rnd_set_seed(
 	ulint	 seed);		 /*!< in: seed */
 /********************************************************//**
 The following function generates a series of 'random' ulint integers.
-@return	the next 'random' number */
+@return the next 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_next_ulint(
@@ -56,32 +56,26 @@ The following function generates 'random' ulint integers which
 enumerate the value space (let there be N of them) of ulint integers
 in a pseudo-random fashion. Note that the same integer is repeated
 always after N calls to the generator.
-@return	the 'random' number */
+@return the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_ulint(void);
 /*==================*/
 /********************************************************//**
 Generates a random integer from a given interval.
-@return	the 'random' number */
+@return the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_interval(
 /*============*/
 	ulint	low,	/*!< in: low limit; can generate also this value */
 	ulint	high);	/*!< in: high limit; can generate also this value */
-/*********************************************************//**
-Generates a random iboolean value.
-@return	the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
+
 /*******************************************************//**
 The following function generates a hash value for a ulint integer
 to a hash table of size table_size, which should be a prime or some
 random number to work reliably.
-@return	hash value */
+@return hash value */
 UNIV_INLINE
 ulint
 ut_hash_ulint(
@@ -90,7 +84,7 @@ ut_hash_ulint(
 	ulint	 table_size);	/*!< in: hash table size */
 /*************************************************************//**
 Folds a 64-bit integer.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_ull(
@@ -99,18 +93,17 @@ ut_fold_ull(
 	MY_ATTRIBUTE((const));
 /*************************************************************//**
 Folds a character string ending in the null character.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_string(
 /*===========*/
 	const char*	str)	/*!< in: null-terminated string */
-	MY_ATTRIBUTE((pure));
+	MY_ATTRIBUTE((warn_unused_result));
 /***********************************************************//**
 Looks for a prime number slightly greater than the given argument.
 The prime is chosen so that it is not near any power of 2.
-@return	prime */
-UNIV_INTERN
+@return prime */
 ulint
 ut_find_prime(
 /*==========*/
@@ -121,7 +114,7 @@ ut_find_prime(
 
 /*************************************************************//**
 Folds a pair of ulints.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_ulint_pair(
@@ -131,7 +124,7 @@ ut_fold_ulint_pair(
 	MY_ATTRIBUTE((const));
 /*************************************************************//**
 Folds a binary string.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_binary(
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index 024c59e553b..503c9482ea3 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -54,7 +54,7 @@ ut_rnd_set_seed(
 
 /********************************************************//**
 The following function generates a series of 'random' ulint integers.
-@return	the next 'random' number */
+@return the next 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_next_ulint(
@@ -81,7 +81,7 @@ The following function generates 'random' ulint integers which
 enumerate the value space of ulint integers in a pseudo random
 fashion. Note that the same integer is repeated always after
 2 to power 32 calls to the generator (if ulint is 32-bit).
-@return	the 'random' number */
+@return the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_ulint(void)
@@ -98,7 +98,7 @@ ut_rnd_gen_ulint(void)
 
 /********************************************************//**
 Generates a random integer from a given interval.
-@return	the 'random' number */
+@return the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_interval(
@@ -120,31 +120,11 @@ ut_rnd_interval(
 	return(low + (rnd % (high - low)));
 }
 
-/*********************************************************//**
-Generates a random iboolean value.
-@return	the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void)
-/*=================*/
-{
-	ulint	 x;
-
-	x = ut_rnd_gen_ulint();
-
-	if (((x >> 20) + (x >> 15)) & 1) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
 /*******************************************************//**
 The following function generates a hash value for a ulint integer
 to a hash table of size table_size, which should be a prime
 or some random number for the hash table to work reliably.
-@return	hash value */
+@return hash value */
 UNIV_INLINE
 ulint
 ut_hash_ulint(
@@ -160,7 +140,7 @@ ut_hash_ulint(
 
 /*************************************************************//**
 Folds a 64-bit integer.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_ull(
@@ -173,7 +153,7 @@ ut_fold_ull(
 
 /*************************************************************//**
 Folds a character string ending in the null character.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_string(
@@ -196,7 +176,7 @@ ut_fold_string(
 
 /*************************************************************//**
 Folds a pair of ulints.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_ulint_pair(
@@ -210,7 +190,7 @@ ut_fold_ulint_pair(
 
 /*************************************************************//**
 Folds a binary string.
-@return	folded value */
+@return folded value */
 UNIV_INLINE
 ulint
 ut_fold_binary(
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
new file mode 100644
index 00000000000..1cccb0b8f84
--- /dev/null
+++ b/storage/innobase/include/ut0stage.h
@@ -0,0 +1,594 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0stage.h
+Supplementary code to performance schema stage instrumentation.
+
+Created Nov 12, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef ut0stage_h
+#define ut0stage_h
+
+#include <algorithm>
+#include <math.h>
+
+#include "my_global.h" /* needed for headers from mysql/psi/ */
+
+#include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */
+#include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */
+
+#include "univ.i"
+
+#include "dict0mem.h" /* dict_index_t */
+#include "row0log.h" /* row_log_estimate_work() */
+#include "srv0srv.h" /* ut_stage_alter_t */
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+
+typedef void PSI_stage_progress;
+
+/** Class used to report ALTER TABLE progress via performance_schema.
+The only user of this class is the ALTER TABLE code and it calls the methods
+in the following order
+constructor
+begin_phase_read_pk()
+  multiple times:
+    n_pk_recs_inc() // once per record read
+    inc() // once per page read
+end_phase_read_pk()
+if any new indexes are being added, for each one:
+  begin_phase_sort()
+    multiple times:
+      inc() // once per record sorted
+  begin_phase_insert()
+    multiple times:
+      inc() // once per record inserted
+  begin_phase_log_index()
+    multiple times:
+      inc() // once per log-block applied
+begin_phase_flush()
+    multiple times:
+      inc() // once per page flushed
+begin_phase_log_table()
+    multiple times:
+      inc() // once per log-block applied
+begin_phase_end()
+destructor
+
+This class knows the specifics of each phase and tries to increment the
+progress in an even manner across the entire ALTER TABLE lifetime. */
+class ut_stage_alter_t {
+public:
+	/** Constructor.
+	@param[in]	pk	primary key of the old table */
+	explicit
+	ut_stage_alter_t(
+		const dict_index_t*	pk)
+		:
+		m_progress(NULL),
+		m_pk(pk),
+		m_n_pk_recs(0),
+		m_n_pk_pages(0),
+		m_n_recs_processed(0),
+		m_n_flush_pages(0),
+		m_cur_phase(NOT_STARTED)
+	{
+	}
+
+	/** Destructor. */
+	~ut_stage_alter_t();
+
+	/** Flag an ALTER TABLE start (read primary key phase).
+	@param[in]	n_sort_indexes	number of indexes that will be sorted
+	during ALTER TABLE, used for estimating the total work to be done */
+	void
+	begin_phase_read_pk(
+		ulint	n_sort_indexes);
+
+	/** Increment the number of records in PK (table) by 1.
+	This is used to get more accurate estimate about the number of
+	records per page which is needed because some phases work on
+	per-page basis while some work on per-record basis and we want
+	to get the progress as even as possible. */
+	void
+	n_pk_recs_inc();
+
+	/** Flag either one record or one page processed, depending on the
+	current phase.
+	@param[in]	inc_val	flag this many units processed at once */
+	void
+	inc(
+		ulint	inc_val = 1);
+
+	/** Flag the end of reading of the primary key.
+	Here we know the exact number of pages and records and calculate
+	the number of records per page and refresh the estimate. */
+	void
+	end_phase_read_pk();
+
+	/** Flag the beginning of the sort phase.
+	@param[in]	sort_multi_factor	since merge sort processes
+	one page more than once we only update the estimate once per this
+	many pages processed. */
+	void
+	begin_phase_sort(
+		double	sort_multi_factor);
+
+	/** Flag the beginning of the insert phase. */
+	void
+	begin_phase_insert();
+
+	/** Flag the beginning of the flush phase.
+	@param[in]	n_flush_pages	this many pages are going to be
+	flushed */
+	void
+	begin_phase_flush(
+		ulint	n_flush_pages);
+
+	/** Flag the beginning of the log index phase. */
+	void
+	begin_phase_log_index();
+
+	/** Flag the beginning of the log table phase. */
+	void
+	begin_phase_log_table();
+
+	/** Flag the beginning of the end phase. */
+	void
+	begin_phase_end();
+
+private:
+
+	/** Update the estimate of total work to be done. */
+	void
+	reestimate();
+
+	/** Change the current phase.
+	@param[in]	new_stage	pointer to the new stage to change to */
+	void
+	change_phase(
+		const PSI_stage_info*	new_stage);
+
+	/** Performance schema accounting object. */
+	/* TODO: MySQL 5.7 PSI */
+	PSI_stage_progress*	m_progress;
+
+	/** Old table PK. Used for calculating the estimate. */
+	const dict_index_t*	m_pk;
+
+	/** Number of records in the primary key (table), including delete
+	marked records. */
+	ulint			m_n_pk_recs;
+
+	/** Number of leaf pages in the primary key. */
+	ulint			m_n_pk_pages;
+
+	/** Estimated number of records per page in the primary key. */
+	double			m_n_recs_per_page;
+
+	/** Number of indexes that are being added. */
+	ulint			m_n_sort_indexes;
+
+	/** During the sort phase, increment the counter once per this
+	many pages processed. This is because sort processes one page more
+	than once. */
+	ulint			m_sort_multi_factor;
+
+	/** Number of records processed during sort & insert phases. We
+	need to increment the counter only once per page, or once per
+	recs-per-page records. */
+	ulint			m_n_recs_processed;
+
+	/** Number of pages to flush. */
+	ulint			m_n_flush_pages;
+
+	/** Current phase. */
+	enum {
+		NOT_STARTED = 0,
+		READ_PK = 1,
+		SORT = 2,
+		INSERT = 3,
+		FLUSH = 4,
+		/* JAN: TODO: MySQL 5.7 vs. MariaDB sql/log.h
+		LOG_INDEX = 5,
+		LOG_TABLE = 6, */
+		LOG_INNODB_INDEX = 5,
+		LOG_INNODB_TABLE = 6,
+		END = 7,
+	}			m_cur_phase;
+};
+
+/** Destructor. */
+inline
+ut_stage_alter_t::~ut_stage_alter_t()
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	/* TODO: MySQL 5.7 PSI: Set completed = estimated before we quit.
+	mysql_stage_set_work_completed(
+		m_progress,
+		mysql_stage_get_work_estimated(m_progress));
+
+	mysql_end_stage();
+	*/
+}
+
+/** Flag an ALTER TABLE start (read primary key phase).
+@param[in]	n_sort_indexes	number of indexes that will be sorted
+during ALTER TABLE, used for estimating the total work to be done */
+inline
+void
+ut_stage_alter_t::begin_phase_read_pk(
+	ulint	n_sort_indexes)
+{
+	m_n_sort_indexes = n_sort_indexes;
+
+	m_cur_phase = READ_PK;
+
+	/* TODO: MySQL 5.7 PSI
+	m_progress = mysql_set_stage(
+		srv_stage_alter_table_read_pk_internal_sort.m_key);
+
+	mysql_stage_set_work_completed(m_progress, 0);
+	*/
+	reestimate();
+}
+
+/** Increment the number of records in PK (table) by 1.
+This is used to get more accurate estimate about the number of
+records per page which is needed because some phases work on
+per-page basis while some work on per-record basis and we want
+to get the progress as even as possible. */
+inline
+void
+ut_stage_alter_t::n_pk_recs_inc()
+{
+	m_n_pk_recs++;
+}
+
+/** Flag either one record or one page processed, depending on the
+current phase.
+@param[in]	inc_val	flag this many units processed at once */
+inline
+void
+ut_stage_alter_t::inc(
+	ulint	inc_val /* = 1 */)
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	ulint	multi_factor = 1;
+	bool	should_proceed = true;
+
+	switch (m_cur_phase) {
+	case NOT_STARTED:
+		ut_error;
+	case READ_PK:
+		m_n_pk_pages++;
+		ut_ad(inc_val == 1);
+		/* Overall the read pk phase will read all the pages from the
+		PK and will do work, proportional to the number of added
+		indexes, thus when this is called once per read page we
+		increment with 1 + m_n_sort_indexes */
+		inc_val = 1 + m_n_sort_indexes;
+		break;
+	case SORT:
+		multi_factor = m_sort_multi_factor;
+		/* fall through */
+	case INSERT: {
+		/* Increment the progress every nth record. During
+		sort and insert phases, this method is called once per
+		record processed. We need fractional point numbers here
+		because "records per page" is such a number naturally and
+		to avoid rounding skew we want, for example: if there are
+		(double) N records per page, then the work_completed
+		should be incremented on the inc() calls round(k*N),
+		for k=1,2,3... */
+		const double	every_nth = m_n_recs_per_page * multi_factor;
+
+		const ulint	k = static_cast<ulint>(
+			round(m_n_recs_processed / every_nth));
+
+		const ulint	nth = static_cast<ulint>(
+			round(k * every_nth));
+
+		should_proceed = m_n_recs_processed == nth;
+
+		m_n_recs_processed++;
+
+		break;
+	}
+	case FLUSH:
+		break;
+	/* JAN: TODO: MySQL 5.7
+	case LOG_INDEX:
+		break;
+	case LOG_TABLE:
+	break; */
+	case LOG_INNODB_INDEX:
+	case LOG_INNODB_TABLE:
+		break;
+	case END:
+		break;
+	}
+
+	if (should_proceed) {
+		/* TODO: MySQL 5.7 PSI
+		mysql_stage_inc_work_completed(m_progress, inc_val);
+		*/
+		reestimate();
+	}
+}
+
+/** Flag the end of reading of the primary key.
+Here we know the exact number of pages and records and calculate
+the number of records per page and refresh the estimate. */
+inline
+void
+ut_stage_alter_t::end_phase_read_pk()
+{
+	reestimate();
+
+	if (m_n_pk_pages == 0) {
+		/* The number of pages in the PK could be 0 if the tree is
+		empty. In this case we set m_n_recs_per_page to 1 to avoid
+		division by zero later. */
+		m_n_recs_per_page = 1.0;
+	} else {
+		m_n_recs_per_page = std::max(
+			static_cast<double>(m_n_pk_recs) / m_n_pk_pages,
+			1.0);
+	}
+}
+
+/** Flag the beginning of the sort phase.
+@param[in]	sort_multi_factor	since merge sort processes
+one page more than once we only update the estimate once per this
+many pages processed. */
+inline
+void
+ut_stage_alter_t::begin_phase_sort(
+	double	sort_multi_factor)
+{
+	if (sort_multi_factor <= 1.0) {
+		m_sort_multi_factor = 1;
+	} else {
+		m_sort_multi_factor = static_cast<ulint>(
+			round(sort_multi_factor));
+	}
+
+	change_phase(&srv_stage_alter_table_merge_sort);
+}
+
+/** Flag the beginning of the insert phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_insert()
+{
+	change_phase(&srv_stage_alter_table_insert);
+}
+
+/** Flag the beginning of the flush phase.
+@param[in]	n_flush_pages	this many pages are going to be
+flushed */
+inline
+void
+ut_stage_alter_t::begin_phase_flush(
+	ulint	n_flush_pages)
+{
+	m_n_flush_pages = n_flush_pages;
+
+	reestimate();
+
+	change_phase(&srv_stage_alter_table_flush);
+}
+
+/** Flag the beginning of the log index phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_log_index()
+{
+	change_phase(&srv_stage_alter_table_log_index);
+}
+
+/** Flag the beginning of the log table phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_log_table()
+{
+	change_phase(&srv_stage_alter_table_log_table);
+}
+
+/** Flag the beginning of the end phase. */
+inline
+void
+ut_stage_alter_t::begin_phase_end()
+{
+	change_phase(&srv_stage_alter_table_end);
+}
+
+/** Update the estimate of total work to be done. */
+inline
+void
+ut_stage_alter_t::reestimate()
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	/* During the log table phase we calculate the estimate as
+	work done so far + log size remaining. */
+	if (m_cur_phase == LOG_INNODB_TABLE) {
+		/* TODO: MySQL 5.7 PSI
+		mysql_stage_set_work_estimated(
+			m_progress,
+			mysql_stage_get_work_completed(m_progress)
+			+ row_log_estimate_work(m_pk));
+		*/
+		return;
+	}
+
+	/* During the other phases we use a formula, regardless of
+	how much work has been done so far. */
+
+	/* For number of pages in the PK - if the PK has not been
+	read yet, use stat_n_leaf_pages (approximate), otherwise
+	use the exact number we gathered. */
+	const ulint	n_pk_pages
+		= m_cur_phase != READ_PK
+		? m_n_pk_pages
+		: m_pk->stat_n_leaf_pages;
+
+	/* If flush phase has not started yet and we do not know how
+	many pages are to be flushed, then use a wild guess - the
+	number of pages in the PK / 2. */
+	if (m_n_flush_pages == 0) {
+		m_n_flush_pages = n_pk_pages / 2;
+	}
+
+	ulonglong	estimate __attribute__((unused))
+		= n_pk_pages
+		* (1 /* read PK */
+		   + m_n_sort_indexes /* row_merge_buf_sort() inside the
+				      read PK per created index */
+		   + m_n_sort_indexes * 2 /* sort & insert per created index */)
+		+ m_n_flush_pages
+		+ row_log_estimate_work(m_pk);
+
+	/* Prevent estimate < completed */
+	/* TODO: MySQL 5.7 PSI
+	estimate = std::max(estimate,
+			    mysql_stage_get_work_completed(m_progress));
+
+	mysql_stage_set_work_estimated(m_progress, estimate);
+	*/
+}
+
+/** Change the current phase.
+@param[in]	new_stage	pointer to the new stage to change to */
+inline
+void
+ut_stage_alter_t::change_phase(
+	const PSI_stage_info*	new_stage)
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) {
+		m_cur_phase = READ_PK;
+	} else if (new_stage == &srv_stage_alter_table_merge_sort) {
+		m_cur_phase = SORT;
+	} else if (new_stage == &srv_stage_alter_table_insert) {
+		m_cur_phase = INSERT;
+	} else if (new_stage == &srv_stage_alter_table_flush) {
+		m_cur_phase = FLUSH;
+	/* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */
+	} else if (new_stage == &srv_stage_alter_table_log_index) {
+		m_cur_phase = LOG_INNODB_INDEX;
+	} else if (new_stage == &srv_stage_alter_table_log_table) {
+		m_cur_phase = LOG_INNODB_TABLE;
+	} else if (new_stage == &srv_stage_alter_table_end) {
+		m_cur_phase = END;
+	} else {
+		ut_error;
+	}
+
+	/* TODO: MySQL 5.7 PSI
+	const ulonglong	c = mysql_stage_get_work_completed(m_progress);
+	const ulonglong	e = mysql_stage_get_work_estimated(m_progress);
+
+	m_progress = mysql_set_stage(new_stage->m_key);
+
+	mysql_stage_set_work_completed(m_progress, c);
+	mysql_stage_set_work_estimated(m_progress, e);
+	*/
+}
+#else /* HAVE_PSI_STAGE_INTERFACE */
+
+class ut_stage_alter_t {
+public:
+	explicit
+	ut_stage_alter_t(
+		const dict_index_t*	pk)
+	{
+	}
+
+	void
+	begin_phase_read_pk(
+		ulint	n_sort_indexes)
+	{
+	}
+
+	void
+	n_pk_recs_inc()
+	{
+	}
+
+	void
+	inc(
+		ulint	inc_val = 1)
+	{
+	}
+
+	void
+	end_phase_read_pk()
+	{
+	}
+
+	void
+	begin_phase_sort(
+		double	sort_multi_factor)
+	{
+	}
+
+	void
+	begin_phase_insert()
+	{
+	}
+
+	void
+	begin_phase_flush(
+		ulint	n_flush_pages)
+	{
+	}
+
+	void
+	begin_phase_log_index()
+	{
+	}
+
+	void
+	begin_phase_log_table()
+	{
+	}
+
+	void
+	begin_phase_end()
+	{
+	}
+};
+
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+#endif /* ut0stage_h */
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 6786ad166e8..087f175db50 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -26,49 +26,32 @@ Created 1/20/1994 Heikki Tuuri
 #ifndef ut0ut_h
 #define ut0ut_h
 
-#include "univ.i"
+/* Do not include univ.i because univ.i includes this. */
+
+#include <ostream>
+#include <sstream>
+#include <string.h>
 
 #ifndef UNIV_INNOCHECKSUM
 
 #include "db0err.h"
 
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#endif /* UNIV_HOTBACKUP */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
 #include <time.h>
+
 #ifndef MYSQL_SERVER
 #include <ctype.h>
-#endif
+#endif /* MYSQL_SERVER */
 
-#include <stdarg.h> /* for va_list */
+#include <stdarg.h>
 
 #include <string>
 
-/** Index name prefix in fast index creation */
-#define	TEMP_INDEX_PREFIX	'\377'
 /** Index name prefix in fast index creation, as a string constant */
 #define TEMP_INDEX_PREFIX_STR	"\377"
 
 /** Time stamp */
 typedef time_t	ib_time_t;
 
-/* In order to call a piece of code, when a function returns or when the
-scope ends, use this utility class.  It will invoke the given function
-object in its destructor. */
-template<typename F>
-struct ut_when_dtor {
-	ut_when_dtor(F& p) : f(p) {}
-	~ut_when_dtor() {
-		f();
-	}
-private:
-	F& f;
-};
-
-#ifndef UNIV_INNOCHECKSUM
 #ifndef UNIV_HOTBACKUP
 # if defined(HAVE_PAUSE_INSTRUCTION)
    /* According to the gcc info page, asm volatile means that the
@@ -83,18 +66,22 @@ private:
 
 # elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
 #  define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-# elif defined(HAVE_WINDOWS_ATOMICS)
+# elif defined _WIN32
    /* In the Win32 API, the x86 PAUSE instruction is executed by calling
    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
    independent way by using YieldProcessor. */
 #  define UT_RELAX_CPU() YieldProcessor()
-# elif defined(__powerpc__)
+# elif defined(__powerpc__) && defined __GLIBC__
 #include <sys/platform/ppc.h>
 #  define UT_RELAX_CPU() do { \
      volatile lint      volatile_var = __ppc_get_timebase(); \
    } while (0)
 # else
-#  define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
+#  define UT_RELAX_CPU() do { \
+     volatile int32	volatile_var = 0; /* CAS reads this: must be set */ \
+     int32 oldval= 0; \
+     my_atomic_cas32(&volatile_var, &oldval, 1); \
+   } while (0)
 # endif
 
 #if defined (__GNUC__)
@@ -117,13 +104,13 @@ private:
 /*********************************************************************//**
 Delays execution for at most max_wait_us microseconds or returns earlier
 if cond becomes true.
-@param cond		in: condition to wait for; evaluated every 2 ms
-@param max_wait_us	in: maximum delay to wait, in microseconds */
+@param cond in: condition to wait for; evaluated every 2 ms
+@param max_wait_us in: maximum delay to wait, in microseconds */
 #define UT_WAIT_FOR(cond, max_wait_us)				\
 do {								\
-	ullint	start_us;					\
+	uintmax_t	start_us;					\
 	start_us = ut_time_us(NULL);				\
-	while (!(cond) 						\
+	while (!(cond)						\
 	       && ut_time_us(NULL) - start_us < (max_wait_us)) {\
 								\
 		os_thread_sleep(2000 /* 2 ms */);		\
@@ -131,95 +118,82 @@ do {								\
 } while (0)
 #endif /* !UNIV_HOTBACKUP */
 
-template <class T> T ut_min(T a, T b) { return(a < b ? a : b); }
-template <class T> T ut_max(T a, T b) { return(a > b ? a : b); }
+#define ut_max	std::max
+#define ut_min	std::min
 
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return	minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
-	ulint	 n1,	/*!< in: first number */
-	ulint	 n2);	/*!< in: second number */
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return	maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
-	ulint	 n1,	/*!< in: first number */
-	ulint	 n2);	/*!< in: second number */
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
+/** Calculate the minimum of two pairs.
+@param[out]	min_hi	MSB of the minimum pair
+@param[out]	min_lo	LSB of the minimum pair
+@param[in]	a_hi	MSB of the first pair
+@param[in]	a_lo	LSB of the first pair
+@param[in]	b_hi	MSB of the second pair
+@param[in]	b_lo	LSB of the second pair */
 UNIV_INLINE
 void
 ut_pair_min(
-/*========*/
-	ulint*	a,	/*!< out: more significant part of minimum */
-	ulint*	b,	/*!< out: less significant part of minimum */
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	b1,	/*!< in: less significant part of first pair */
-	ulint	a2,	/*!< in: more significant part of second pair */
-	ulint	b2);	/*!< in: less significant part of second pair */
+	ulint*	min_hi,
+	ulint*	min_lo,
+	ulint	a_hi,
+	ulint	a_lo,
+	ulint	b_hi,
+	ulint	b_lo);
 /******************************************************//**
 Compares two ulints.
-@return	1 if a > b, 0 if a == b, -1 if a < b */
+@return 1 if a > b, 0 if a == b, -1 if a < b */
 UNIV_INLINE
 int
 ut_ulint_cmp(
 /*=========*/
 	ulint	a,	/*!< in: ulint */
 	ulint	b);	/*!< in: ulint */
-/*******************************************************//**
-Compares two pairs of ulints.
-@return	-1 if a < b, 0 if a == b, 1 if a > b */
+/** Compare two pairs of integers.
+@param[in]	a_h	more significant part of first pair
+@param[in]	a_l	less significant part of first pair
+@param[in]	b_h	more significant part of second pair
+@param[in]	b_l	less significant part of second pair
+@return comparison result of (a_h,a_l) and (b_h,b_l)
+@retval -1 if (a_h,a_l) is less than (b_h,b_l)
+@retval 0 if (a_h,a_l) is equal to (b_h,b_l)
+@retval 1 if (a_h,a_l) is greater than (b_h,b_l) */
 UNIV_INLINE
 int
 ut_pair_cmp(
-/*========*/
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	a2,	/*!< in: less significant part of first pair */
-	ulint	b1,	/*!< in: more significant part of second pair */
-	ulint	b2);	/*!< in: less significant part of second pair */
-#endif /* !UNIV_INNOCHECKSUM */
-/*************************************************************//**
-Determines if a number is zero or a power of two.
-@param n	in: number
-@return		nonzero if n is zero or a power of two; zero otherwise */
-#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+	ulint	a_h,
+	ulint	a_l,
+	ulint	b_h,
+	ulint	b_l)
+	MY_ATTRIBUTE((warn_unused_result));
+
 /*************************************************************//**
 Calculates fast the remainder of n/m when m is a power of two.
-@param n	in: numerator
-@param m	in: denominator, must be a power of two
-@return		the remainder of n/m */
+@param n in: numerator
+@param m in: denominator, must be a power of two
+@return the remainder of n/m */
 #define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
 /*************************************************************//**
 Calculates the biggest multiple of m that is not bigger than n
 when m is a power of two.  In other words, rounds n down to m * k.
-@param n	in: number to round down
-@param m	in: alignment, must be a power of two
-@return		n rounded down to the biggest possible integer multiple of m */
+@param n in: number to round down
+@param m in: alignment, must be a power of two
+@return n rounded down to the biggest possible integer multiple of m */
 #define ut_2pow_round(n, m) ((n) & ~((m) - 1))
 /** Align a number down to a multiple of a power of two.
-@param n	in: number to round down
-@param m	in: alignment, must be a power of two
-@return		n rounded down to the biggest possible integer multiple of m */
+@param n in: number to round down
+@param m in: alignment, must be a power of two
+@return n rounded down to the biggest possible integer multiple of m */
 #define ut_calc_align_down(n, m) ut_2pow_round(n, m)
 /********************************************************//**
 Calculates the smallest multiple of m that is not smaller than n
 when m is a power of two.  In other words, rounds n up to m * k.
-@param n	in: number to round up
-@param m	in: alignment, must be a power of two
-@return		n rounded up to the smallest possible integer multiple of m */
+@param n in: number to round up
+@param m in: alignment, must be a power of two
+@return n rounded up to the smallest possible integer multiple of m */
 #define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
-#ifndef UNIV_INNOCHECKSUM
+
 /*************************************************************//**
 Calculates fast the 2-logarithm of a number, rounded upward to an
 integer.
-@return	logarithm in the base 2, rounded upward */
+@return logarithm in the base 2, rounded upward */
 UNIV_INLINE
 ulint
 ut_2_log(
@@ -227,7 +201,7 @@ ut_2_log(
 	ulint	n);	/*!< in: number */
 /*************************************************************//**
 Calculates 2 to power n.
-@return	2 to power n */
+@return 2 to power n */
 UNIV_INLINE
 ulint
 ut_2_exp(
@@ -235,28 +209,23 @@ ut_2_exp(
 	ulint	n);	/*!< in: number */
 /*************************************************************//**
 Calculates fast the number rounded up to the nearest power of 2.
-@return	first power of 2 which is >= n */
-UNIV_INTERN
+@return first power of 2 which is >= n */
 ulint
 ut_2_power_up(
 /*==========*/
 	ulint	n)	/*!< in: number != 0 */
 	MY_ATTRIBUTE((const));
 
-#endif /* !UNIV_INNOCHECKSUM */
-
 /** Determine how many bytes (groups of 8 bits) are needed to
 store the given number of bits.
-@param b	in: bits
-@return		number of bytes (octets) needed to represent b */
+@param b in: bits
+@return number of bytes (octets) needed to represent b */
 #define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
 
-#ifndef UNIV_INNOCHECKSUM
 /**********************************************************//**
 Returns system time. We do not specify the format of the time returned:
 the only way to manipulate it is to use the function ut_difftime.
-@return	system time */
-UNIV_INTERN
+@return system time */
 ib_time_t
 ut_time(void);
 /*=========*/
@@ -266,8 +235,7 @@ Returns system time.
 Upon successful completion, the value 0 is returned; otherwise the
 value -1 is returned and the global variable errno is set to indicate the
 error.
-@return	0 on success, -1 otherwise */
-UNIV_INTERN
+@return 0 on success, -1 otherwise */
 int
 ut_usectime(
 /*========*/
@@ -278,18 +246,16 @@ ut_usectime(
 Returns the number of microseconds since epoch. Similar to
 time(3), the return value is also stored in *tloc, provided
 that tloc is non-NULL.
-@return	us since epoch */
-UNIV_INTERN
-ullint
+@return us since epoch */
+uintmax_t
 ut_time_us(
 /*=======*/
-	ullint*	tloc);	/*!< out: us since epoch, if non-NULL */
+	uintmax_t*	tloc);	/*!< out: us since epoch, if non-NULL */
 /**********************************************************//**
 Returns the number of milliseconds since some epoch.  The
 value may wrap around.  It should only be used for heuristic
 purposes.
-@return	ms since epoch */
-UNIV_INTERN
+@return ms since epoch */
 ulint
 ut_time_ms(void);
 /*============*/
@@ -300,15 +266,13 @@ Returns the number of milliseconds since some epoch.  The
 value may wrap around.  It should only be used for heuristic
 purposes.
 @return ms since epoch */
-UNIV_INTERN
 ulint
 ut_time_ms(void);
 /*============*/
 
 /**********************************************************//**
 Returns the difference of two times in seconds.
-@return	time2 - time1 expressed in seconds */
-UNIV_INTERN
+@return time2 - time1 expressed in seconds */
 double
 ut_difftime(
 /*========*/
@@ -317,9 +281,25 @@ ut_difftime(
 
 #endif /* !UNIV_INNOCHECKSUM */
 
+/** Determines if a number is zero or a power of two.
+@param[in]	n	number
+@return nonzero if n is zero or a power of two; zero otherwise */
+#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+
+/** Functor that compares two C strings. Can be used as a comparator for
+e.g. std::map that uses char* as keys. */
+struct ut_strcmp_functor
+{
+	bool operator()(
+		const char*	a,
+		const char*	b) const
+	{
+		return(strcmp(a, b) < 0);
+	}
+};
+
 /**********************************************************//**
 Prints a timestamp to a file. */
-UNIV_INTERN
 void
 ut_print_timestamp(
 /*===============*/
@@ -330,7 +310,6 @@ ut_print_timestamp(
 
 /**********************************************************//**
 Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
 void
 ut_sprintf_timestamp(
 /*=================*/
@@ -339,14 +318,12 @@ ut_sprintf_timestamp(
 /**********************************************************//**
 Sprintfs a timestamp to a buffer with no spaces and with ':' characters
 replaced by '_'. */
-UNIV_INTERN
 void
 ut_sprintf_timestamp_without_extra_chars(
 /*=====================================*/
 	char*	buf); /*!< in: buffer where to sprintf */
 /**********************************************************//**
 Returns current year, month, day. */
-UNIV_INTERN
 void
 ut_get_year_month_day(
 /*==================*/
@@ -357,16 +334,14 @@ ut_get_year_month_day(
 /*************************************************************//**
 Runs an idle loop on CPU. The argument gives the desired delay
 in microseconds on 100 MHz Pentium + Visual C++.
-@return	dummy value */
-UNIV_INTERN
-void
+@return dummy value */
+ulint
 ut_delay(
 /*=====*/
 	ulint	delay);	/*!< in: delay in microseconds on 100 MHz Pentium */
 #endif /* UNIV_HOTBACKUP */
 /*************************************************************//**
 Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
 void
 ut_print_buf(
 /*=========*/
@@ -374,83 +349,70 @@ ut_print_buf(
 	const void*	buf,	/*!< in: memory buffer */
 	ulint		len);	/*!< in: length of the buffer */
 
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
+/*************************************************************//**
+Prints the contents of a memory buffer in hex. */
 void
-ut_print_filename(
-/*==============*/
-	FILE*		f,	/*!< in: output stream */
-	const char*	name);	/*!< in: name to print */
+ut_print_buf_hex(
+/*=============*/
+	std::ostream&	o,	/*!< in/out: output stream */
+	const void*	buf,	/*!< in: memory buffer */
+	ulint		len)	/*!< in: length of the buffer */
+	MY_ATTRIBUTE((nonnull));
+/*************************************************************//**
+Prints the contents of a memory buffer in hex and ascii. */
+void
+ut_print_buf(
+/*=========*/
+	std::ostream&	o,	/*!< in/out: output stream */
+	const void*	buf,	/*!< in: memory buffer */
+	ulint		len)	/*!< in: length of the buffer */
+	MY_ATTRIBUTE((nonnull));
 
 #ifndef UNIV_HOTBACKUP
 /* Forward declaration of transaction handle */
 struct trx_t;
 
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
+/** Get a fixed-length string, quoted as an SQL identifier.
 If the string contains a slash '/', the string will be
 output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
-	FILE*		f,	/*!< in: output stream */
-	const trx_t*	trx,	/*!< in: transaction */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name);	/*!< in: name to print */
+as in SQL database_name.identifier.
+@param[in]	trx	transaction (NULL=no quotes)
+@param[in]	name	table name
+@return string quoted as an SQL identifier
+*/
+std::string
+ut_get_name(
+	const trx_t*	trx,
+	const char*	name);
 
 /**********************************************************************//**
 Outputs a fixed-length string, quoted as an SQL identifier.
 If the string contains a slash '/', the string will be
 output as two identifiers separated by a period (.),
 as in SQL database_name.identifier. */
-UNIV_INTERN
 void
-ut_print_namel(
-/*===========*/
-	FILE*		f,	/*!< in: output stream */
-	const trx_t*	trx,	/*!< in: transaction (NULL=no quotes) */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/*!< in: name to print */
-	ulint		namelen);/*!< in: length of name */
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
-	const trx_t*	trx,	/*!< in: transaction (NULL=no quotes) */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name);	/*!< in: name to print */
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
+ut_print_name(
+/*==========*/
+	FILE*		ef,	/*!< in: stream */
+	const trx_t*	trx,	/*!< in: transaction */
+	const char*	name);	/*!< in: table name to print */
+/** Format a table name, quoted as an SQL identifier.
+If the name contains a slash '/', the result will contain two
+identifiers separated by a period (.), as in SQL
+database_name.table_name.
+@see table_name_t
+@param[in]	name		table or index name
+@param[out]	formatted	formatted result, will be NUL-terminated
+@param[in]	formatted_size	size of the buffer in bytes
 @return pointer to 'formatted' */
-UNIV_INTERN
 char*
 ut_format_name(
-/*===========*/
-	const char*	name,		/*!< in: table or index name, must be
-					'\0'-terminated */
-	ibool		is_table,	/*!< in: if TRUE then 'name' is a table
-					name */
-	char*		formatted,	/*!< out: formatted result, will be
-					'\0'-terminated */
-	ulint		formatted_size);/*!< out: no more than this number of
-					bytes will be written to 'formatted' */
+	const char*	name,
+	char*		formatted,
+	ulint		formatted_size);
 
 /**********************************************************************//**
 Catenate files. */
-UNIV_INTERN
 void
 ut_copy_file(
 /*=========*/
@@ -458,7 +420,7 @@ ut_copy_file(
 	FILE*	src);	/*!< in: input file to be appended to output */
 #endif /* !UNIV_HOTBACKUP */
 
-#ifdef __WIN__
+#ifdef _WIN32
 /**********************************************************************//**
 A substitute for vsnprintf(3), formatted output conversion into
 a limited buffer. Note: this function DOES NOT return the number of
@@ -466,7 +428,6 @@ characters that would have been printed if the buffer was unlimited because
 VC's _vsnprintf() returns -1 in this case and we would need to call
 _vscprintf() in addition to estimate that but we would need another copy
 of "ap" for that and VC does not provide va_copy(). */
-UNIV_INTERN
 void
 ut_vsnprintf(
 /*=========*/
@@ -480,7 +441,6 @@ A substitute for snprintf(3), formatted output conversion into
 a limited buffer.
 @return number of characters that would have been printed if the size
 were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
 int
 ut_snprintf(
 /*========*/
@@ -502,35 +462,180 @@ of "ap" for that and VC does not provide va_copy(). */
 A wrapper for snprintf(3), formatted output conversion into
 a limited buffer. */
 # define ut_snprintf	snprintf
-#endif /* __WIN__ */
+#endif /* _WIN32 */
 
 /*************************************************************//**
 Convert an error number to a human readable text message. The
 returned string is static and should not be freed or modified.
-@return	string, describing the error */
-UNIV_INTERN
+@return string, describing the error */
 const char*
 ut_strerr(
 /*======*/
 	dberr_t	num);	/*!< in: error number */
 
-/****************************************************************
-Sort function for ulint arrays. */
-UNIV_INTERN
-void
-ut_ulint_sort(
-/*==========*/
-	ulint*	arr,		/*!< in/out: array to sort */
-	ulint*	aux_arr,	/*!< in/out: aux array to use in sort */
-	ulint	low,		/*!< in: lower bound */
-	ulint	high)		/*!< in: upper bound */
-	MY_ATTRIBUTE((nonnull));
+#endif /* !UNIV_INNOCHECKSUM */
+
+#ifdef UNIV_PFS_MEMORY
+
+/** Extract the basename of a file without its extension.
+For example, extract "foo0bar" out of "/path/to/foo0bar.cc".
+@param[in]	file		file path, e.g. "/path/to/foo0bar.cc"
+@param[out]	base		result, e.g. "foo0bar"
+@param[in]	base_size	size of the output buffer 'base', if there
+is not enough space, then the result will be truncated, but always
+'\0'-terminated
+@return number of characters that would have been printed if the size
+were unlimited (not including the final '\0') */
+size_t
+ut_basename_noext(
+	const char*	file,
+	char*		base,
+	size_t		base_size);
+
+#endif /* UNIV_PFS_MEMORY */
+
+namespace ib {
+
+/** This is a wrapper class, used to print any unsigned integer type
+in hexadecimal format.  The main purpose of this data type is to
+overload the global operator<<, so that we can print the given
+wrapper value in hex. */
+struct hex {
+	explicit hex(uintmax_t t): m_val(t) {}
+	const uintmax_t	m_val;
+};
+
+/** Overload of the global operator<< for the user defined type ib::hex.
+The unsigned value held in the ib::hex wrapper is written to the given stream
+in hex with a base prefix; the stream's format flags are restored afterwards.
+@param[in,out]	lhs	the output stream into which rhs is written.
+@param[in]	rhs	the object to be written into lhs.
+@return reference to the output stream. */
+inline
+std::ostream&
+operator<<(
+	std::ostream&	lhs,
+	const hex&	rhs)
+{
+	const std::ios_base::fmtflags	saved_flags = lhs.flags();
+	lhs << std::showbase << std::hex << rhs.m_val;
+	lhs.flags(saved_flags);	/* setf() would only OR bits in */
+	return(lhs);
+}
+
+/** The class logger is the base class of all the error log related classes.
+It contains a std::ostringstream object.  The main purpose of this class is
+to forward operator<< to the underlying std::ostringstream object.  Do not
+use this class directly, instead use one of the derived classes. */
+class logger {
+public:
+	template<typename T>
+	logger& operator<<(const T& rhs)
+	{
+		m_oss << rhs;
+		return(*this);
+	}
+
+	/** Write the given buffer to the internal string stream object.
+	@param[in]	buf	the buffer whose contents will be logged.
+	@param[in]	count	the length of the buffer buf.
+	@return the output stream into which buffer was written. */
+	std::ostream&
+	write(
+		const char*		buf,
+		std::streamsize		count)
+	{
+		return(m_oss.write(buf, count));
+	}
+
+	/** Write the given buffer to the internal string stream object.
+	@param[in]	buf	the buffer whose contents will be logged.
+	@param[in]	count	the length of the buffer buf.
+	@return the output stream into which buffer was written. */
+	std::ostream&
+	write(
+		const byte*		buf,
+		std::streamsize		count)
+	{
+		return(m_oss.write(reinterpret_cast<const char*>(buf), count));
+	}
+
+	std::ostringstream	m_oss;
+protected:
+	/* This class must not be used directly, hence making the default
+	constructor protected. */
+	logger() {}
+};
+
+/** The class info is used to emit informational log messages.  It is to be
+used similarly to std::cout, but the log messages will be emitted only when
+the dtor is called.  The preferred usage of this class is to make use of
+unnamed temporaries as follows:
+
+info() << "The server started successfully.";
+
+In the above usage, the temporary object will be destroyed at the end of the
+statement and hence the log message will be emitted at the end of the
+statement.  If a named object is created, then the log message will be emitted
+only when it goes out of scope or is destroyed. */
+class info : public logger {
+public:
+	~info();
+};
+
+/** The class warn is used to emit warnings.  Refer to the documentation of
+class info for further details. */
+class warn : public logger {
+public:
+	~warn();
+};
+
+/** The class error is used to emit error messages.  Refer to the
+documentation of class info for further details. */
+class error : public logger {
+public:
+	~error();
+};
+
+/** The class fatal is used to emit an error message and stop the server
+by crashing it.  Use this class when MySQL server needs to be stopped
+immediately.  Refer to the documentation of class info for usage details. */
+class fatal : public logger {
+public:
+	~fatal();
+};
+
+/** Emit an error message if the given predicate is true, otherwise emit a
+warning message */
+class error_or_warn : public logger {
+public:
+	error_or_warn(bool	pred)
+	: m_error(pred)
+	{}
+
+	~error_or_warn();
+private:
+	const bool	m_error;
+};
+
+/** Emit a fatal message if the given predicate is true, otherwise emit an
+error message. */
+class fatal_or_error : public logger {
+public:
+	fatal_or_error(bool	pred)
+	: m_fatal(pred)
+	{}
+
+	~fatal_or_error();
+private:
+	const bool	m_fatal;
+};
+
+} // namespace ib
 
 #ifndef UNIV_NONINL
 #include "ut0ut.ic"
 #endif
 
-#endif /* !UNIV_INNOCHECKSUM */
-
 #endif
 
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
index 4e0f76e1957..31e81f7336e 100644
--- a/storage/innobase/include/ut0ut.ic
+++ b/storage/innobase/include/ut0ut.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,60 +23,40 @@ Various utilities
 Created 5/30/1994 Heikki Tuuri
 *******************************************************************/
 
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return	minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
-	ulint	 n1,	/*!< in: first number */
-	ulint	 n2)	/*!< in: second number */
-{
-	return((n1 <= n2) ? n1 : n2);
-}
-
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return	maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
-	ulint	 n1,	/*!< in: first number */
-	ulint	 n2)	/*!< in: second number */
-{
-	return((n1 <= n2) ? n2 : n1);
-}
+#include <algorithm>
 
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
+/** Calculate the minimum of two pairs.
+@param[out]	min_hi	MSB of the minimum pair
+@param[out]	min_lo	LSB of the minimum pair
+@param[in]	a_hi	MSB of the first pair
+@param[in]	a_lo	LSB of the first pair
+@param[in]	b_hi	MSB of the second pair
+@param[in]	b_lo	LSB of the second pair */
 UNIV_INLINE
 void
 ut_pair_min(
-/*========*/
-	ulint*	a,	/*!< out: more significant part of minimum */
-	ulint*	b,	/*!< out: less significant part of minimum */
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	b1,	/*!< in: less significant part of first pair */
-	ulint	a2,	/*!< in: more significant part of second pair */
-	ulint	b2)	/*!< in: less significant part of second pair */
+	ulint*	min_hi,
+	ulint*	min_lo,
+	ulint	a_hi,
+	ulint	a_lo,
+	ulint	b_hi,
+	ulint	b_lo)
 {
-	if (a1 == a2) {
-		*a = a1;
-		*b = ut_min(b1, b2);
-	} else if (a1 < a2) {
-		*a = a1;
-		*b = b1;
+	if (a_hi == b_hi) {
+		*min_hi = a_hi;
+		*min_lo = std::min(a_lo, b_lo);
+	} else if (a_hi < b_hi) {
+		*min_hi = a_hi;
+		*min_lo = a_lo;
 	} else {
-		*a = a2;
-		*b = b2;
+		*min_hi = b_hi;
+		*min_lo = b_lo;
 	}
 }
 
 /******************************************************//**
 Compares two ulints.
-@return	1 if a > b, 0 if a == b, -1 if a < b */
+@return 1 if a > b, 0 if a == b, -1 if a < b */
 UNIV_INLINE
 int
 ut_ulint_cmp(
@@ -93,35 +73,36 @@ ut_ulint_cmp(
 	}
 }
 
-/*******************************************************//**
-Compares two pairs of ulints.
-@return	-1 if a < b, 0 if a == b, 1 if a > b */
+/** Compare two pairs of integers.
+@param[in]	a_h	more significant part of first pair
+@param[in]	a_l	less significant part of first pair
+@param[in]	b_h	more significant part of second pair
+@param[in]	b_l	less significant part of second pair
+@return comparison result of (a_h,a_l) and (b_h,b_l)
+@retval -1 if (a_h,a_l) is less than (b_h,b_l)
+@retval 0 if (a_h,a_l) is equal to (b_h,b_l)
+@retval 1 if (a_h,a_l) is greater than (b_h,b_l) */
 UNIV_INLINE
 int
 ut_pair_cmp(
-/*========*/
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	a2,	/*!< in: less significant part of first pair */
-	ulint	b1,	/*!< in: more significant part of second pair */
-	ulint	b2)	/*!< in: less significant part of second pair */
+	ulint	a_h,
+	ulint	a_l,
+	ulint	b_h,
+	ulint	b_l)
 {
-	if (a1 > b1) {
-		return(1);
-	} else if (a1 < b1) {
+	if (a_h < b_h) {
 		return(-1);
-	} else if (a2 > b2) {
+	}
+	if (a_h > b_h) {
 		return(1);
-	} else if (a2 < b2) {
-		return(-1);
-	} else {
-		return(0);
 	}
+	return(ut_ulint_cmp(a_l, b_l));
 }
 
 /*************************************************************//**
 Calculates fast the 2-logarithm of a number, rounded upward to an
 integer.
-@return	logarithm in the base 2, rounded upward */
+@return logarithm in the base 2, rounded upward */
 UNIV_INLINE
 ulint
 ut_2_log(
@@ -151,7 +132,7 @@ ut_2_log(
 
 /*************************************************************//**
 Calculates 2 to power n.
-@return	2 to power n */
+@return 2 to power n */
 UNIV_INLINE
 ulint
 ut_2_exp(
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index 432fb348a09..b5c0beddc15 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -64,7 +64,6 @@ freeing it when done with the vector.
 
 /********************************************************************
 Create a new vector with the given initial size. */
-UNIV_INTERN
 ib_vector_t*
 ib_vector_create(
 /*=============*/
@@ -124,7 +123,6 @@ ib_vector_size(
 
 /********************************************************************
 Increase the size of the vector. */
-UNIV_INTERN
 void
 ib_vector_resize(
 /*=============*/
@@ -142,7 +140,7 @@ ib_vector_is_empty(
 
 /****************************************************************//**
 Get the n'th element.
-@return	n'th element */
+@return n'th element */
 UNIV_INLINE
 void*
 ib_vector_get(
@@ -161,7 +159,7 @@ ib_vector_get_const(
 	ulint			n);	/* in: element index to get */
 /****************************************************************//**
 Get last element. The vector must not be empty.
-@return	last element */
+@return last element */
 UNIV_INLINE
 void*
 ib_vector_get_last(
@@ -263,53 +261,6 @@ ib_heap_allocator_free(
 /*===================*/
 	ib_alloc_t*	ib_ut_alloc);	/* in: alloc instace to free */
 
-/********************************************************************
-Wrapper for ut_free(). */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
-	ib_alloc_t*	allocator,	/* in: allocator */
-	void*		ptr);		/* in: size in bytes */
-
-/********************************************************************
-Wrapper for ut_malloc(). */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
-					/* out: pointer to allocated memory */
-	ib_alloc_t*	allocator,	/* in: allocator */
-	ulint		size);		/* in: size in bytes */
-
-/********************************************************************
-Wrapper for ut_realloc(). */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
-					/* out: pointer to reallocated
-					memory */
-	ib_alloc_t*	allocator,	/* in: allocator */
-	void*		old_ptr,	/* in: pointer to memory */
-	ulint		old_size,	/* in: old size in bytes */
-	ulint		new_size);	/* in: new size in bytes */
-
-/********************************************************************
-Create a heap allocator that uses the passed in heap. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void);
-/*=========================*/
-
-/********************************************************************
-Create a heap allocator that uses the passed in heap. */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
-	ib_alloc_t*	ib_ut_alloc);	/* in: alloc instace to free */
-
 /* Allocator used by ib_vector_t. */
 struct ib_alloc_t {
 	ib_mem_alloc_t	mem_malloc;	/* For allocating memory */
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
index f41a85e1d1d..17f4df579b6 100644
--- a/storage/innobase/include/ut0vec.ic
+++ b/storage/innobase/include/ut0vec.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +23,8 @@ A vector of pointers to data items
 Created 4/6/2006 Osku Salerma
 ************************************************************************/
 
+#include "ut0new.h"
+
 #define	IB_VEC_OFFSET(v, i)	(vec->sizeof_value * i)
 
 /********************************************************************
@@ -54,6 +56,7 @@ ib_heap_free(
 /********************************************************************
 The default ib_vector_t heap resize. Since we can't resize the heap
 we have to copy the elements from the old ptr to the new ptr.
+We always assume new_size >= old_size, so the buffer won't overflow.
 Uses mem_heap_alloc(). */
 UNIV_INLINE
 void*
@@ -67,6 +70,7 @@ ib_heap_resize(
 	void*		new_ptr;
 	mem_heap_t*	heap = (mem_heap_t*) allocator->arg;
 
+	ut_a(new_size >= old_size);
 	new_ptr = mem_heap_alloc(heap, new_size);
 	memcpy(new_ptr, old_ptr, old_size);
 
@@ -105,74 +109,6 @@ ib_heap_allocator_free(
 }
 
 /********************************************************************
-Wrapper around ut_malloc(). */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
-	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator */
-	ulint		size)			/* in: size in bytes */
-{
-	return(ut_malloc(size));
-}
-
-/********************************************************************
-Wrapper around ut_free(). */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
-	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator */
-	void*		ptr)			/* in: size in bytes */
-{
-	ut_free(ptr);
-}
-
-/********************************************************************
-Wrapper aroung ut_realloc(). */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
-	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator */
-	void*		old_ptr,	/* in: pointer to memory */
-	ulint		old_size UNIV_UNUSED,/* in: old size in bytes */
-	ulint		new_size)	/* in: new size in bytes */
-{
-	return(ut_realloc(old_ptr, new_size));
-}
-
-/********************************************************************
-Create a ut allocator. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void)
-/*========================*/
-{
-	ib_alloc_t*	ib_ut_alloc;
-
-	ib_ut_alloc = (ib_alloc_t*) ut_malloc(sizeof(*ib_ut_alloc));
-
-	ib_ut_alloc->arg = NULL;
-	ib_ut_alloc->mem_release = ib_ut_free;
-	ib_ut_alloc->mem_malloc = ib_ut_malloc;
-	ib_ut_alloc->mem_resize = ib_ut_resize;
-
-	return(ib_ut_alloc);
-}
-
-/********************************************************************
-Free a ut allocator. */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
-	ib_alloc_t*	ib_ut_alloc)	/* in: alloc instace to free */
-{
-	ut_free(ib_ut_alloc);
-}
-
-/********************************************************************
 Get number of elements in vector. */
 UNIV_INLINE
 ulint
@@ -214,7 +150,7 @@ ib_vector_get_const(
 }
 /****************************************************************//**
 Get last element. The vector must not be empty.
-@return	last element */
+@return last element */
 UNIV_INLINE
 void*
 ib_vector_get_last(
@@ -286,7 +222,7 @@ ib_vector_last_const(
 
 /****************************************************************//**
 Remove the last element from the vector.
-@return	last vector element */
+@return last vector element */
 UNIV_INLINE
 void*
 ib_vector_pop(
@@ -392,24 +328,13 @@ ib_vector_free(
 /*===========*/
 	ib_vector_t*	vec)		/* in, own: vector */
 {
-	/* Currently we only support two types of allocators, heap
-	and ut_malloc(), when the heap is freed all the elements are
-	freed too. With ut allocator, we need to free the elements,
-	the vector instance and the allocator separately. */
+	/* Currently we only support one type of allocator: the heap
+	allocator. When the heap is freed, all the elements are freed too. */
 
 	/* Only the heap allocator uses the arg field. */
-	if (vec->allocator->arg) {
-		mem_heap_free((mem_heap_t*) vec->allocator->arg);
-	} else {
-		ib_alloc_t*	allocator;
-
-		allocator = vec->allocator;
+	ut_ad(vec->allocator->arg != NULL);
 
-		allocator->mem_release(allocator, vec->data);
-		allocator->mem_release(allocator, vec);
-
-		ib_ut_allocator_free(allocator);
-	}
+	mem_heap_free((mem_heap_t*) vec->allocator->arg);
 }
 
 /********************************************************************
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index 9906e299808..771d8d6ae5c 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -34,41 +34,37 @@ processing.
 
 #include "ut0list.h"
 #include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
 
+// Forward declaration
+struct ib_list_t;
 struct ib_wqueue_t;
 
 /****************************************************************//**
 Create a new work queue.
-@return	work queue */
-UNIV_INTERN
+@return work queue */
 ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
+ib_wqueue_create();
+/*===============*/
 
 /****************************************************************//**
 Free a work queue. */
-UNIV_INTERN
 void
 ib_wqueue_free(
 /*===========*/
-	ib_wqueue_t*	wq);	/*!< in: work queue */
+	ib_wqueue_t*	wq);		/*!< in: work queue */
 
 /****************************************************************//**
 Add a work item to the queue. */
-UNIV_INTERN
 void
 ib_wqueue_add(
 /*==========*/
-	ib_wqueue_t*	wq,	/*!< in: work queue */
-	void*		item,	/*!< in: work item */
-	mem_heap_t*	heap);	/*!< in: memory heap to use for allocating the
-				list node */
+	ib_wqueue_t*	wq,		/*!< in: work queue */
+	void*		item,		/*!< in: work item */
+	mem_heap_t*	heap);		/*!< in: memory heap to use for
+					allocating the list node */
 
 /********************************************************************
 Check if queue is empty. */
-
 ibool
 ib_wqueue_is_empty(
 /*===============*/
@@ -78,16 +74,14 @@ ib_wqueue_is_empty(
 
 /****************************************************************//**
 Wait for a work item to appear in the queue.
-@return	work item */
-UNIV_INTERN
+@return work item */
 void*
 ib_wqueue_wait(
 /*===========*/
-	ib_wqueue_t*	wq);	/*!< in: work queue */
+	ib_wqueue_t*	wq);		/*!< in: work queue */
 
 /********************************************************************
 Wait for a work item to appear in the queue for specified time. */
-
 void*
 ib_wqueue_timedwait(
 /*================*/
@@ -102,7 +96,6 @@ void*
 ib_wqueue_nowait(
 /*=============*/
 	ib_wqueue_t*	wq);		/*<! in: work queue */
-
 /********************************************************************
 Get number of items on queue.
 @return number of items on queue */
@@ -112,11 +105,4 @@ ib_wqueue_len(
 	ib_wqueue_t*	wq);		/*<! in: work queue */
 
 
-/* Work queue. */
-struct ib_wqueue_t {
-	ib_mutex_t		mutex;	/*!< mutex protecting everything */
-	ib_list_t*	items;	/*!< work item list */
-	os_event_t	event;	/*!< event we use to signal additions to list */
-};
-
-#endif
+#endif /* IB_WORK_QUEUE_H */